Skip to content

Commit a29f881

Browse files
committed
Covering another alt loc bond edge case
1 parent 0ff1823 commit a29f881

3 files changed

Lines changed: 200 additions & 16 deletions

File tree

biojava-structure/src/main/java/org/biojava/nbio/structure/io/BondMaker.java

Lines changed: 33 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -128,30 +128,51 @@ private void formPeptideBonds() {
128128
continue;
129129
}
130130

131-
Atom carboxylC;
132-
Atom aminoN;
131+
List<Atom> carboxylCs = getAtoms(tail, "C");
132+
List<Atom> aminoNs = getAtoms(head, "N");
133133

134-
carboxylC = tail.getC();
135-
aminoN = head.getN();
136-
137-
138-
if (carboxylC == null || aminoN == null) {
134+
if (carboxylCs.isEmpty() || aminoNs.isEmpty()) {
139135
// some structures may be incomplete and not store info
140136
// about all of their atoms
141-
142137
continue;
143138
}
144139

145-
146-
if (Calc.getDistance(carboxylC, aminoN) < MAX_PEPTIDE_BOND_LENGTH) {
147-
new BondImpl(carboxylC, aminoN, 1);
140+
for (Atom carboxylC:carboxylCs) {
141+
for (Atom aminoN:aminoNs) {
142+
if (carboxylC.getAltLoc() != null && aminoN.getAltLoc()!=null &&
143+
carboxylC.getAltLoc()!=' ' && aminoN.getAltLoc()!=' ' &&
144+
carboxylC.getAltLoc() != aminoN.getAltLoc()) {
145+
logger.debug("Skipping peptide bond between atoms with differently named alt locs {} (altLoc '{}') -- {} (altLoc '{}')",
146+
carboxylC.toString(), carboxylC.getAltLoc(), aminoN.toString(), aminoN.getAltLoc());
147+
continue;
148+
}
149+
if (Calc.getDistance(carboxylC, aminoN) < MAX_PEPTIDE_BOND_LENGTH) {
150+
new BondImpl(carboxylC, aminoN, 1);
151+
}
152+
}
148153
}
149-
150154
}
151155
}
152156
}
153157
}
154158

159+
/**
160+
* Get all atoms (including possible alt locs) in given group that are name with the given atom name
161+
* @param g the group
162+
* @param name the atom name
163+
* @return list of all atoms, or empty list if no atoms with the name
164+
*/
165+
private List<Atom> getAtoms(Group g, String name) {
166+
List<Atom> atoms = new ArrayList<>();
167+
List<Group> groupsWithAltLocs = new ArrayList<>();
168+
groupsWithAltLocs.add(g);
169+
groupsWithAltLocs.addAll(g.getAltLocs());
170+
for (Group group : groupsWithAltLocs) {
171+
atoms.add(group.getAtom(name));
172+
}
173+
return atoms;
174+
}
175+
155176
private void formNucleotideBonds() {
156177
for (int modelInd=0; modelInd<structure.nrModels(); modelInd++){
157178
for (Chain chain : structure.getChains(modelInd)) {

biojava-structure/src/main/java/org/biojava/nbio/structure/io/mmcif/SimpleMMcifConsumer.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -213,7 +213,7 @@ private Entity getEntity(int entity_id){
213213
}
214214
}
215215
} catch (NumberFormatException e) {
216-
logger.warn("Entity id does not look like a number:", e.getMessage());
216+
logger.warn("Entity id does not look like a number: {}", e.getMessage());
217217
}
218218
return null;
219219
}
@@ -729,7 +729,7 @@ public void documentEnd() {
729729

730730
// we'll only add seqres chains that are polymeric or unknown
731731
if (type==null || type==EntityType.POLYMER ) {
732-
seqResChains.add(seqres);
732+
seqResChains.add(seqres);
733733
}
734734

735735
logger.debug(" seqres: " + asym.getId() + " " + seqres + "<") ;

biojava-structure/src/test/java/org/biojava/nbio/structure/TestAltLocs.java

Lines changed: 165 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,9 @@
3737
import java.io.IOException;
3838
import java.io.StringReader;
3939
import java.util.ArrayList;
40+
import java.util.HashSet;
4041
import java.util.List;
42+
import java.util.Set;
4143

4244
import static org.junit.Assert.*;
4345

@@ -755,11 +757,12 @@ public void testMmcifConversionAllAltlocs() throws IOException {
755757
}
756758

757759
/**
758-
* Test that bonds between alt locs link atoms with same altloc codes
760+
* Test that intra-residue bonds between alt locs link atoms with same altloc codes
759761
* https://github.com/rcsb/mmtf/issues/44
760762
*/
761763
@Test
762-
public void testBondsBetweenAltlocs() throws IOException {
764+
public void testIntraResidueBondsBetweenAltlocs() throws IOException {
765+
// from 5MOO
763766
String mmcifData =
764767
"data_test\n" +
765768
"loop_\n" +
@@ -858,4 +861,164 @@ public void testBondsBetweenAltlocs() throws IOException {
858861

859862
}
860863

864+
/**
865+
* Test that inter-residue bonds between alt locs link atoms with same altloc codes or default alt loc to all alt locs
866+
* https://github.com/rcsb/mmtf/issues/44
867+
*/
868+
@Test
869+
public void testInterResidueBondsBetweenAltlocs() throws IOException {
870+
// from 5MOO
871+
String mmcifData =
872+
"data_test\n" +
873+
"# \n" +
874+
"loop_\n" +
875+
"_entity.id \n" +
876+
"_entity.type \n" +
877+
"_entity.src_method \n" +
878+
"_entity.pdbx_description \n" +
879+
"_entity.formula_weight \n" +
880+
"_entity.pdbx_number_of_molecules \n" +
881+
"_entity.pdbx_ec \n" +
882+
"_entity.pdbx_mutation \n" +
883+
"_entity.pdbx_fragment \n" +
884+
"_entity.details \n" +
885+
"1 polymer nat 'Cationic trypsin' 23324.287 1 3.4.21.4 ? ? ? \n" +
886+
"# \n" +
887+
"loop_\n" +
888+
"_entity_poly_seq.entity_id \n" +
889+
"_entity_poly_seq.num \n" +
890+
"_entity_poly_seq.mon_id \n" +
891+
"_entity_poly_seq.hetero \n" +
892+
"1 1 ILE n \n" +
893+
"1 2 MET n \n" +
894+
"# \n" +
895+
"loop_\n" +
896+
"_struct_asym.id \n" +
897+
"_struct_asym.pdbx_blank_PDB_chainid_flag \n" +
898+
"_struct_asym.pdbx_modified \n" +
899+
"_struct_asym.entity_id \n" +
900+
"_struct_asym.details \n" +
901+
"A N N 1 ? \n" +
902+
"# \n" +
903+
"loop_\n" +
904+
"_atom_site.group_PDB \n" +
905+
"_atom_site.id \n" +
906+
"_atom_site.type_symbol \n" +
907+
"_atom_site.label_atom_id \n" +
908+
"_atom_site.label_alt_id \n" +
909+
"_atom_site.label_comp_id \n" +
910+
"_atom_site.label_asym_id \n" +
911+
"_atom_site.label_entity_id \n" +
912+
"_atom_site.label_seq_id \n" +
913+
"_atom_site.pdbx_PDB_ins_code \n" +
914+
"_atom_site.Cartn_x \n" +
915+
"_atom_site.Cartn_y \n" +
916+
"_atom_site.Cartn_z \n" +
917+
"_atom_site.occupancy \n" +
918+
"_atom_site.B_iso_or_equiv \n" +
919+
"_atom_site.pdbx_formal_charge \n" +
920+
"_atom_site.auth_seq_id \n" +
921+
"_atom_site.auth_comp_id \n" +
922+
"_atom_site.auth_asym_id \n" +
923+
"_atom_site.auth_atom_id \n" +
924+
"_atom_site.pdbx_PDB_model_num \n" +
925+
"ATOM 1385 N N . ILE A 1 1 ? 10.900 -16.328 -10.274 1.00 17.47 ? 103 ILE A N 1 \n" +
926+
"ATOM 1386 C CA . ILE A 1 1 ? 10.885 -17.487 -9.388 1.00 17.76 ? 103 ILE A CA 1 \n" +
927+
"ATOM 1387 C C . ILE A 1 1 ? 11.374 -17.058 -8.011 1.00 17.35 ? 103 ILE A C 1 \n" +
928+
"ATOM 1388 O O . ILE A 1 1 ? 12.265 -16.211 -7.883 1.00 18.51 ? 103 ILE A O 1 \n" +
929+
"ATOM 1389 C CB . ILE A 1 1 ? 11.721 -18.644 -9.986 1.00 18.19 ? 103 ILE A CB 1 \n" +
930+
"ATOM 1390 C CG1 . ILE A 1 1 ? 11.610 -19.916 -9.144 1.00 19.64 ? 103 ILE A CG1 1 \n" +
931+
"ATOM 1391 C CG2 . ILE A 1 1 ? 13.177 -18.246 -10.209 1.00 19.73 ? 103 ILE A CG2 1 \n" +
932+
"ATOM 1392 C CD1 . ILE A 1 1 ? 12.217 -21.162 -9.820 1.00 22.94 ? 103 ILE A CD1 1 \n" +
933+
"ATOM 1393 H H A ILE A 1 1 ? 11.598 -15.614 -10.041 1.00 17.71 ? 103 ILE A H 1 \n" +
934+
"ATOM 1394 D D B ILE A 1 1 ? 11.598 -15.614 -10.041 0.00 17.71 ? 103 ILE A D 1 \n" +
935+
"ATOM 1395 H HA . ILE A 1 1 ? 9.856 -17.843 -9.277 1.00 17.70 ? 103 ILE A HA 1 \n" +
936+
"ATOM 1396 H HB . ILE A 1 1 ? 11.300 -18.886 -10.957 1.00 18.93 ? 103 ILE A HB 1 \n" +
937+
"ATOM 1397 H HG12 . ILE A 1 1 ? 12.149 -19.788 -8.209 1.00 20.93 ? 103 ILE A HG12 1 \n" +
938+
"ATOM 1398 H HG13 . ILE A 1 1 ? 10.563 -20.127 -8.939 1.00 20.93 ? 103 ILE A HG13 1 \n" +
939+
"ATOM 1399 H HG21 . ILE A 1 1 ? 13.669 -19.035 -10.776 1.00 20.97 ? 103 ILE A HG21 1 \n" +
940+
"ATOM 1400 H HG22 . ILE A 1 1 ? 13.235 -17.312 -10.767 1.00 20.97 ? 103 ILE A HG22 1 \n" +
941+
"ATOM 1401 H HG23 . ILE A 1 1 ? 13.683 -18.144 -9.251 1.00 20.97 ? 103 ILE A HG23 1 \n" +
942+
"ATOM 1402 H HD11 . ILE A 1 1 ? 13.299 -21.078 -9.905 1.00 24.96 ? 103 ILE A HD11 1 \n" +
943+
"ATOM 1403 H HD12 . ILE A 1 1 ? 11.967 -22.036 -9.223 1.00 24.96 ? 103 ILE A HD12 1 \n" +
944+
"ATOM 1404 H HD13 . ILE A 1 1 ? 11.779 -21.281 -10.808 1.00 24.96 ? 103 ILE A HD13 1 \n" +
945+
"ATOM 1405 N N A MET A 1 2 ? 10.748 -17.610 -6.975 0.47 16.12 ? 104 MET A N 1 \n" +
946+
"ATOM 1406 N N B MET A 1 2 ? 10.802 -17.694 -6.986 0.53 17.92 ? 104 MET A N 1 \n" +
947+
"ATOM 1407 C CA A MET A 1 2 ? 11.189 -17.392 -5.610 0.47 15.78 ? 104 MET A CA 1 \n" +
948+
"ATOM 1408 C CA B MET A 1 2 ? 11.033 -17.368 -5.587 0.53 18.29 ? 104 MET A CA 1 \n" +
949+
"ATOM 1409 C C A MET A 1 2 ? 10.952 -18.663 -4.810 0.47 15.91 ? 104 MET A C 1 \n" +
950+
"ATOM 1410 C C B MET A 1 2 ? 10.882 -18.643 -4.767 0.53 17.40 ? 104 MET A C 1 \n" +
951+
"ATOM 1411 O O A MET A 1 2 ? 10.120 -19.504 -5.154 0.47 18.21 ? 104 MET A O 1 \n" +
952+
"ATOM 1412 O O B MET A 1 2 ? 10.018 -19.474 -5.052 0.53 20.02 ? 104 MET A O 1 \n" +
953+
"ATOM 1413 C CB A MET A 1 2 ? 10.477 -16.204 -4.933 0.47 17.14 ? 104 MET A CB 1 \n" +
954+
"ATOM 1414 C CB B MET A 1 2 ? 10.001 -16.336 -5.111 0.53 18.92 ? 104 MET A CB 1 \n" +
955+
"ATOM 1415 C CG A MET A 1 2 ? 9.019 -16.476 -4.619 0.47 20.01 ? 104 MET A CG 1 \n" +
956+
"ATOM 1416 C CG B MET A 1 2 ? 10.030 -16.038 -3.634 0.53 19.12 ? 104 MET A CG 1 \n" +
957+
"ATOM 1417 S SD A MET A 1 2 ? 8.207 -15.088 -3.838 0.47 22.06 ? 104 MET A SD 1 \n" +
958+
"ATOM 1418 S SD B MET A 1 2 ? 8.874 -14.724 -3.205 0.53 20.16 ? 104 MET A SD 1 \n" +
959+
"ATOM 1419 C CE A MET A 1 2 ? 9.151 -14.973 -2.340 0.47 25.15 ? 104 MET A CE 1 \n" +
960+
"ATOM 1420 C CE B MET A 1 2 ? 7.269 -15.536 -3.380 0.53 20.38 ? 104 MET A CE 1 \n" +
961+
"ATOM 1421 H H A MET A 1 2 ? 9.931 -18.207 -7.055 0.47 15.58 ? 104 MET A H 1 \n" +
962+
"ATOM 1422 H H B MET A 1 2 ? 10.144 -18.461 -7.109 0.53 18.91 ? 104 MET A H 1 \n" +
963+
"ATOM 1423 H HA A MET A 1 2 ? 12.256 -17.182 -5.644 0.47 15.14 ? 104 MET A HA 1 \n" +
964+
"ATOM 1424 H HA B MET A 1 2 ? 12.033 -16.953 -5.465 0.53 19.55 ? 104 MET A HA 1 \n" +
965+
"ATOM 1425 H HB2 A MET A 1 2 ? 10.986 -15.920 -4.008 0.47 17.68 ? 104 MET A HB2 1 \n" +
966+
"ATOM 1426 H HB3 A MET A 1 2 ? 10.484 -15.364 -5.622 0.47 17.68 ? 104 MET A HB3 1 \n" +
967+
"ATOM 1427 H HB3 B MET A 1 2 ? 9.001 -16.676 -5.398 0.53 20.49 ? 104 MET A HB3 1 \n" +
968+
"ATOM 1428 H HG2 A MET A 1 2 ? 8.490 -16.704 -5.546 0.47 20.93 ? 104 MET A HG2 1 \n" +
969+
"ATOM 1429 H HG3 A MET A 1 2 ? 8.956 -17.315 -3.927 0.47 20.93 ? 104 MET A HG3 1 \n" +
970+
"ATOM 1430 H HE2 A MET A 1 2 ? 9.861 -14.153 -2.440 0.47 27.31 ? 104 MET A HE2 1 \n" +
971+
"ATOM 1431 H HE2 B MET A 1 2 ? 7.346 -16.554 -2.998 0.53 23.03 ? 104 MET A HE2 1 \n" +
972+
"ATOM 1432 H HE3 B MET A 1 2 ? 6.996 -15.566 -4.437 0.53 23.03 ? 104 MET A HE3 1 ";
973+
974+
SimpleMMcifParser parser = new SimpleMMcifParser();
975+
SimpleMMcifConsumer consumer = new SimpleMMcifConsumer();
976+
parser.addMMcifConsumer(consumer);
977+
978+
FileParsingParameters params = new FileParsingParameters();
979+
params.setCreateAtomBonds(true);
980+
consumer.setFileParsingParameters(params);
981+
982+
BufferedReader buf = new BufferedReader(new StringReader(mmcifData));
983+
parser.parse(buf);
984+
buf.close();
985+
986+
Structure s = consumer.getStructure();
987+
Chain c = s.getPolyChains().get(0);
988+
assertEquals(2, c.getAtomGroups().size());
989+
990+
// inter residue bonds and alt locs
991+
// ILE-C (.) must be linked to both MET-N (A and B alt locs)
992+
Group g1 = c.getAtomGroup(0);
993+
994+
Atom catom = g1.getAtom("C");
995+
List<Bond> bonds = new ArrayList<>();
996+
for (Bond b : catom.getBonds()) {
997+
if (b.getAtomA().getName().equals("N") || b.getAtomB().getName().equals("N")) {
998+
bonds.add(b);
999+
}
1000+
}
1001+
1002+
assertEquals(2, bonds.size());
1003+
1004+
Set<Character> seenAltLocs = new HashSet<>();
1005+
for (Bond b : bonds) {
1006+
Atom aAtom = b.getAtomA();
1007+
Atom bAtom = b.getAtomB();
1008+
Atom nAtom;
1009+
if (aAtom.getName().equals("N")) {
1010+
nAtom = aAtom;
1011+
} else {
1012+
nAtom = bAtom;
1013+
}
1014+
seenAltLocs.add(nAtom.getAltLoc());
1015+
}
1016+
// 2 distinct N atoms: alt loc A and B
1017+
assertEquals(2, seenAltLocs.size());
1018+
assertTrue(seenAltLocs.contains('A'));
1019+
assertTrue(seenAltLocs.contains('B'));
1020+
1021+
}
1022+
1023+
8611024
}

0 commit comments

Comments
 (0)