@@ -615,86 +615,9 @@ public void documentEnd() {
615615 seqResChains .add (seqres );
616616 logger .debug (" seqres: " + asym .getId () + " " + seqres + "<" ) ;
617617
618+ // adding the compounds (entities)
619+ addCompounds (asym );
618620
619- int eId = 0 ;
620- try {
621- eId = Integer .parseInt (asym .getEntity_id ());
622- } catch (NumberFormatException e ) {
623- logger .warn ("Could not parse mol_id from string {}. Will use 0 for creating Compound" ,asym .getEntity_id ());
624- }
625- Entity e = getEntity (eId );
626-
627- for (EntitySrcGen esg : entitySrcGens ) {
628-
629- if (! esg .getEntity_id ().equals (asym .getEntity_id ()))
630- continue ;
631-
632- // found the matching EntitySrcGen
633- // get the corresponding Entity
634- Compound c = structure .getCompoundById (eId );
635- if ( c == null ){
636- if (e !=null && e .getType ().equals ("polymer" )) {
637- c = createNewCompoundFromESG (esg , eId );
638- c .setMolName (e .getPdbx_description ());
639- structure .addCompound (c );
640- logger .debug ("Adding Compound with entity id {} from _entity_src_syn, with name: {}" ,eId ,c .getMolName ());
641- }
642- }
643-
644- }
645-
646- for (EntitySrcNat esn : entitySrcNats ) {
647- if (! esn .getEntity_id ().equals (asym .getEntity_id ()))
648- continue ;
649-
650- // found the matching EntitySrcGen
651- // get the corresponding Entity
652- Compound c = structure .getCompoundById (eId );
653- if ( c == null ){
654- if (e !=null && e .getType ().equals ("polymer" )) {
655- c = createNewCompoundFromESN (esn , eId );
656- c .setMolName (e .getPdbx_description ());
657- structure .addCompound (c );
658- logger .debug ("Adding Compound with entity id {} from _entity_src_syn, with name: {}" ,eId ,c .getMolName ());
659- }
660- }
661-
662- }
663-
664- for (EntitySrcSyn ess : entitySrcSyns ) {
665- if (! ess .getEntity_id ().equals (asym .getEntity_id ()))
666- continue ;
667-
668- // found the matching EntitySrcGen
669- // get the corresponding Entity
670- Compound c = structure .getCompoundById (eId );
671- if ( c == null ){
672- if (e !=null && e .getType ().equals ("polymer" )) {
673- c = createNewCompoundFromESS (ess , eId );
674- c .setMolName (e .getPdbx_description ());
675- structure .addCompound (c );
676- logger .debug ("Adding Compound with entity id {} from _entity_src_syn, with name: {}" ,eId ,c .getMolName ());
677- }
678- }
679- }
680-
681- // for some mmCIF files like 1yrm all 3 of _entity_src_gen, _entity_src_nat and _pdbx_entity_src_syn are missing
682- // we need to fill the Compounds in some other way:
683-
684- Compound c = structure .getCompoundById (eId );
685-
686- if (c ==null ) {
687- c = new Compound ();
688- c .setMolId (eId );
689-
690- // we only add the compound if a polymeric one (to match what the PDB parser does)
691- if (e !=null && e .getType ().equals ("polymer" )) {
692- c .setMolName (e .getPdbx_description ());
693- structure .addCompound (c );
694- logger .debug ("Adding Compound with entity id {} from _entity, with name: {}" ,eId , c .getMolName ());
695- }
696- }
697-
698621 }
699622
700623 if ( params .isAlignSeqRes () ){
@@ -712,9 +635,21 @@ public void documentEnd() {
712635 // fix the chain IDS in the current model:
713636
714637 Set <String > asymIds = asymStrandId .keySet ();
638+
639+ if (asymIds .isEmpty ()) {
640+ logger .warn ("No asym ids mapping found in file (categories pdbx_poly_seq_scheme/pdbx_non_poly_seq_scheme). Will create fake asym ids" );
641+
642+ if (structure .nrModels ()==0 ) {
643+ logger .error ("We should have some models at this point, something is wrong! We'll have an empty structure" );
644+ } else {
645+ for (Chain chain : structure .getModel (0 )) {
646+ asymStrandId .put (chain .getChainID (),chain .getChainID ());
647+ }
648+ }
649+ }
715650
716651 for (int i =0 ; i < structure .nrModels () ; i ++){
717- List <Chain >model = structure .getModel (i );
652+ List <Chain > model = structure .getModel (i );
718653
719654 List <Chain > pdbChains = new ArrayList <Chain >();
720655
@@ -761,6 +696,12 @@ public void documentEnd() {
761696 while (it .hasNext ()) {
762697 Chain chain = it .next ();
763698 String entityId = asymId2entityId .get (chain .getInternalChainID ());
699+ if (entityId ==null ) {
700+ // this can happen for instance if the cif file didn't have _struct_asym category at all
701+ // and thus we have no asymId2entityId mapping at all
702+ logger .warn ("No entity id could be found for chain {}" , chain .getInternalChainID ());
703+ continue ;
704+ }
764705 int eId = Integer .parseInt (entityId );
765706 // We didn't add above compounds for nonpolymeric entities, thus here if a chain is nonpolymeric
766707 // its compound won't be found. In biojava Structure data model a nonpolymeric chain does not really
@@ -985,6 +926,87 @@ private int getInternalNr(Group atomG) {
985926 return new Long (he .getId ()).intValue ();
986927 }
987928 }
929+
930+ private void addCompounds (StructAsym asym ) {
931+ int eId = 0 ;
932+ try {
933+ eId = Integer .parseInt (asym .getEntity_id ());
934+ } catch (NumberFormatException e ) {
935+ logger .warn ("Could not parse mol_id from string {}. Will use 0 for creating Compound" ,asym .getEntity_id ());
936+ }
937+ Entity e = getEntity (eId );
938+
939+ for (EntitySrcGen esg : entitySrcGens ) {
940+
941+ if (! esg .getEntity_id ().equals (asym .getEntity_id ()))
942+ continue ;
943+
944+ // found the matching EntitySrcGen
945+ // get the corresponding Entity
946+ Compound c = structure .getCompoundById (eId );
947+ if ( c == null ){
948+ if (e !=null && e .getType ().equals ("polymer" )) {
949+ c = createNewCompoundFromESG (esg , eId );
950+ c .setMolName (e .getPdbx_description ());
951+ structure .addCompound (c );
952+ logger .debug ("Adding Compound with entity id {} from _entity_src_syn, with name: {}" ,eId ,c .getMolName ());
953+ }
954+ }
955+
956+ }
957+
958+ for (EntitySrcNat esn : entitySrcNats ) {
959+ if (! esn .getEntity_id ().equals (asym .getEntity_id ()))
960+ continue ;
961+
962+ // found the matching EntitySrcGen
963+ // get the corresponding Entity
964+ Compound c = structure .getCompoundById (eId );
965+ if ( c == null ){
966+ if (e !=null && e .getType ().equals ("polymer" )) {
967+ c = createNewCompoundFromESN (esn , eId );
968+ c .setMolName (e .getPdbx_description ());
969+ structure .addCompound (c );
970+ logger .debug ("Adding Compound with entity id {} from _entity_src_syn, with name: {}" ,eId ,c .getMolName ());
971+ }
972+ }
973+
974+ }
975+
976+ for (EntitySrcSyn ess : entitySrcSyns ) {
977+ if (! ess .getEntity_id ().equals (asym .getEntity_id ()))
978+ continue ;
979+
980+ // found the matching EntitySrcGen
981+ // get the corresponding Entity
982+ Compound c = structure .getCompoundById (eId );
983+ if ( c == null ){
984+ if (e !=null && e .getType ().equals ("polymer" )) {
985+ c = createNewCompoundFromESS (ess , eId );
986+ c .setMolName (e .getPdbx_description ());
987+ structure .addCompound (c );
988+ logger .debug ("Adding Compound with entity id {} from _entity_src_syn, with name: {}" ,eId ,c .getMolName ());
989+ }
990+ }
991+ }
992+
993+ // for some mmCIF files like 1yrm all 3 of _entity_src_gen, _entity_src_nat and _pdbx_entity_src_syn are missing
994+ // we need to fill the Compounds in some other way:
995+
996+ Compound c = structure .getCompoundById (eId );
997+
998+ if (c ==null ) {
999+ c = new Compound ();
1000+ c .setMolId (eId );
1001+
1002+ // we only add the compound if a polymeric one (to match what the PDB parser does)
1003+ if (e !=null && e .getType ().equals ("polymer" )) {
1004+ c .setMolName (e .getPdbx_description ());
1005+ structure .addCompound (c );
1006+ logger .debug ("Adding Compound with entity id {} from _entity, with name: {}" ,eId , c .getMolName ());
1007+ }
1008+ }
1009+ }
9881010
9891011 private Compound createNewCompoundFromESG (EntitySrcGen esg , int eId ) {
9901012
0 commit comments