Merge pull request #2 from nmendozam/master

Refactoring some functions
gibbslab · Oct 25, 2020 · a7d271d · a7d271d
2 parents 517f11b + a57b651
commit a7d271d
Show file tree

Hide file tree

Showing 14 changed files with 217 additions and 164 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -4,11 +4,14 @@ Title: Find and Fill Gaps in Metabolic Networks
 Version: 0.3
 Authors@R: c(person("Kelly","Botero",role=c("aut")),
              person("Daniel","Osorio",email="[email protected]",role=c("aut","cre")),
+             person("Nicolas","Mendoza",role=c("aut")),
+             person("Felipe","Rojas-Rodriguez",role=c("aut")),
              person("Janneth","Gonzalez",role=c("aut","ths")),
              person("Andres","Pinzon", role=c("aut","ths"))
              )
 Description: For a given metabolic network, this package finds the gaps (metabolites not produced or not consumed in any other reaction), and fills it from the stoichiometric reactions of a reference metabolic reconstruction using a weighting function. Also the option to download all the set of gene-associated stoichiometric reactions for a specific organism from the KEGG database <http://www.genome.jp/kegg/> is available.
 Imports: KEGGREST, minval (>= 0.5), sybil
 License: GPL-2
 LazyData: TRUE
-RoxygenNote: 6.0.1
+RoxygenNote: 7.1.1
+BugReports: https://github.com/gibbslab/g2f/issues
diff --git a/NAMESPACE b/NAMESPACE
@@ -9,6 +9,7 @@ importFrom("KEGGREST","keggGet")
 importFrom("KEGGREST","keggLink")
 importFrom("KEGGREST","keggList")
 importFrom("minval","metabolites")
+importFrom("minval","orphanMetabolites")
 importFrom("minval","orphanProducts")
 importFrom("minval","orphanReactants")
 importFrom("minval","products")

diff --git a/R/additionCost.R b/R/additionCost.R
@@ -18,35 +18,37 @@
 #' \item \code{CO2[c] <=> }
 #' }
 #' @param reference A different set of stoichiometric reactions with the same format of reactionList. This is the metabolic network expected to gap filling.
-#' @examples 
+#' @examples
 #' # Generate a reference, a vector of stoichiometric reactions as described above.
-#' 
+#'
 #' example_reference <- vector()
 #' example_reference[1] <- "A + B => C + D"
 #' example_reference[2] <- "B + D => E"
-#' 
+#'
 #' # Generate a set of reactions to compute the addition cost.
-#' 
+#'
 #' example_reactionList <- vector()
 #' example_reactionList[1] <- "A + E => F + G"
 #' example_reactionList[2] <- "A + E <=> F"
-#' 
-#' # The addition cost function will identify those metabolites not included in 
+#'
+#' # The addition cost function will identify those metabolites not included in
 #' # the reference (F and G) and will compute the addition cost for each reaction
-#' # as the ratio of the number of metabolites not included in the reference 
+#' # as the ratio of the number of metabolites not included in the reference
 #' # over the total of metabolites in the reaction (2/4 and 1/3 respectively).
-#' 
+#'
 #' additionCost(reactionList = example_reactionList, reference = example_reference)
 #' # [1] 0.5000000 0.3333333
-additionCost <- function(reactionList,reference){
-  if(class(reactionList) == "modelorg"){
+additionCost <- function(reactionList, reference) {
+  if (class(reactionList) == "modelorg") {
     reactionList <- model2string(model = reactionList)
   }
-  if(class(reference) == "modelorg"){
+  if (class(reference) == "modelorg") {
     reference <- model2string(model = reference)
   }
   mets_r <- metabolites(reference)
-  mets <- lapply(reactionList,metabolites)
-  cost <- unlist(lapply(mets, function(metabolites){mean(!metabolites %in% mets_r)}))
+  mets <- lapply(reactionList, metabolites)
+  cost <- unlist(lapply(mets, function(metabolites) {
+    mean(!metabolites %in% mets_r)
+  }))
   return(cost)
 }
diff --git a/R/blockedReactions.R b/R/blockedReactions.R
@@ -11,21 +11,21 @@
 #' \dontrun{
 #' # Loading a model for the 'sybil' package
 #' data("Ec_core")
-#' 
+#'
 #' # Identifying blocked reactions
 #' blockedReactions(Ec_core)}
 #' @keywords Blocked reactions genome scale metabolic reconstruction
-blockedReactions <- function(model){
+blockedReactions <- function(model) {
   locked <- NULL
-  pb <- txtProgressBar(min = 1,max = model@react_num,style=3)
+  pb <- txtProgressBar(min = 1, max = model@react_num, style = 3)
   for (reaction in 1:model@react_num) {
     setTxtProgressBar(pb, reaction)
     model@obj_coef <- rep(0, model@react_num)
     model@obj_coef[reaction] <- 1
     FBA <- sybil::optimizeProb(model)
-    locked <- unique(c(locked, model@react_id[as.vector(FBA@fluxdist@fluxes!=0)]))
+    locked <- unique(c(locked, model@react_id[as.vector(FBA@fluxdist@fluxes != 0)]))
   }
   close(pb)
-  locked <- model@react_id[!model@react_id%in%locked]
+  locked <- model@react_id[!model@react_id %in% locked]
   return(locked)
-}
+}
diff --git a/R/extract.R b/R/extract.R
@@ -4,11 +4,11 @@ extract <- function(reaction) {
   if (grepl(";", reaction)) {
     parts <- unlist(strsplit(reaction, ";[[:blank:]]+"))
     return(parts[length(parts)])
-  } else{
+  } else {
     if (!grepl("rn:R[[:digit:]]+[[:blank:]]", reaction)) {
       return(reaction)
     } else {
       return(unlist(strsplit(reaction, "rn:R[[:digit:]]+[[:blank:]]"))[2])
     }
   }
-}
+}
diff --git a/R/gapFill.R b/R/gapFill.R
@@ -1,81 +1,90 @@
 #' @export gapFill
-#' @importFrom "minval" "orphanReactants" "orphanProducts" "reactants" "products"
+#' @importFrom "minval" "orphanReactants" "orphanProducts" "orphanMetabolites" "reactants" "products"
 #' @author Kelly Botero <[email protected]> - Maintainer: Daniel Camilo Osorio <[email protected]>
 #  Bioinformatics and Systems Biology Lab      | Universidad Nacional de Colombia
 #  Experimental and Computational Biochemistry | Pontificia Universidad Javeriana
 #' @title Find and fill gaps in a metabolic network
 #' @description This function identifies the gaps and fills it from the stoichiometric reactions of a reference metabolic reconstruction using a weighting function.
 #' @seealso \code{additionCost} function documentation.
-#' @param reactionList A set of stoichiometric reaction with the following format: 
-#' 
-#' \code{"H2O[c] + Urea-1-carboxylate[c] <=> 2 CO2[c] + 2 NH3[c]"} 
-#' 
+#' @param reactionList A set of stoichiometric reaction with the following format:
+#'
+#' \code{"H2O[c] + Urea-1-carboxylate[c] <=> 2 CO2[c] + 2 NH3[c]"}
+#'
 #' Where arrows and plus signs are surrounded by a "space character".
 #' It is also expected that stoichiometry coefficients are surrounded by spaces, (nothe the "2" before the CO2[c] or the NH3[c]).
-#' It also expects arrows to be in the form "\code{=>}" or "\code{<=>}". 
+#' It also expects arrows to be in the form "\code{=>}" or "\code{<=>}".
 #' Meaning that arrows like "\code{==>}", "\code{<==>}", "\code{-->}" or "\code{->}" will not be parsed and will lead to errors.
 #' @param reference A set of stoichiometric reaction with the same format of reactionList
 #' @param limit An addition cost value to be used as a limit to select reactions to be added. Is calculated as NumberNewMetabolites/NumerOfMetabolites for each reaction.
 #' @param woCompartment A boolean value \code{TRUE} to define if compartment labels should be removed of the reactionList stoichiometric reactions, \code{FALSE} is used as default.
 #' @param consensus A boolean value \code{TRUE} to define if reactionList and newReactions should be reported as a unique vector or \code{FALSE} if just newReactions should be reported.
-#' 
-#' @examples 
+#' @param nRun The number of iterations to search for new gaps and fill them according to the score, the default is five.
+#'
+#' @examples
 #' \dontrun{
 #' # Downloading stoichiometric reactions
-#' all <- getReference(organism = "all",sep = ";")
-#' eco <- getReference(organism = "eco",sep = ";")
-#' 
+#' all <- getReference(organism = "all", sep = ";")
+#' eco <- getReference(organism = "eco", sep = ";")
+#'
 #' # Filtering reactions
-#' all <- mapReactions(reactionList = all$reaction%in%eco$reaction,
-#'                     referenceData = all,
-#'                     by = "bool",
-#'                     inverse = TRUE)
-#'                     
+#' all <- mapReactions(
+#'   reactionList = all$reaction %in% eco$reaction,
+#'   referenceData = all,
+#'   by = "bool",
+#'   inverse = TRUE
+#' )
+#'
 #' # gapFill
-#' gapFill(reactionList = eco$reaction,
-#'         reference = all$reaction, 
-#'         limit = 0.25,
-#'         woCompartment = TRUE,
-#'         consensus = FALSE)}
-gapFill <- function(reactionList, reference, limit = 0.25, nRun = 5, woCompartment=FALSE, consensus=FALSE){
+#' gapFill(
+#'   reactionList = eco$reaction,
+#'   reference = all$reaction,
+#'   limit = 0.25,
+#'   woCompartment = TRUE,
+#'   consensus = FALSE
+#' )
+#' }
+gapFill <- function(reactionList, reference, limit = 0.25, nRun = 5, woCompartment = FALSE, consensus = FALSE) {
   reference_reactants <- reactants(reference)
   reference_products <- products(reference)
   newR <- NULL
   n <- 0
   while (n < nRun) {
     oR <- orphanReactants(reactionList)
     oP <- orphanProducts(reactionList)
-    repeat({
-      aC <- additionCost(reactionList = reference, reference = reactionList)
-      rA <- reference[aC <= limit]
-      toAdd <- rA[unlist(lapply(reference_reactants[aC <= limit],function(sR){any(sR %in% oR)}))]
-      if(!all(toAdd %in% newR)){
-        newR <- unique(c(newR,toAdd))
-        reactionList <- unique(c(reactionList,newR))
-      } else {
-        break()
-      }
-    })
-    repeat({
-      aC <- additionCost(reactionList = reference, reference = reactionList)
-      rA <- reference[aC <= limit]
-      toAdd <- rA[unlist(lapply(reference_products[aC <= limit],function(sP){any(sP %in% oP)}))]
-      if(!all(toAdd %in% newR)){
-        newR <- unique(c(newR,toAdd))
-        reactionList <- unique(c(reactionList,newR))
-      } else {
-        break()
-      }
-    })
+
+    filledReactions <- fillOrphanMetabolites(reference, reactionList, limit, reference_reactants, oR, newR)
+    newR <- filledReactions$new
+    reactionList <- filledReactions$summary
+    filledReactions <- fillOrphanMetabolites(reference, reactionList, limit, reference_products, oP, newR)
+    newR <- filledReactions$new
+    reactionList <- filledReactions$summary
+
     n <- n + 1
-    message(round(mean(!unique(c(oR,oP)) %in% orphanMetabolites(reactionList)),2))
+    message(round(mean(!unique(c(oR, oP)) %in% orphanMetabolites(reactionList)), 2) * 100, "% gaps filled in the last iteration")
   }
-  if(isTRUE(consensus)){
+  if (isTRUE(consensus)) {
     return(reactionList)
   } else {
     return(newR)
   }
 }
+
+fillOrphanMetabolites <- function(reference, reactionList, limit, reference_metabolites, oM, newR) {
+  repeat({
+    aC <- additionCost(reactionList = reference, reference = reactionList)
+    rA <- reference[aC <= limit]
+    toAdd <- rA[unlist(lapply(reference_metabolites[aC <= limit], function(sR) {
+      any(sR %in% oM)
+    }))]
+    if (any(!toAdd %in% newR)) {
+      newR <- unique(c(newR, toAdd))
+      reactionList <- unique(c(reactionList, newR))
+    } else {
+      break()
+    }
+  })
+  return(list("new" = newR, "summary" = reactionList))
+}
 # hsa <- getReference(organism = "hsa")
 # reactionList <- sample(hsa$reaction,100)
 # reference <- hsa$reaction[!hsa$reaction %in% reactionList]

diff --git a/R/gapFind.R b/R/gapFind.R
@@ -3,12 +3,21 @@
 #' @author Daniel Osorio <[email protected]>
 #' @title Find gaps in a metabolic network
 #' @description This function identifies the gaps (metabolites not produced or not consumed in any other reaction or just involved in one reaction) for a set of stoichometric reactions
-gapFind <- function(reactionList, removeExchange = FALSE){
-  if(class(reactionList) == "modelorg"){
+#' @param reactionList A set of stoichiometric reaction with the following format:
+#'
+#' \code{"H2O[c] + Urea-1-carboxylate[c] <=> 2 CO2[c] + 2 NH3[c]"}
+#'
+#' Where arrows and plus signs are surrounded by a "space character".
+#' It is also expected that stoichiometry coefficients are surrounded by spaces, (nothe the "2" before the CO2[c] or the NH3[c]).
+#' It also expects arrows to be in the form "\code{=>}" or "\code{<=>}".
+#' Meaning that arrows like "\code{==>}", "\code{<==>}", "\code{-->}" or "\code{->}" will not be parsed and will lead to errors.
+#' @param removeExchange A boolean value \code{FALSE} to ignore the exchange reactions to identify the gaps.
+gapFind <- function(reactionList, removeExchange = FALSE) {
+  if (class(reactionList) == "modelorg") {
     reactionList <- model2string(model = reactionList)
   }
-  if (class(reactionList) == "character"){
-    if(removeExchange == TRUE){
+  if (class(reactionList) == "character") {
+    if (removeExchange == TRUE) {
       reactionList <- reactionList[!minval:::reactionType(reactionList = reactionList) == "Exchange reaction"]
     }
     gaps <- list()