-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Add main R functions: tknock.R, random_experiments.R, lm_knockoff.R, and others
- Loading branch information
0 parents
commit 63168f0
Showing
58 changed files
with
3,194 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
^tknock\.Rproj$ | ||
^\.Rproj\.user$ | ||
^LICENSE\.md$ | ||
^README\.Rmd$ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
.Rproj.user | ||
.Rhistory | ||
.Rdata | ||
.httr-oauth | ||
.DS_Store | ||
inst/doc |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
Package: tknock | ||
Title: The T-Knock filter for fast high-dimensional variable selection with FDR control | ||
Version: 0.0.0.9000 | ||
Authors@R: | ||
c( | ||
person("Jasin", "Machkour", , "[email protected]", role = c("aut", "cre")), | ||
person("Simon", "Tien", , email = "[email protected]", role = "aut"), | ||
person(c("Daniel", "P."), "Palomar", , "[email protected]", role = "aut"), | ||
person("Michael", "Muma", , "[email protected]", role = "aut") | ||
) | ||
Maintainer: Jasin Machkour <[email protected]> | ||
Description: Performs variable selection in high-dimensional settings where the number of variables is potentially higher than the number of observations (data points) while controlling the FDR at a user-defined target level. | ||
License: GPL (>= 3) | ||
Encoding: UTF-8 | ||
LazyData: true | ||
Roxygen: list(markdown = TRUE) | ||
RoxygenNote: 7.1.2 | ||
Suggests: | ||
knitr, | ||
rmarkdown, | ||
testthat (>= 3.0.0) | ||
Config/testthat/edition: 3 | ||
Imports: | ||
Matrix, | ||
MASS, | ||
stats, | ||
mvnfast, | ||
tlars, | ||
parallel, | ||
doParallel, | ||
foreach, | ||
doRNG, | ||
snpStats, | ||
methods | ||
Depends: | ||
R (>= 2.10) | ||
VignetteBuilder: knitr |
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
# Generated by roxygen2: do not edit by hand | ||
|
||
export(FDP) | ||
export(Phi_prime_fun) | ||
export(TPP) | ||
export(add_knockoffs) | ||
export(add_knockoffs_GVS) | ||
export(fdp_hat) | ||
export(lm_knockoff) | ||
export(random_experiments) | ||
export(select_var_fun) | ||
export(tknock) | ||
import(MASS) | ||
import(doParallel) | ||
import(doRNG) | ||
import(foreach) | ||
import(methods) | ||
import(mvnfast) | ||
import(parallel) | ||
import(snpStats) | ||
import(stats) | ||
import(tlars) | ||
importFrom(Matrix,nearPD) | ||
importFrom(Matrix,qr) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
#' False discovery proportion (FDP) | ||
#' | ||
#' @param beta_hat Estimated support vector. | ||
#' @param beta True support vector. | ||
#' @param eps Numerical zero. | ||
#' | ||
#' @return FDP | ||
#' | ||
#' @export | ||
FDP = function(beta_hat, beta, eps = .Machine$double.eps) { | ||
if (sum(is.na(beta_hat)) == length(beta_hat)) { | ||
fdp = NA | ||
} else if (sum(abs(beta_hat) > eps) == 0) { | ||
fdp = 0 | ||
} else{ | ||
fdp = 100 * (sum(abs(beta) < eps & | ||
abs(beta_hat) > eps) / sum(abs(beta_hat) > eps)) | ||
} | ||
return(fdp) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
#' Toy data generated from a Gaussian linear model. | ||
#' | ||
#' A dataset containing a predictor matrix X with n = 50 observations | ||
#' and p = 100 variables (predictors), and a sparse parameter vector beta | ||
#' with associated support vector. | ||
#' | ||
#' Generated as follows: | ||
#' set.seed(789) | ||
#' n = 50 | ||
#' p = 100 | ||
#' X = matrix(rnorm(n * p), nrow = n, ncol = p) | ||
#' beta = c(rep(5, times = 3), rep(0, times = 97)) | ||
#' supp = beta > 0 | ||
#' y = X %*% beta + rnorm(n) | ||
#' Gauss_data = list(X = X, | ||
#' y = y, | ||
#' beta = beta, | ||
#' supp = supp); | ||
#' | ||
#' @format A list containing a matrix X and vectors y, beta, and supp: | ||
#' \describe{ | ||
#' \item{X}{Predictor matrix, n = 50, p = 100.} | ||
#' \item{y}{Response vector.} | ||
#' \item{beta}{Parameter vector.} | ||
#' \item{supp}{Support vector.} | ||
#' } | ||
"Gauss_data" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
#' Computes the Deflated Relative Occurrences | ||
#' | ||
#' Computes the matrix of deflated relative occurrences for all variables (i.e., j = 1,..., p) and for T = 1, ..., T_stop. | ||
#' | ||
#' @param p Number of candidate variables. | ||
#' @param T_stop Number of included knockoffs after which the random experiments (i.e., forward selection processes) are stopped. | ||
#' @param L_val Number of knockoffs. | ||
#' @param phi_T_mat Matrix of relative occurrences for all variables (i.e., j = 1,..., p) and for T = 1, ..., T_stop. | ||
#' @param Phi Vector of relative occurrences for all variables (i.e., j = 1,..., p) at T = T_stop. | ||
#' @param eps Numerical zero. | ||
#' | ||
#' @return Matrix of deflated relative occurrences for all variables (i.e., j = 1,..., p) and for T = 1, ..., T_stop. | ||
#' | ||
#' @export | ||
#' | ||
#' @examples | ||
#' data("Gauss_data") | ||
#' X = Gauss_data$X | ||
#' y = c(Gauss_data$y) | ||
#' rand_exp = random_experiments(X, y) | ||
#' Phi_prime_fun(p = ncol(X), | ||
#' T_stop = rand_exp$T_stop, | ||
#' L_val = rand_exp$L_val, | ||
#' phi_T_mat = rand_exp$phi_T_mat, | ||
#' Phi = rand_exp$Phi, | ||
#' eps = rand_exp$eps) | ||
Phi_prime_fun = function(p, | ||
T_stop, | ||
L_val, | ||
phi_T_mat, | ||
Phi, | ||
eps = .Machine$double.eps) { | ||
av_num_var_sel = colSums(phi_T_mat) | ||
fifty_phi_T_mat = phi_T_mat[Phi > 0.5, , drop = FALSE] | ||
delta_av_num_var_sel = colSums(fifty_phi_T_mat) | ||
|
||
if (T_stop > 1) { | ||
delta_av_num_var_sel[2:T_stop] = delta_av_num_var_sel[2:T_stop] - delta_av_num_var_sel[1:(T_stop - 1)] | ||
phi_T_mat[, 2:T_stop] = phi_T_mat[, 2:T_stop] - phi_T_mat[, 1:(T_stop - 1)] | ||
} | ||
|
||
phi_scale = rep(NA, times = length(delta_av_num_var_sel)) | ||
for (t in seq_along(delta_av_num_var_sel)) { | ||
if (delta_av_num_var_sel[t] > eps) { | ||
phi_scale[t] = 1 - (((p - av_num_var_sel[t]) / (L_val - t + 1)) / delta_av_num_var_sel[t]) | ||
} else{ | ||
phi_scale[t] = 0 | ||
} | ||
} | ||
|
||
Phi_prime = phi_T_mat %*% phi_scale | ||
|
||
return(Phi_prime) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
#' True positive proportion (TPP) | ||
#' | ||
#' @param beta_hat Estimated support vector. | ||
#' @param beta True support vector. | ||
#' @param eps Numerical zero. | ||
#' | ||
#' @return TPP. | ||
#' | ||
#' @export | ||
TPP = function(beta_hat, beta, eps = .Machine$double.eps) { | ||
if (sum(is.na(beta_hat)) == length(beta_hat)) { | ||
tpp = NA | ||
} else if (sum(abs(beta) > eps) == 0) { | ||
tpp = 0 | ||
} else{ | ||
tpp = 100 * (sum(abs(beta) > eps & | ||
abs(beta_hat) > eps) / sum(abs(beta) > eps)) | ||
} | ||
return(tpp) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
#' Add knockoffs to the original predictor matrix | ||
#' | ||
#' Sample L_val knockoff vectors from the univariate standard normal distribution and append them to the predictor matrix X. | ||
#' | ||
#' @param X Real valued Predictor matrix. | ||
#' @param L_val Number of knockoffs. | ||
#' @param cor.structure TRUE/FALSE. | ||
#' @param empirical TRUE/FALSE. | ||
#' @param eps Numerical zero. | ||
#' | ||
#' @return Enlarged predictor matrix, i.e., original predictor matrix and knockoffs. | ||
#' | ||
#' @import stats | ||
#' @importFrom Matrix qr | ||
#' @importFrom Matrix nearPD | ||
#' @import MASS | ||
#' @import mvnfast | ||
#' | ||
#' @export | ||
#' | ||
#' @examples | ||
#' n = 50 | ||
#' p = 100 | ||
#' add_knockoffs(X = matrix(rnorm(n * p), nrow = n, ncol = p), L_val = p) | ||
add_knockoffs = function(X, | ||
L_val, | ||
cor.structure = FALSE, | ||
empirical = FALSE, | ||
eps = .Machine$double.eps) { | ||
g = 1 | ||
n = nrow(X) | ||
p = ncol(X) | ||
mu = rep(0, times = p) | ||
for (i in seq(g)) { | ||
if (L_val != p) { | ||
X_surrogate = matrix( | ||
stats::rnorm(n * L_val), | ||
nrow = n, | ||
ncol = L_val, | ||
byrow = FALSE | ||
) | ||
} else{ | ||
if (cor.structure) { | ||
Sig = stats::cov(X) | ||
if (Matrix::qr(Sig)$rank != p) { | ||
Sig = as.matrix(Matrix::nearPD(Sig)$mat) | ||
} | ||
if (empirical) { | ||
X_surrogate = MASS::mvrnorm(n, | ||
mu = mu, | ||
Sigma = Sig, | ||
empirical = empirical) | ||
} else{ | ||
X_surrogate = mvnfast::rmvn( | ||
n, | ||
mu = mu, | ||
sigma = Sig, | ||
ncores = 1, | ||
isChol = FALSE, | ||
A = NULL | ||
) | ||
} | ||
} else{ | ||
X_surrogate = matrix(rnorm(n * p), | ||
nrow = n, | ||
ncol = p, | ||
byrow = FALSE) | ||
} | ||
} | ||
X_Knock = cbind(X, X_surrogate) | ||
} | ||
return(X_Knock) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
#' Add GVS-knockoffs to the original predictor matrix | ||
#' | ||
#' Sample L_val knockoff vectors for the T-Knock+GVS filter from the univariate standard normal distribution and append them to the predictor matrix X. | ||
#' | ||
#' @param X Real valued predictor matrix. | ||
#' @param L_val Number of knockoffs. | ||
#' @param corr_max Maximum allowed correlation between any two predictors from different clusters. | ||
#' | ||
#' @return Enlarged predictor matrix for the T-Knock+GVS filter, i.e., original predictor matrix and knockoffs. | ||
#' | ||
#' @import stats | ||
#' @import MASS | ||
#' | ||
#' @export | ||
#' | ||
#' @examples | ||
#' n = 50 | ||
#' p = 100 | ||
#' add_knockoffs_GVS(X = matrix(rnorm(n * p), nrow = n, ncol = p), L_val = p) | ||
add_knockoffs_GVS = function(X, | ||
L_val, | ||
corr_max = 0.5) { | ||
n = nrow(X) | ||
p = ncol(X) | ||
if (is.null(names(X))) { | ||
colnames(X) = paste0('V', seq(p)) | ||
} | ||
|
||
# Single linkage hierarchical clustering using the sample correlation as the similarity measure | ||
sigma_X = stats::cov(X) | ||
sigma_X_dist = stats::as.dist(1 - abs(stats::cov2cor(sigma_X))) | ||
fit = stats::hclust(sigma_X_dist, method = 'single') | ||
clusters = stats::cutree(fit, h = 1 - corr_max) | ||
max_clusters = max(clusters) | ||
clusters = data.frame('Var' = names(clusters), | ||
'Cluster_Nr.' = unname(clusters)) | ||
clusters = stats::aggregate(clusters$'Var' ~ clusters$'Cluster_Nr.', FUN = 'c') | ||
cluster_sizes = vector('numeric', length = max_clusters) | ||
for (j in seq(max_clusters)) { | ||
cluster_sizes[j] = length(clusters$`clusters$Var`[[j]]) | ||
} | ||
w_max = L_val / p | ||
|
||
X_p_surrogate = matrix(NA, nrow = n, ncol = p) | ||
X_Knock = matrix(NA, nrow = n, ncol = p + L_val) | ||
X_Knock[, seq(p)] = X | ||
|
||
for (w in seq(w_max)) { | ||
for (z in seq(max_clusters)) { | ||
sub_X = X[, clusters$`clusters$Var`[[z]], drop = FALSE] | ||
sigma_sub_X = stats::cov(sub_X) | ||
idx = cumsum(cluster_sizes) | ||
mu = rep(0, times = cluster_sizes[z]) | ||
if (z == 1) { | ||
X_p_surrogate[, seq(idx[z])] = MASS::mvrnorm(n, | ||
mu = mu, | ||
Sigma = sigma_sub_X, | ||
empirical = FALSE) | ||
} else{ | ||
X_p_surrogate[, seq(idx[z - 1] + 1, idx[z])] = MASS::mvrnorm(n, | ||
mu = mu, | ||
Sigma = sigma_sub_X, | ||
empirical = FALSE) | ||
} | ||
} | ||
X_Knock[, seq(w * p + 1, (w + 1) * p)] = X_p_surrogate | ||
} | ||
return(X_Knock) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
#' Computes the conservative FDP estimate of the T-Knock filter | ||
#' | ||
#' @param V Voting level grid. | ||
#' @param Phi Vector of relative occurrences. | ||
#' @param Phi_prime Vector of deflated relative occurrences. | ||
#' @param T_stop Number of included knockoffs after which the random experiments (i.e., forward selection processes) are stopped. | ||
#' @param L_val Number of knockoffs. | ||
#' @param eps Numerical zero. | ||
#' | ||
#' @return Vector of conservative FDP estimates for each value of the voting level grid. | ||
#' | ||
#' @export | ||
fdp_hat = function(V, | ||
Phi, | ||
Phi_prime, | ||
T_stop, | ||
L_val, | ||
eps = .Machine$double.eps) { | ||
fdp_h = rep(NA, times = length(V)) | ||
for (i in seq_along(V)) { | ||
num_sel_var = sum(Phi > V[i]) | ||
if (num_sel_var < eps) { | ||
fdp_h[i] = 0 | ||
} else{ | ||
fdp_h[i] = min(1, (sum((1 - Phi_prime)[Phi > V[i]]) / (num_sel_var))) | ||
} | ||
} | ||
return(fdp_h) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
#' Ground truth for simulated GWAS. | ||
#' | ||
#' A table containing the disease SNPs (disease allele, index, position, heterozygote risks, | ||
#' homozygote risks, and rs-ID-number). | ||
#' | ||
#' @format A dataframe containing six columns: | ||
#' \describe{ | ||
#' \item{allele}{Disease allele (zero or one).} | ||
#' \item{idx}{Index of disease SNP.} | ||
#' \item{position}{Position of disease SNP.} | ||
#' \item{risk_hete}{Heterozygote risk.} | ||
#' \item{risk_homo}{Homozygote risk.} | ||
#' \item{rs}{rs-ID-number.} | ||
#' } | ||
"ground_truth" |
Oops, something went wrong.