RGBM


RGBM is a LS-TreeBoost and LAD-TreeBoost for Gene Regulatory Network Reconstruction.
It Provides an implementation of Regularized LS-TreeBoost & LAD-TreeBoost algorithm for Regulatory Network inference from any type of expression data (Microarray/RNA-seq etc).

Availability: RGBM is also available as


RGBM Tutorial

Raghvendra Mall and Khalid Kunji

April 25, 2018

Steps For Inferring GRN using RGBM and perform MRA using FGSEA

This is a tutorial for using RGBM (for inferring gene regulatory network) and performing Master Regulator Analysis.

You will need to install the following packages: RGBM, doMC, foreach, igraph, gplots from CRAN. You will also need the fgsea package from bioconductor. You can register as many processes as available on your resource with registerDoMC(no_of_threads).

library(RGBM)
## Loading required package: foreach
## Loading required package: plyr
## Loading required package: doParallel
## Loading required package: iterators
## Loading required package: parallel
library(doMC)
library(foreach)
library(igraph)
## 
## Attaching package: 'igraph'
## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum
## The following object is masked from 'package:base':
## 
##     union
library(fgsea)
## Loading required package: Rcpp
library(gplots)
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
## 
##     lowess
registerDoMC(20)

Loading Data and Necessary Utility Functions

You can source all utility functions for MRA and loading data

source('gene-reverse-network.R')
source('loadData.R')
source('get_functions.R')

Loading the log2-transformed quantile normalized data file (the data should be quantile normalized followed by log2 transformation, if the raw data is RNA-Seq counts).

load("Dataset/gep_1250_agi_rnaseq-dup.RData")
boxplot(z[,1:10])

Loading the Mechanistic TF-target regulations file

load("Dataset/me.net.Rdata")
rownames(me.net)
##   [1] "ALX3"    "ARNTL"   "ARX"     "AR"      "ATF4"    "ATF7"    "BCL6B"  
##   [8] "CEBPB"   "CEBPD"   "CEBPG"   "CENPB"   "CLOCK"   "CPEB1"   "CREB3L1"
##  [15] "CREB3"   "CTCF"    "CREB3L2" "CREB5"   "DBP"     "DLX1"    "DLX2"   
##  [22] "DLX5"    "DLX6"    "E2F1"    "E2F2"    "E2F3"    "E2F4"    "E2F7"   
##  [29] "E2F8"    "EBF1"    "EGR1"    "EGR2"    "EGR3"    "ELF1"    "ELF3"   
##  [36] "ELF4"    "ELK1"    "ELK3"    "ELK4"    "EMX1"    "EMX2"    "EN1"    
##  [43] "EN2"     "ERF"     "ERG"     "ESR1"    "ESRRA"   "ESRRG"   "ETS1"   
##  [50] "ETV1"    "ETV3"    "ETV4"    "ETV5"    "ETV6"    "FLI1"    "FOXC1"  
##  [57] "FOXD2"   "FOXD3"   "FOXG1"   "FOXJ2"   "FOXJ3"   "FOXK1"   "FOXO1"  
##  [64] "FOXO3"   "FOXO4"   "GABPA"   "GATA4"   "GBX2"    "GLI2"    "GLIS1"  
##  [71] "GLIS2"   "GLIS3"   "GMEB2"   "GRHL1"   "GSC"     "GSX1"    "GSX2"   
##  [78] "HES5"    "HESX1"   "HEY1"    "HEY2"    "HIC2"    "HLF"     "HMBOX1" 
##  [85] "HNF1A"   "HOMEZ"   "HOXA10"  "HOXA13"  "HOXA1"   "HOXA2"   "HOXB13" 
##  [92] "HOXB2"   "HOXB3"   "HOXC10"  "HOXC13"  "HOXD11"  "HOXD13"  "HOXD8"  
##  [99] "HSF1"    "HSF2"    "HSF4"    "HIC1"    "HOXA11"  "HOXD3"   "HOXD9"  
## [106] "ID4"     "IRF3"    "IRF5"    "IRF7"    "IRF8"    "IRF9"    "IRX2"   
## [113] "IRX5"    "IRX3"    "JDP2"    "KLF13"   "KLF16"   "KLF12"   "LEF1"   
## [120] "LHX2"    "LHX6"    "LHX9"    "MAFF"    "MAFG"    "MAFK"    "MAX"    
## [127] "MEF2A"   "MEF2B"   "MEF2D"   "MEIS1"   "MEIS2"   "MEIS3"   "MEOX2"  
## [134] "MESP1"   "MGA"     "MLXIPL"  "MLX"     "MNT"     "MSC"     "MSX1"   
## [141] "MSX2"    "MTF1"    "MYBL1"   "MYBL2"   "MAFB"    "NEUROD2" "NFAT5"  
## [148] "NFATC1"  "NFIA"    "NFIB"    "NFIL3"   "NFIX"    "NFKB1"   "NFKB2"  
## [155] "NHLH1"   "NKX3-2"  "NKX6-2"  "NR2C2"   "NR2E1"   "NR2F1"   "NR2F6"  
## [162] "NR3C1"   "NR3C2"   "NR4A2"   "NRF1"    "NRL"     "OLIG1"   "OLIG2"  
## [169] "ONECUT2" "OTX1"    "OTX2"    "PAX1"    "PAX3"    "PAX6"    "PAX7"   
## [176] "PITX1"   "PKNOX1"  "PKNOX2"  "POU2F1"  "POU2F2"  "POU3F1"  "POU3F2" 
## [183] "POU3F3"  "POU4F1"  "PRDM1"   "PRDM4"   "PROX1"   "PRRX1"   "RARA"   
## [190] "RARB"    "RARG"    "RFX2"    "RFX3"    "RFX4"    "RFX5"    "RORA"   
## [197] "RUNX2"   "RUNX3"   "RXRA"    "RXRB"    "RXRG"    "SCRT1"   "SCRT2"  
## [204] "SHOX2"   "SMAD3"   "SNAI2"   "SOX10"   "SOX15"   "SOX18"   "SOX21"  
## [211] "SOX2"    "SOX4"    "SOX7"    "SOX8"    "SOX9"    "SP1"     "SP3"    
## [218] "SP4"     "SP8"     "SPI1"    "SREBF2"  "SRF"     "SOX11"   "SOX17"  
## [225] "SOX1"    "SOX3"    "SREBF1"  "TBR1"    "TBX15"   "TBX19"   "TBX1"   
## [232] "TBX2"    "TBX5"    "TCF3"    "TCF4"    "TCF7L1"  "TEAD1"   "TEAD3"  
## [239] "TEAD4"   "TEF"     "TFAP2A"  "TFAP2B"  "TFAP4"   "TFCP2"   "TFE3"   
## [246] "TFEB"    "TFEC"    "TGIF1"   "TGIF2"   "THRA"    "THRB"    "TP63"   
## [253] "TCF7"    "TP53"    "TP73"    "USF1"    "VAX2"    "VDR"     "VENTX"  
## [260] "XBP1"    "YY1"     "ZBTB7A"  "ZBTB7B"  "ZBTB7C"  "ZIC1"    "ZIC3"   
## [267] "ZIC4"    "ZNF143"  "ZNF232"  "ZNF238"  "ZNF282"  "ZNF410"  "ZNF524" 
## [274] "ZNF740"  "ZNF75A"  "ZNF784"  "DBX2"    "HDX"     "HOXA3"   "HOXA4"  
## [281] "HOXA5"   "HOXA7"   "HOXA9"   "HOXB4"   "HOXB7"   "HOXC4"   "HOXC6"  
## [288] "HOXD10"  "HOXD1"   "LHX5"    "NKX2-2"  "NKX2-5"  "OTP"     "PBX1"   
## [295] "POU6F1"  "SIX1"    "SIX4"    "SIX6"    "ELF2"    "YAP1"    "ARID3A" 
## [302] "ARID5A"  "ATF1"    "BBX"     "FOXJ1"   "GMEB1"   "HBP1"    "IRF6"   
## [309] "KLF7"    "NR2F2"   "OSR1"    "OSR2"    "PLAGL1"  "SOX12"   "SOX13"  
## [316] "SOX5"    "SP100"   "TBP"     "TCF7L2"  "ZBTB12"  "ZBTB3"   "ZFP161" 
## [323] "ZIC2"    "RUNX1"   "ARNT"    "AHR"     "CREB1"   "DDIT3"   "CEBPA"  
## [330] "FOXF2"   "GATA2"   "KLF4"    "FOXQ1"   "IRF1"    "IRF2"    "MZF1"   
## [337] "MYC"     "NFYA"    "PPARG"   "RREB1"   "NFE2L1"  "TAL1"    "JUN"    
## [344] "FOS"     "REL"     "ZEB1"    "MYCN"    "RELA"    "HLTF"    "NR1H2"  
## [351] "ZNF423"  "NFIC"    "TLX1"    "ZNF354C" "STAT1"   "REST"    "STAT3"  
## [358] "ZFX"     "NFE2L2"  "NFATC2"  "INSM1"   "PLAG1"   "ESR2"    "HIF1A"  
## [365] "BATF"    "BCL6"    "E2F6"    "FOSL1"   "FOSL2"   "FOXP1"   "HNF4G"  
## [372] "JUNB"    "JUND"    "NR1H3"   "MEF2C"   "MYOD1"   "MAF"     "NFYB"   
## [379] "NR5A2"   "RFX1"    "SMAD2"   "SMAD4"   "SOX6"    "SP2"     "STAT2"  
## [386] "STAT4"   "STAT5A"  "STAT5B"  "STAT6"   "TCF12"   "USF2"    "ZBTB33" 
## [393] "ZNF263"  "BACH1"   "FOXP2"   "THAP1"   "KLF5"    "ARID3B"  "ATF3"   
## [400] "CREM"    "ID2"     "LIN54"   "MITF"    "MLXIP"   "NFATC3"  "NPAS2"  
## [407] "TCFL5"   "HES1"    "ARNT2"   "ATF2"    "ATF5"    "BPTF"    "BRCA1"  
## [414] "CDC5L"   "CEBPZ"   "CXXC1"   "E2F5"    "E4F1"    "EPAS1"   "ETS2"   
## [421] "ETV7"    "FOSB"    "FOXF1"   "FOXM1"   "FUBP1"   "GLI1"    "GLI3"   
## [428] "HMGA1"   "HMGA2"   "IKZF1"   "KLF15"   "KLF3"    "KLF6"    "KLF8"   
## [435] "MAZ"     "MBD2"    "MECP2"   "NFYC"    "NR0B1"   "NR1D1"   "NR2C1"  
## [442] "NR4A1"   "NR4A3"   "PAX8"    "PBX2"    "PBX3"    "PPARA"   "PPARD"  
## [449] "PURA"    "RELB"    "SMAD1"   "SNAI1"   "TBX3"    "TFDP1"   "ZBTB4"  
## [456] "ZBTB6"   "ZFHX3"
sum(me.net>0)
## [1] 1874570

Loading the phenotype information file

load("Dataset/casesTable.RData")
str(casesTable)
## 'data.frame':    1060 obs. of  51 variables:
##  $ Case                                         : chr  "TCGA-CS-4938" "TCGA-CS-4941" "TCGA-CS-4942" "TCGA-CS-4943" ...
##  $ Tissue.source.site                           : chr  "Thomas Jefferson University" "Thomas Jefferson University" "Thomas Jefferson University" "Thomas Jefferson University" ...
##  $ Study                                        : chr  "Brain Lower Grade Glioma" "Brain Lower Grade Glioma" "Brain Lower Grade Glioma" "Brain Lower Grade Glioma" ...
##  $ BCR                                          : chr  "IGC" "IGC" "IGC" "IGC" ...
##  $ Whole.exome                                  : chr  "Yes" "Yes" "Yes" "Yes" ...
##  $ Whole.genome                                 : chr  "Yes" "Yes" "No" "No" ...
##  $ RNAseq                                       : chr  "Yes" "Yes" "Yes" "Yes" ...
##  $ SNP6                                         : chr  "Yes" "Yes" "Yes" "Yes" ...
##  $ U133a                                        : chr  "No" "No" "No" "No" ...
##  $ HM450                                        : chr  "Yes" "Yes" "Yes" "Yes" ...
##  $ HM27                                         : chr  "No" "No" "No" "No" ...
##  $ RPPA                                         : chr  "Yes" "No" "Yes" "Yes" ...
##  $ Histology                                    : chr  "astrocytoma" "astrocytoma" "astrocytoma" "astrocytoma" ...
##  $ Grade                                        : chr  "G2" "G3" "G3" "G3" ...
##  $ Age..years.at.diagnosis.                     : int  31 67 44 37 50 47 39 40 43 53 ...
##  $ Gender                                       : chr  "female" "male" "female" "male" ...
##  $ Survival..months.                            : num  4.7 7.69 43.86 18.14 10.61 ...
##  $ Vital.status..1.dead.                        : int  0 1 1 0 0 0 0 0 1 0 ...
##  $ Karnofsky.Performance.Score                  : int  90 90 90 50 90 100 100 NA 90 90 ...
##  $ Mutation.Count                               : int  15 50 24 30 20 32 26 22 39 26 ...
##  $ Percent.aneuploidy                           : num  0.0694 0.2241 0.0937 0.1724 0.0603 ...
##  $ IDH.status                                   : chr  "Mutant" "WT" "Mutant" "Mutant" ...
##  $ X1p.19q.codeletion                           : chr  "non-codel" "non-codel" "non-codel" "non-codel" ...
##  $ IDH.codel.subtype                            : chr  "IDHmut-non-codel" "IDHwt" "IDHmut-non-codel" "IDHmut-non-codel" ...
##  $ MGMT.promoter.status                         : chr  "Unmethylated" "Methylated" "Unmethylated" "Methylated" ...
##  $ Chr.7.gain.Chr.10.loss                       : chr  "No combined CNA" "Gain chr 7 & loss chr 10" "No combined CNA" "No combined CNA" ...
##  $ Chr.19.20.co.gain                            : chr  "No chr 19/20 gain" "No chr 19/20 gain" "No chr 19/20 gain" "No chr 19/20 gain" ...
##  $ TERT.promoter.status                         : chr  "WT" "Mutant" "WT" "WT" ...
##  $ TERT.expression..log2.                       : num  0 3.81 0 0 2.58 ...
##  $ TERT.expression.status                       : chr  "Not expressed" "Expressed" "Not expressed" "Not expressed" ...
##  $ ATRX.status                                  : chr  "Mutant" "WT" "Mutant" "Mutant" ...
##  $ DAXX.status                                  : chr  "WT" "WT" "WT" "WT" ...
##  $ Telomere.Maintenance                         : chr  "ATRX" "TERT" "ATRX" "ATRX" ...
##  $ BRAF.V600E.status                            : chr  "WT" "WT" "WT" "WT" ...
##  $ BRAF.KIAA1549.fusion                         : chr  "WT" "WT" "WT" "WT" ...
##  $ ABSOLUTE.purity                              : num  0.79 0.61 0.76 0.83 0.74 0.85 0.53 0.92 0.81 0.89 ...
##  $ ABSOLUTE.ploidy                              : num  2 2.05 2.05 3.92 1.94 1.97 4.15 1.95 2.05 1.97 ...
##  $ ESTIMATE.stromal.score                       : num  422 707 563 460 701 ...
##  $ ESTIMATE.immune.score                        : num  805 2283 2076 819 1282 ...
##  $ ESTIMATE.combined.score                      : num  1227 2990 2640 1279 1983 ...
##  $ Original.Subtype                             : chr  "IDHmut-non-codel" "IDHwt" "IDHmut-non-codel" "IDHmut-non-codel" ...
##  $ Transcriptome.Subtype                        : chr  NA "CL" "PN" "PN" ...
##  $ Pan.Glioma.RNA.Expression.Cluster            : chr  "LGr3" "LGr4" "LGr3" "LGr3" ...
##  $ IDH.specific.RNA.Expression.Cluster          : chr  "IDHmut-R3" "IDHwt-R2" "IDHmut-R3" "IDHmut-R3" ...
##  $ Pan.Glioma.DNA.Methylation.Cluster           : chr  "LGm2" "LGm5" "LGm2" "LGm2" ...
##  $ IDH.specific.DNA.Methylation.Cluster         : chr  "IDHmut-K2" "IDHwt-K2" "IDHmut-K2" "IDHmut-K2" ...
##  $ Supervised.DNA.Methylation.Cluster           : chr  "G-CIMP-high" "Mesenchymal-like" "G-CIMP-high" "G-CIMP-high" ...
##  $ Random.Forest.Sturm.Cluster                  : chr  "IDH" "Mesenchymal" "IDH" "IDH" ...
##  $ RPPA.cluster                                 : chr  "K2" NA "K2" "K2" ...
##  $ Telomere.length.estimate.in.blood.normal..Kb.: num  8.11 1.42 NA NA 3.26 ...
##  $ Telomere.length.estimate.in.tumor..Kb.       : num  5.19 1.61 NA NA 1.5 ...

Loading the list of TFs

tfs <- as.character(as.vector(read.csv("Dataset/ColNames_TF.csv",header=TRUE)$x))

Perform Initial Pre-Processing to obtain Expression Matrix, Perturbation Matrix [Matrix of Zeros], Mechanistic Matrix, TFs and Target Genes

#Here we consider me.net is the matrix containing prior set of possible interactions
out <- initial_preprocessing(z,me.net)

#Some targets might be mismatching with mechanistic network, to make things consistent 
#we do the pre-processing. Here we don't consider any Knockout or Knockdown information 
#is available.
mod_z <- out[[1]]
K <- out[[2]]
M <- out[[3]]
tfs <- out[[4]]
targets <- out[[5]]

Specify the path to save results obtained from RGBM

filename <- "GLIOMA"
outputpath <- paste0("Results/",filename)
sample_name <- paste0(filename,"_Full_")

Run the RGBM algorithm to reverse-engineer the regulatory network

V_final <- RGBM(E = mod_z , K = K, g_M = M, tfs = tfs, targets = targets, M=5000, nu = 0.001, 
                s_f = 0.3, lf = 1, no_iterations = 3, mink = 0, experimentid = 1, outputpath 
                = outputpath, sample_type = sample_name, real = 1);
write.table(V_final,file=paste("Results/",filename,"/Adjacency_Matrix/",sample_name,
                         "Final_Adjacency_Matrix.csv",sep=""),row.names = T, col.names = T)
net <- read.table(paste0("Results/",filename,"/Adjacency_Matrix/",sample_name,
                         "Final_Adjacency_Matrix.csv"),header=TRUE,sep=" ")

Make average size of regulon less than 100

#Make average regulon size to less than 100
while((sum(net>0)/length(tfs))>100)
{
  median_weight <- median(net[net>0]);
  net[net<median_weight] <- 0;
}

Make the correlation matrix to get signed regulation information

#Create correlation matrix
corr_matrix <- cross.cor(t(mod_z)[tfs,],t(mod_z),met="spearman",ncore = 16)
corr_matrix[net==0] <- 0;
load(paste0(outputpath,"/Adjacency_Matrix/",sample_name,"Correlation_matrix.Rdata"))

Get all the regulons for all the TFs

regulon_sets <- get_regulons(net,minsize=10)

Create the activity matrix

amat <- activity_mc(mexp = t(mod_z), cormat = corr_matrix, tflist = rownames(net), tau = 0.0)

Get phenotype from case table

glioma_cases <- casesTable$Case;
idh_mutant_cases <- which(casesTable$IDH.status=="Mutant");
idh_wildtype_cases <- which(casesTable$IDH.status=="WT")
tcga_mutant_sample_names <- glioma_cases[idh_mutant_cases];
tcga_wildtype_sample_names <- glioma_cases[idh_wildtype_cases];
mutant_indices <- which(rownames(mod_z) %in% tcga_mutant_sample_names);
wildtype_indices <- which(rownames(mod_z) %in% tcga_wildtype_sample_names)

To perform MRA using FGSEA need to see difference in expression for all targets

#Get difference in expression of target genes using mutant vs wildtype info
pheno_t <- c()
pv_t <- c()
for (j in 1:ncol(mod_z)){
  mutant_expr <- mean(mod_z[mutant_indices,j]);
  wildtype_expr <- mean(mod_z[wildtype_indices,j]);
  pheno_t <- c(pheno_t,mutant_expr-wildtype_expr);
  pv_t <- c(pv_t,wilcox.test(mod_z[mutant_indices,j],mod_z[wildtype_indices,j],
                             exact=F)$p.value)
}
padj_t = p.adjust(pv_t,method="fdr")
names(pheno_t) <- colnames(mod_z);
sorted_pheno_t <- sort(pheno_t,decreasing = TRUE)

Perform Gene Set Enrichment

fgseaRes <- fgsea(regulon_sets,sorted_pheno_t,minSize=10,maxSize=max(unlist(lapply(
                  regulon_sets,length))), nperm=100000)
topPathwaysUp <- fgseaRes[ES > 0][head(order(pval), n=20), pathway]
topPathwaysDown <- fgseaRes[ES < 0][head(order(pval), n=20), pathway]
topPathways <- c(topPathwaysUp, rev(topPathwaysDown))

Plot the most enriched TFs with target info

plotGseaTable(regulon_sets[topPathways], sorted_pheno_t, fgseaRes, gseaParam = 0.5)

Identify top MRs

mra=as.data.frame(fgseaRes[order(pval)])
mra$leadingEdge=as.character(mra$leadingEdge)

topmr = mra$pathway[mra$padj<0.01]
topmr_info = mra[mra$padj<0.01,]
indices = which(mra$padj<0.01);
mra_table <- data.frame(topmr=mra$pathway[indices],padj=mra$padj[indices],
                        NES=mra$NES[indices])

Build the activity matrix based on top MRs

amat.mr <- amat[topmr,]
amat.mr.mutant <- amat.mr[,mutant_indices];
amat.mr.wildtype <- amat.mr[,wildtype_indices]
wilcox_test_info <- perform_wilcox_test(amat.mr.mutant,amat.mr.wildtype)
wilcox_test_info <- wilcox_test_info[order(wilcox_test_info$FC_Mean,decreasing=T),]

new_mr_activity_matrix <- as.matrix(amat[rownames(wilcox_test_info),
                                        c(mutant_indices,wildtype_indices)])
rownames(new_mr_activity_matrix)
##  [1] "SOX1"    "SOX8"    "THRA"    "FOXO4"   "SOX6"    "SCRT1"   "TEF"    
##  [8] "ZNF238"  "MYOD1"   "MYCN"    "TFAP4"   "NR0B1"   "HLF"     "HEY2"   
## [15] "KLF12"   "ARNT2"   "ZBTB3"   "ZBTB4"   "CTCF"    "BPTF"    "PAX1"   
## [22] "GLIS2"   "HIC2"    "TBR1"    "ID4"     "EMX1"    "ZNF263"  "THRB"   
## [29] "NEUROD2" "SOX10"   "LHX6"    "MEF2C"   "STAT4"   "IRF8"    "MAFF"   
## [36] "JUNB"    "IRF5"    "E2F2"    "RUNX2"   "BCL6B"   "PRDM1"   "STAT3"  
## [43] "E2F1"    "SPI1"    "FOSL2"   "BATF"    "FOXM1"   "HIC1"    "KLF6"   
## [50] "CEBPB"   "IRF7"    "RELB"    "VDR"     "ARID5A"  "MBD2"    "RUNX1"  
## [57] "STAT1"   "IRF1"    "TGIF1"   "FOSL1"   "MEOX2"   "ETV7"    "CEBPD"  
## [64] "TEAD3"   "HOXC10"  "HOXC6"
colnames(new_mr_activity_matrix) <- NULL

colcol <- rep("white",ncol(new_mr_activity_matrix));
colcol[1:length(mutant_indices)] <- "yellow"
colcol[(length(wildtype_indices)+1):length(colcol)] <- "blue"
hc.cols <- hclust(dist(t(new_mr_activity_matrix),method="manhattan"),method="ward.D2")

new_mr_activity_matrix[new_mr_activity_matrix <= quantile(new_mr_activity_matrix, 0.05)
                       ] <- quantile(new_mr_activity_matrix, 0.05)
new_mr_activity_matrix[new_mr_activity_matrix >= quantile(new_mr_activity_matrix, 0.95)
                       ] <- quantile(new_mr_activity_matrix, 0.95)

#Here yellow colour is for mutant case and blue colour is for wildtype
par(mar=c(1,1,1,1))
heatmap.2(x=new_mr_activity_matrix, Rowv = F, Colv = as.dendrogram(hc.cols), col = 
            greenred(50), dendrogram = "column",trace="none", ColSideColors = colcol)


RGBM Discussion