Chapter 2 Database construction
2.1 KEGG pathway’s metabolite and gene
This is the metabolites and the genes in every metabolism KEGG pathway.6821个代谢物和基因.
library(KEGGREST)
library(dplyr)
## 所有人代谢相关通路,这个是之前自己整理的
pathway_meta <- data.table::fread("result/pathway_hsa.txt",sep="\t",header=F) %>%
as.data.frame()
## 对每个通路提取对应的基因和代谢物
result_gene <- data.frame()
result_compound <- data.frame()
for (i in 1:length(pathway_meta$V2)){
print(pathway_meta$V2[i])
path <- keggGet(pathway_meta$V2[i])
# print(pathway_meta$V2[i])
## 提取此通路的基因信息
gene.info <- path[[1]]$GENE %>%
as.data.frame() %>%
dplyr::rename("V1"=".") %>%
tidyr::separate(V1,sep=";","V1") %>%
dplyr::pull(V1)
## 提取基因中的gene symbol
gene.symbol <- unique(gene.info[1:length(gene.info)%%2 == 0])
#gene.id <- gene.info[1:length(gene.info)%%2 == 1]
# 生成gene symbol和Entrez ID匹配的数据框
# gene.df <- data.frame(gene.symbol = gene.symbol,gene.id = gene.id,kegg=pathway_meta$V2[i],kegg_path=path[[1]]$PATHWAY_MAP)
gene.df <- data.frame(type="gene",name = gene.symbol,kegg_pathwayid=pathway_meta$V2[i],kegg_pathwayname=path[[1]]$PATHWAY_MAP,kegg_category =pathway_meta$V1[i])
#head(gene.df)
result_gene <- rbind(result_gene,gene.df)
## 提取此通路的代谢物信息 (这里会有一些药物,是否去掉?"hsa00514","hsa00533", "hsa00511"通路没有compound?方老师帮忙check一下?)
if (length(path[[1]]$COMPOUND)>0) {
compound.info <- path[[1]]$COMPOUND %>%
as.data.frame() %>%
dplyr::rename("V1"=".") %>%
rownames()
# 生成compound和对应的pathway信息
compound.df <- data.frame(type="metabolite",name = compound.info,kegg_pathwayid=pathway_meta$V2[i],kegg_pathwayname=path[[1]]$PATHWAY_MAP,kegg_category =pathway_meta$V1[i])
result_compound <- rbind(result_compound,compound.df)
}
}
result <- rbind(result_gene,result_compound) %>%
as_tibble()
dim(result)
write.table(result,"result/KEGGpathway_metabolite_gene-v0117.txt",quote=F,row.names = F,sep="\t")2.2 KEGG metabolite-metabolite pairs and metabolite-gene pairs
2.2.1 KEGG’s reaction and compound’s download
## the code is runing in 2024.01.15
library(KEGGREST)
library(plyr)
#source("/Users/guituantuan/Downloads/R-packages/RbioRXN-master/R/get.kegg.all.R")
#source("/Users/guituantuan/Downloads/R-packages/RbioRXN-master/R/get.kegg.byId.R")
##断掉的话就多试几次
source("https://raw.githubusercontent.com/cran/RbioRXN/master/R/get.kegg.all.R")
source("https://raw.githubusercontent.com/cran/RbioRXN/master/R/get.kegg.byId.R")
## KEGG数据库中的代谢反应和代谢物注释信息 -------------------------------------------------
#8:33-
keggAll = get.kegg.all()
saveRDS(keggAll, file = "result/keggAll-v0115.RData")
#load("result/keggAll-v0115.RData")
#keggAll <- readRDS("result/keggAll-v0115.RData")
## 查看代谢反应和代谢物的数量
dim(keggAll$reaction)
# [1] 11990 14
dim(keggAll$compound)
# [1] 19270 25
write.csv(keggAll$reaction,file = "result/keggAllreaction_v20240115.csv",row.names = F)
write.csv(keggAll$compound,file = "result/keggAllcompound_v20240115.csv",row.names = F)2.2.2 Downloading the enzyme and its corresponding gene information from the htext,and extract the gene and the enzyme’s relationship
Download the htext file.
https://www.genome.jp/kegg-bin/get_htext?hsa01000
grep "^E" result/hsa01000.keg | sed 's/E //g'|cut -d" " -f2 |cut -d";" -f1 >result/gene.txt
grep "^E" result/hsa01000.keg | sed 's/E //g'|sed 's/\[EC\:/ /g' |sed 's/\] //g' |cut -f3 >result/enzyme.txt
###sed 's/\[EC\:/ /g'后面为制表符
paste result/gene.txt result/enzyme.txt |grep -v "D-dopachrome" |grep -v "cytochrome"|grep -v "putative" >result/gene_enzyme.txtthe gene_enzyme.txt is like this
2.2.3 Extracting the metabolite-gene pairs and metabolite-metabolite pairs
library(dplyr)
## Transformed the enzyme and its corresponding gene to one-to-one relationship
gene_enzyme <- data.table::fread("result/gene_enzyme.txt",header=F) %>%
as.data.frame() %>%
tidyr::separate_rows("V2",sep=" ") %>%
unique() %>%
dplyr::rename("gene_name"="V1") %>%
dplyr::rename("enzyme"="V2")
## all the reaction in kegg
a <- read.csv("result/keggAllreaction_v20240115.csv")
# get all the metabolite and its corresponding gene from all the reaction in kegg
## the metabolite-gene pairs are exacted from every compound in the equation and the gene with the enzyme
metabolite_gene <- a %>%
dplyr::select(c("EQUATION","ENZYME")) %>%
tidyr::separate_rows(EQUATION,sep=" <=> ") %>%
tidyr::separate_rows(EQUATION,sep=" ") %>%
dplyr::filter(grepl("^C",EQUATION)) %>%
tidyr::separate_rows(ENZYME,sep="///") %>%
dplyr::rename("compound"="EQUATION") %>%
dplyr::rename("enzyme"="ENZYME") %>%
dplyr::left_join(gene_enzyme,by="enzyme") %>%
dplyr::filter(!is.na(gene_name)) %>%
dplyr::select(-"enzyme") %>%
dplyr::mutate(compound=gsub("[(]n[)]","",compound)) %>%
dplyr::mutate(compound=gsub("[(]m[)]","",compound)) %>%
dplyr::mutate(compound=gsub("[(]n[+]1[)]","",compound)) %>%
dplyr::mutate(compound=gsub("[(]n[+]m[)]","",compound)) %>%
dplyr::mutate(compound=gsub("[(]n[-]1[)]","",compound)) %>%
unique() %>%
dplyr::mutate(src_type="metabolite") %>%
dplyr::mutate(dest_type="gene") %>%
dplyr::rename("src"="compound") %>%
dplyr::rename("dest"="gene_name") %>%
dplyr::select(c("src_type","src","dest_type","dest"))
# get all the metabolite-metabolite pairs in the reactions
## get the metabolite from the EQUATION, the metabolite-metabolite pairs are exacted from every compound in left of equation and every compound in right of equation.
metabolite_metabolite <- a %>%
dplyr::select(c("EQUATION","ENZYME")) %>%
tidyr::separate(EQUATION,c("a","b"),sep=" <=> ")%>%
tidyr::separate_rows(a,sep=" ") %>%
tidyr::separate_rows(b,sep=" ") %>%
dplyr::filter(grepl("^C",a)) %>%
dplyr::filter(grepl("^C",b)) %>%
dplyr::select(c("a","b")) %>%
unique() %>%
dplyr::mutate(metabolite_1=gsub("[(]n[)]","",a)) %>%
dplyr::mutate(metabolite_1=gsub("[(]n[+]m[)]","",metabolite_1)) %>%
dplyr::mutate(metabolite_1=gsub("[(]side","",metabolite_1)) %>%
dplyr::mutate(metabolite_1=gsub("[(]m[+]n[)]","",metabolite_1)) %>%
dplyr::mutate(metabolite_1=gsub("[(]n[+]1[)]","",metabolite_1)) %>%
dplyr::mutate(metabolite_1=gsub("[(]m[)]","",metabolite_1)) %>%
dplyr::mutate(metabolite_2=gsub("[(]n[)]","",b)) %>%
dplyr::mutate(metabolite_2=gsub("[(]n[+]m[)]","",metabolite_2)) %>%
dplyr::mutate(metabolite_2=gsub("[(]side","",metabolite_2)) %>%
dplyr::mutate(metabolite_2=gsub("[(]m[+]n[)]","",metabolite_2)) %>%
dplyr::mutate(metabolite_2=gsub("[(]n[+]1[)]","",metabolite_2)) %>%
dplyr::mutate(metabolite_2=gsub("[(]m[)]","",metabolite_2)) %>%
dplyr::mutate(metabolite_2=gsub("[(]n[-]1[)]","",metabolite_2)) %>%
dplyr::mutate(metabolite_2=gsub("[(]x[)]","",metabolite_2)) %>%
dplyr::mutate(metabolite_2=gsub("[(]n[-]x[)]","",metabolite_2)) %>%
dplyr::mutate(metabolite_2=gsub("[(]n[+]2[)]","",metabolite_2)) %>%
dplyr::mutate(metabolite_2=gsub("[(]m[-]1[)]","",metabolite_2)) %>%
dplyr::select(-c("a","b")) %>%
dplyr::filter(metabolite_1 != metabolite_2) %>%
dplyr::mutate(src_type="metabolite") %>%
dplyr::mutate(dest_type="metabolite") %>%
dplyr::rename("src"="metabolite_1") %>%
dplyr::rename("dest"="metabolite_2") %>%
dplyr::select(c("src_type","src","dest_type","dest"))
write.table(metabolite_gene,"result/KEGG_metabolite_gene.txt",quote=F,row.names = F,sep="\t")
write.table(metabolite_metabolite,"result/KEGG_metabolite_metabolite.txt",quote=F,row.names = F,sep="\t")2.3 Graphite
2.3.1 Download all the data from graphite
library(graphite)
library(clipper)
library(dplyr)
kpaths <- pathways("hsapiens", "kegg")
kpaths_result <- data.frame()
for (i in 1:length(kpaths)) {
kid <- attributes(kpaths[[i]])$id
ktitle <- attributes(kpaths[[i]])$title
kpaths_1 <- convertIdentifiers(convertIdentifiers(kpaths[[i]], "symbol"),"KEGGCOMP")
kpaths_result_temp <- edges(kpaths_1,"mixed") %>%
dplyr::mutate(pathwayid=kid) %>%
dplyr::mutate(pathway=ktitle)
kpaths_result <- rbind(kpaths_result,kpaths_result_temp)
}
write.table(kpaths_result,"result/Graphite/gene-metabolite-kegg.txt",quote=F,sep="\t",row.names=F)
spaths <- pathways("hsapiens", "smpdb")
smpdb_result <- data.frame()
for (i in 1:length(spaths)) {
kid <- attributes(spaths[[i]])$id
ktitle <- attributes(spaths[[i]])$title
smpdb_1 <- convertIdentifiers(convertIdentifiers(spaths[[i]], "symbol"),"KEGGCOMP")
smpdb_result_temp <- edges(smpdb_1, "mixed") %>%
dplyr::mutate(pathwayid=kid) %>%
dplyr::mutate(pathway=ktitle)
smpdb_result <- rbind(smpdb_result,smpdb_result_temp)
}
write.table(smpdb_result,"result/Graphite/gene-metabolite-smpdb.txt",quote=F,sep="\t",row.names=F)
wikipaths <- pathways("hsapiens", "wikipathways")
wikipaths_result <- data.frame()
for (i in 1:length(wikipaths)) {
kid <- attributes(wikipaths[[i]])$id
ktitle <- attributes(wikipaths[[i]])$title
wikipaths_1 <- convertIdentifiers(convertIdentifiers(wikipaths[[i]], "symbol"),"KEGGCOMP")
wikipaths_result_temp <- edges(wikipaths_1,"mixed") %>%
dplyr::mutate(pathwayid=kid) %>%
dplyr::mutate(pathway=ktitle)
wikipaths_result <- rbind(wikipaths_result,wikipaths_result_temp)
}
write.table(wikipaths_result,"result/Graphite/gene-metabolite-wikipaths.txt",quote=F,sep="\t",row.names=F)
reactomepaths <- pathways("hsapiens", "reactome")
reactome_result <- data.frame()
for (i in 1:length(reactomepaths)) {
kid <- attributes(reactomepaths[[i]])$id
ktitle <- attributes(reactomepaths[[i]])$title
reactome_1 <- convertIdentifiers(convertIdentifiers(reactomepaths[[i]], "symbol"),"KEGGCOMP")
reactome_result_temp <- edges(reactome_1,"mixed") %>%
dplyr::mutate(pathwayid=kid) %>%
dplyr::mutate(pathway=ktitle)
reactome_result <- rbind(reactome_result,reactome_result_temp)
}
write.table(reactome_result,"result/Graphite/gene-metabolite-reactome.txt",quote=F,sep="\t",row.names=F)2.3.2 Download the information of metabolism pathways
### wikipathway
rev all.txt|cut -d"_" -f2|rev >all.1.txt
cat /dev/null > test.txt
cat all.1.txt |while read line
do
grep TERM homo/*${line}_*.gpml |cut -d">" -f2|cut -d"<" -f1| tr -s "\n" ";"|sed -e 's/;$//g' | sed "s/^/${line}
/" >>test.txt
done
grep -i metabolic test.txt >test_metabolic.txt
### smpdb
wget https://smpdb.ca/downloads/smpdb_pathways.csv.zip
### reactome
#R-HSA-1430728 is the metabolism, then choose the hierarchical is lower than it
### kegg
#choose the metabolism pathway2.3.3 Extract the data of metabolism
library(dplyr)
kegg_data_metabolism <- data.table::fread("result/Graphite/gene-metabolite-kegg.txt") %>%
as.data.frame() %>%
dplyr::filter(pathway %in% unique(kegg_pathway$PATHWAY)) %>%
dplyr::select(-c("direction","type")) %>%
unique() %>%
dplyr::filter(src_type =="KEGGCOMP" | dest_type=="KEGGCOMP") %>%
dplyr::mutate(src_new = ifelse(src_type=="KEGGCOMP",src,dest),
dest_new = ifelse(src_type=="KEGGCOMP",dest,src),
src_type_new=ifelse(src_type=="KEGGCOMP",src_type,dest_type),
dest_type_new=ifelse(src_type=="KEGGCOMP",dest_type,src_type)) %>%
dplyr::mutate(src_new1=ifelse(src_type=="KEGGCOMP" & dest_type=="KEGGCOMP",
ifelse(src_new>dest_new,src_new,dest_new),
src_new)) %>%
dplyr::mutate(dest_new1=ifelse(src_type=="KEGGCOMP" & dest_type=="KEGGCOMP",
ifelse(src_new>dest_new,dest_new,src_new),
dest_new)) %>%
dplyr::select(-c("src_type","src","dest_type","dest","src_new","dest_new")) %>%
dplyr::rename("src_type"="src_type_new","src"="src_new1","dest_type"="dest_type_new","dest"="dest_new1") %>%
dplyr::select(c("src_type","src","dest_type","dest","pathwayid","pathway")) %>%
unique()
write.table(kegg_data_metabolism,"result/Graphite/gene-metabolite-kegg_metabolism.txt",quote=F,sep="\t",row.names=F)
### smpdb
metabolism_pathway <- read.csv("result/Graphite/smpdb_pathways.csv") %>%
dplyr::filter(Subject=="Metabolic")
smpdb_data <- data.table::fread("result/Graphite/gene-metabolite-smpdb.txt") %>%
as.data.frame()
smpdb_metabolism <- smpdb_data %>%
dplyr::filter(pathway %in% metabolism_pathway$Name) %>%
dplyr::filter(src_type=="KEGGCOMP"|dest_type=="KEGGCOMP") %>%
dplyr::filter(!grepl("De Novo",pathway)) %>%
dplyr::mutate(pathway_new=ifelse(grepl("Phosphatidylcholine Biosynthesis",pathway),
"Phosphatidylcholine Biosynthesis",pathway)) %>%
dplyr::mutate(pathway_new=ifelse(grepl("Cardiolipin Biosynthesis",pathway_new),
"Cardiolipin Biosynthesis",pathway_new)) %>%
dplyr::mutate(pathway_new=ifelse(grepl("Phosphatidylethanolamine Biosynthesis",pathway_new),
"Phosphatidylethanolamine Biosynthesis",pathway_new)) %>%
dplyr::mutate(pathway_new=ifelse(grepl("Mitochondrial Beta-Oxidation",pathway_new),
"Mitochondrial Beta-Oxidation",pathway_new)) %>%
dplyr::select(-"pathway") %>%
dplyr::rename("pathway"="pathway_new") %>%
dplyr::mutate(src_new = ifelse(src_type=="KEGGCOMP",src,dest),
dest_new = ifelse(src_type=="KEGGCOMP",dest,src),
src_type_new=ifelse(src_type=="KEGGCOMP",src_type,dest_type),
dest_type_new=ifelse(src_type=="KEGGCOMP",dest_type,src_type)) %>%
dplyr::mutate(src_new1=ifelse(src_type=="KEGGCOMP" & dest_type=="KEGGCOMP",
ifelse(src_new>dest_new,src_new,dest_new),
src_new)) %>%
dplyr::mutate(dest_new1=ifelse(src_type=="KEGGCOMP" & dest_type=="KEGGCOMP",
ifelse(src_new>dest_new,dest_new,src_new),
dest_new)) %>%
dplyr::select(-c("src_type","src","dest_type","dest","src_new","dest_new")) %>%
dplyr::rename("src_type"="src_type_new","src"="src_new1","dest_type"="dest_type_new","dest"="dest_new1") %>%
dplyr::select(c("src_type","src","dest_type","dest","pathwayid","pathway")) %>%
unique()
### reactome
## metabolism pathway ###
reactome_metabolism_pathwayid <- data.table::fread("result/Graphite/ReactomePathwaysRelation.txt",header=F) %>%
as.data.frame() %>%
dplyr::filter(V1=="R-HSA-1430728")
reactome_metabolism_data <- data.table::fread("result/Graphite/gene-metabolite-reactome.txt") %>%
as.data.frame() %>%
dplyr::filter(pathwayid %in% reactome_metabolism_pathwayid$V2) %>%
dplyr::filter(src_type=="KEGGCOMP"|dest_type=="KEGGCOMP") %>%
dplyr::mutate(src_new = ifelse(src_type=="KEGGCOMP",src,dest),
dest_new = ifelse(src_type=="KEGGCOMP",dest,src),
src_type_new=ifelse(src_type=="KEGGCOMP",src_type,dest_type),
dest_type_new=ifelse(src_type=="KEGGCOMP",dest_type,src_type)) %>%
dplyr::mutate(src_new1=ifelse(src_type=="KEGGCOMP" & dest_type=="KEGGCOMP",
ifelse(src_new>dest_new,src_new,dest_new),
src_new)) %>%
dplyr::mutate(dest_new1=ifelse(src_type=="KEGGCOMP" & dest_type=="KEGGCOMP",
ifelse(src_new>dest_new,dest_new,src_new),
dest_new)) %>%
dplyr::select(-c("src_type","src","dest_type","dest","src_new","dest_new")) %>%
dplyr::rename("src_type"="src_type_new","src"="src_new1","dest_type"="dest_type_new","dest"="dest_new1") %>%
dplyr::select(c("src_type","src","dest_type","dest","pathwayid","pathway")) %>%
unique()
write.table(reactome_metabolism_data,"result/Graphite/gene-metabolite-reactome_metabolism.txt",quote=F,sep="\t",row.names=F)
### wikipathways
wikipathway_metabolism_id <- data.table::fread("wikipath/test_metabolic.txt",header=F) %>%
as.data.frame()
wikipathway_data <- data.table::fread("result/Graphite/gene-metabolite-wikipaths.txt") %>%
as.data.frame() %>%
dplyr::filter(pathwayid %in% wikipathway_metabolism_id$V1) %>%
dplyr::filter(src_type=="KEGGCOMP"|dest_type=="KEGGCOMP") %>%
dplyr::mutate(src_new = ifelse(src_type=="KEGGCOMP",src,dest),
dest_new = ifelse(src_type=="KEGGCOMP",dest,src),
src_type_new=ifelse(src_type=="KEGGCOMP",src_type,dest_type),
dest_type_new=ifelse(src_type=="KEGGCOMP",dest_type,src_type)) %>%
dplyr::mutate(src_new1=ifelse(src_type=="KEGGCOMP" & dest_type=="KEGGCOMP",
ifelse(src_new>dest_new,src_new,dest_new),
src_new)) %>%
dplyr::mutate(dest_new1=ifelse(src_type=="KEGGCOMP" & dest_type=="KEGGCOMP",
ifelse(src_new>dest_new,dest_new,src_new),
dest_new)) %>%
dplyr::select(-c("src_type","src","dest_type","dest","src_new","dest_new")) %>%
dplyr::rename("src_type"="src_type_new","src"="src_new1","dest_type"="dest_type_new","dest"="dest_new1") %>%
dplyr::select(c("src_type","src","dest_type","dest","pathwayid","pathway")) %>%
unique()
write.table(wikipathway_data,"result/Graphite/gene-metabolite-wikipathway_metabolism.txt",quote=F,sep="\t",row.names=F)2.4 BiGG
2.4.1 Down all the models in the BiGG
Then, choice the model from Homo sapiens,then in reserve model is iAT_PLT_636, iAB_RBC_283, RECON1, Recon3D.
Get all the reactions names in the 4 human models.
curl 'http://bigg.ucsd.edu/api/v2/models/iAT_PLT_636/reactions' >result/BiGG/iAT_PLT_636.reactions
curl 'http://bigg.ucsd.edu/api/v2/models/iAB_RBC_283/reactions' >result/BiGG/iAB_RBC_283.reactions
curl 'http://bigg.ucsd.edu/api/v2/models/RECON1/reactions' >result/BiGG/RECON1.reactions
curl 'http://bigg.ucsd.edu/api/v2/models/Recon3D/reactions' >result/BiGG/Recon3D.reactionsChange the json reaction names to txt file.
2.4.2 Download the every reaction
curl 'http://bigg.ucsd.edu/api/v2/models/iAT_PLT_636/reactions/10FTHF6GLUtm' >result/BiGG/iAT_PLT_636/reaction/json/10FTHF6GLUtm.txt
curl 'http://bigg.ucsd.edu/api/v2/models/iAT_PLT_636/reactions/10FTHF7GLUtl' >result/BiGG/iAT_PLT_636/reaction/json/10FTHF7GLUtl.txtGet the information in every reaction.
The R script that change the reaction information in json to txt.
args <- commandArgs(T)
library(dplyr)
mydata <- paste0("result/BiGG/",args[1],"/reaction/json/",args[2],".txt")
recon1<-jsonlite::stream_in(file(mydata),pagesize = 100)
metabolite_biggid <- recon1$metabolites[[1]]$bigg_id
metabolites <- recon1$metabolites[[1]]$name
compartment_bigg_id <- recon1$metabolites[[1]]$compartment_bigg_id
gene <- recon1$results[[1]]$genes[[1]]$name
subsystem <- recon1$results[[1]]$subsystem
model=args[1]
if (length(gene)>0) {
dd <- data.frame(metabolite_biggid1=paste(metabolite_biggid,compartment_bigg_id,sep="_"),src_type="metabolite",src=metabolites,metabolite_biggid2=NA,dest_type="gene",dest=paste(gene,collapse=";"),subsystems=subsystem,models=model)
# write.table(dd,paste0(args[1],"_txt/",args[2],".txt"),quote=F,sep="\t",row.names=F)
}else {
dd <- data.frame()
}
if (length(unique(recon1$metabolites[[1]]$stoichiometry))>1) {
metabolites_stoichiometry1 <- recon1$metabolites[[1]] %>%
dplyr::filter(stoichiometry>0)
metabolites_stoichiometry2 <- recon1$metabolites[[1]] %>%
dplyr::filter(stoichiometry<0)
result <- data.frame()
for (i in 1:nrow(metabolites_stoichiometry1)) {
for (j in 1:nrow(metabolites_stoichiometry2)) {
temp <- data.frame(metabolite_biggid1=paste(metabolites_stoichiometry1$bigg_id[i],metabolites_stoichiometry1$compartment_bigg_id[i],sep="_"),src_type="metabolite",src=metabolites_stoichiometry1$name[i],
metabolite_biggid2=paste(metabolites_stoichiometry2$bigg_id[j],metabolites_stoichiometry2$compartment_bigg_id[j],sep="_"),dest_type="metabolite",dest=metabolites_stoichiometry2$name[j],subsystems=subsystem,models=model)
result <- rbind(result,temp)
}
}
result_final <- rbind(dd,result)
write.table(result_final,paste0("result/BiGG/",args[1],"/reaction/txt/",args[2],".txt"),quote=F,sep="\t",row.names=F)
}else {
print(0)
}Run the R script that change the reaction information in json to txt in batch.
cat iAB_RBC_283.reaction.txt |while read line
do
Rscript reaction_json2txt.R iAB_RBC_283 $line
done
cat iAT_PLT_636.reaction.txt |while read line
do
Rscript reaction_json2txt.R iAT_PLT_636 $line
done
cat RECON1.reaction.txt |while read line
do
Rscript reaction_json2txt.R RECON1 $line
done
cat Recon3D.reaction.txt |while read line
do
Rscript reaction_json2txt.R Recon3D $line
doneCombine all the reactions info include the gene-metabolite pair,the metabolite-metabolite pair,subsystem,model.
cat result/BiGG/iAB_RBC_283/reaction/txt/*|grep -v metabolite_biggid1 >result/BiGG/cat_reaction_info_iAB_RBC_283.txt
cat result/BiGG/iAT_PLT_636/reaction/txt/*|grep -v metabolite_biggid1 >result/BiGG/cat_reaction_info_iAT_PLT_636.txt
cat result/BiGG/Recon3D/reaction/txt/*|grep -v metabolite_biggid1 >result/BiGG/cat_reaction_info_Recon3D.txt
cat result/BiGG/RECON1/reaction/txt/*|grep -v metabolite_biggid1 >result/BiGG/cat_reaction_info_RECON1.txt2.4.3 Exact all the metabolites names in every model in BiGG
curl 'http://bigg.ucsd.edu/api/v2/models/iAB_RBC_283/metabolites' >result/BiGG/iAB_RBC_283_metabolite.json
curl 'http://bigg.ucsd.edu/api/v2/models/iAT_PLT_636/metabolites' >result/BiGG/iAT_PLT_636_metabolite.json
curl 'http://bigg.ucsd.edu/api/v2/models/Recon3D/metabolites' >result/BiGG/Recon3D_metabolite.json
curl 'http://bigg.ucsd.edu/api/v2/models/RECON1/metabolites' >result/BiGG/RECON1_metabolite.jsonChange the json to txt.
args=commandArgs(T)
recon1<-jsonlite::stream_in(file(paste0("result/BiGG/",args[1],"_metabolite.json")),pagesize = 100)
aa <- recon1$results[[1]]
write.table(aa$bigg_id,paste0("result/BiGG/",args[1],"_metabolite.txt"),row.names = F,col.names=F,sep="\t",quote=F)Use the script.
Rscript metabolitename_json2txt.R iAB_RBC_283
Rscript metabolitename_json2txt.R iAT_PLT_636
Rscript metabolitename_json2txt.R RECON1
Rscript metabolitename_json2txt.R Recon3DDownload every metabolite information.
curl 'http://bigg.ucsd.edu/api/v2/models/iAB_RBC_283/metabolites/13dpg_c' >result/BiGG/iAB_RBC_283_metabolites/13dpg_c.json
curl 'http://bigg.ucsd.edu/api/v2/models/iAB_RBC_283/metabolites/23dpg_c' >result/BiGG/iAB_RBC_283_metabolites/23dpg_c.json
curl 'http://bigg.ucsd.edu/api/v2/models/iAB_RBC_283/metabolites/2kmb_c' >result/BiGG/iAB_RBC_283_metabolites/2kmb_c.json
curl 'http://bigg.ucsd.edu/api/v2/models/iAB_RBC_283/metabolites/2pg_c' >result/BiGG/iAB_RBC_283_metabolites/2pg_c.jsonThe R script that exact the KEGG ID in metabolite file.
args <- commandArgs(T)
mydata <- paste0("result/BiGG/",args[1],"/metabolite/json/",args[2],".json")
# "iAB_RBC_283_metabolites/glu__L_c.json"
recon1<-jsonlite::stream_in(file(mydata),pagesize = 100)
kegg_id <- paste(recon1$database_links$`KEGG Compound`[[1]]$id,collapse = ";")
name <- recon1$name
result <- data.frame(name=name,kegg_id=kegg_id,source=args[1])
write.table(result,paste0("result/BiGG/",args[1],"/metabolite/txt/",args[2],".txt"),quote=F,row.names = F,sep="\t")Run the R script that exact the KEGG ID in the metabolite file.
cat result/BiGG/iAB_RBC_283_metabolite.txt|while read line
do
Rscript metabolite_json2txt.R iAB_RBC_283 ${line}
done
cat result/BiGG/iAT_PLT_636_metabolite.txt |while read line
do
Rscript metabolite_json2txt.R iAT_PLT_636 ${line}
done
cat result/BiGG/RECON1_metabolite.txt|while read line
do
Rscript metabolite_json2txt.R RECON1 ${line}
done
cat result/BiGG/Recon3D_metabolite.txt|while read line
do
Rscript metabolite_json2txt.R Recon3D ${line}
doneCombine all the metabolite in 1 file for every model.
cat result/BiGG/iAB_RBC_283/metabolite/txt/*_metabolite.txt|grep -v kegg_id >result/BiGG/iAB_RBC_283_metabolite_all.txt
cat result/BiGG/iAT_PLT_636/metabolite/txt/*_metabolite.txt |grep -v kegg_id >result/BiGG/iAT_PLT_636_metabolite_all.txt
cat result/BiGG/RECON1/metabolite/txt/*_metabolite.txt|grep -v kegg_id >result/BiGG/RECON1_metabolite_all.txt
cat result/BiGG/Recon3D/metabolite/txt/*_metabolite.txt|grep -v kegg_id >result/BiGG/Recon3D_metabolite_all.txt2.4.4 The R script that change the metabolite name to kegg id in the final output
args <- commandArgs(T)
library(dplyr)
metabolite_info <- data.table::fread(paste0("result/BiGG/",args[1],"_metabolite_all.txt"),header=F) %>%
as.data.frame()
gene_metabolite_pairs <- data.table::fread(paste0("result/BiGG/cat_reaction_info_",args[1],".txt"),header=F) %>%
as.data.frame() %>%
dplyr::left_join(metabolite_info,by=c("V3"="V1")) %>%
dplyr::left_join(metabolite_info,by=c("V6"="V1")) %>%
dplyr::mutate(src=ifelse(V2.y=="" | is.na(V2.y),
V3.x,V2.y)) %>%
dplyr::mutate(dest=ifelse(V2=="" | is.na(V2),
V6,V2)) %>%
dplyr::select(c("V2.x","src","V5","dest","V7","V8")) %>%
dplyr::rename("src_type"="V2.x","dest_type"="V5","subsystems"="V7","model"="V8") %>%
tidyr::separate_rows(src,sep=";") %>%
tidyr::separate_rows(dest,sep=";") %>%
dplyr::filter(src != dest) %>%
unique()
write.table(gene_metabolite_pairs,paste0("result/BiGG/result_",args[1],".txt"),quote=F,row.names = F,sep="\t")Use the R script that change the metabolite name to KEGG ID.
2.4.5 Combine the data from BiGG and graphite
cat result/BiGG/result.BiGG.txt ../graphite/result.graphite.txt |sed s/KEGGCOMP/metabolite/g |sed s/SYMBOL/gene/g |sort|uniq >gene-metabolite_BiGG_graphite.txtdat <- data.table::fread("/Users/guituantuan/Desktop/projects/database/gene-metabolite/gene-metabolite_BiGG_graphite.txt") %>%
as.data.frame() %>%
dplyr::filter(keggId!=gene) %>%
dplyr::mutate(keggId_new=ifelse(src_type=="metabolite" & dest_type=="metabolite",
ifelse(keggId>gene,keggId,gene),
keggId)) %>%
dplyr::mutate(gene_new=ifelse(src_type=="metabolite" & dest_type=="metabolite",
ifelse(keggId>gene,gene,keggId),
gene)) %>%
dplyr::select(-c("keggId","gene")) %>%
dplyr::rename("keggId"="keggId_new","gene"="gene_new") %>%
dplyr::select(c("src_type","keggId","dest_type","gene","subsystems"))
dat$subsystems <- stringr::str_to_title(dat$subsystems)
kegg_pathway1 <- kegg_pathway %>%
dplyr::mutate(pathway1=stringr::str_to_title(PATHWAY)) %>%
dplyr::select(c("pathway1","pathway_type")) %>%
dplyr::rename("subsystems"="pathway1") %>%
unique()
pathway_type_1 <- data.table::fread("pathway_type.txt",header=F) %>%
as.data.frame()
names(pathway_type_1) <- c("subsystems","pathway_type")
aa <- rbind(kegg_pathway1,pathway_type_1)
dat2 <- dat %>%
dplyr::mutate(subsystems=ifelse(subsystems=="Urea Cycle/Amino Group Metabolism","Urea Cycle",subsystems)) %>%
dplyr::mutate(subsystems=ifelse(subsystems=="Nucleotides","Nucleotide Interconversion",subsystems)) %>%
dplyr::mutate(subsystems=ifelse(subsystems=="Citric Acid Cycle","TCA Cycle",subsystems)) %>%
dplyr::mutate(subsystems=ifelse(subsystems=="Citrate Cycle (Tca Cycle)","TCA Cycle",subsystems)) %>%
dplyr::mutate(subsystems=ifelse(subsystems=="The Citric Acid (Tca) Cycle And Respiratory Electron Transport","TCA Cycle",subsystems)) %>%
dplyr::mutate(subsystems=ifelse(grepl("Vitamin",subsystems),"Vitamin Metabolism",subsystems)) %>%
dplyr::mutate(subsystems=ifelse(grepl("Tca Cycle",subsystems),"TCA Cycle",subsystems)) %>%
dplyr::mutate(subsystems=ifelse(grepl("Amino Acid",subsystems),"Amino Acid Metabolism",subsystems)) %>%
dplyr::mutate(subsystems=ifelse(grepl("Coa ",subsystems),"CoA Metabolism",subsystems)) %>%
dplyr::mutate(subsystems=ifelse(grepl("Fatty Acid",subsystems),"Fatty Acid Metabolism",subsystems)) %>%
dplyr::mutate(subsystems=ifelse(subsystems=="Fatty Acid Oxidation","Fatty Acid Metabolism",subsystems)) %>%
dplyr::mutate(subsystems=ifelse(subsystems=="Aminosugar Metabolism","Amino Sugar Metabolism",subsystems)) %>%
unique() %>%
dplyr::left_join(aa,by="subsystems")
write.table(dat2,"/Users/guituantuan/Desktop/projects/database/gene-metabolite/gene-metabolite_BiGG_graphite_uniq.txt",
quote=F,row.names=F,sep="\t")