Chapter 2 Database construction

2.1 KEGG pathway’s metabolite and gene

This is the metabolites and the genes in every metabolism KEGG pathway.6821个代谢物和基因.

library(KEGGREST)
library(dplyr)

## 所有人代谢相关通路，这个是之前自己整理的

pathway_meta <- data.table::fread("result/pathway_hsa.txt",sep="\t",header=F) %>%
  as.data.frame()

## 对每个通路提取对应的基因和代谢物
result_gene <- data.frame()
result_compound <- data.frame()
for (i in 1:length(pathway_meta$V2)){

  print(pathway_meta$V2[i])

  path <- keggGet(pathway_meta$V2[i])

  #  print(pathway_meta$V2[i])  
  ## 提取此通路的基因信息
  gene.info <- path[[1]]$GENE %>%
    as.data.frame() %>%
    dplyr::rename("V1"=".") %>%
    tidyr::separate(V1,sep=";","V1") %>%
    dplyr::pull(V1)
  
  ## 提取基因中的gene symbol
  gene.symbol <- unique(gene.info[1:length(gene.info)%%2 == 0])
  #gene.id <- gene.info[1:length(gene.info)%%2 == 1]  
  
  # 生成gene symbol和Entrez ID匹配的数据框
#  gene.df <- data.frame(gene.symbol = gene.symbol,gene.id = gene.id,kegg=pathway_meta$V2[i],kegg_path=path[[1]]$PATHWAY_MAP)
  gene.df <- data.frame(type="gene",name = gene.symbol,kegg_pathwayid=pathway_meta$V2[i],kegg_pathwayname=path[[1]]$PATHWAY_MAP,kegg_category =pathway_meta$V1[i])
  #head(gene.df)
  result_gene <- rbind(result_gene,gene.df)
  
  ## 提取此通路的代谢物信息 (这里会有一些药物，是否去掉？"hsa00514","hsa00533", "hsa00511"通路没有compound？方老师帮忙check一下？)
  if (length(path[[1]]$COMPOUND)>0) {
    compound.info <- path[[1]]$COMPOUND %>%
      as.data.frame() %>%
      dplyr::rename("V1"=".") %>%
      rownames()
  
  
  
  # 生成compound和对应的pathway信息
  compound.df <- data.frame(type="metabolite",name = compound.info,kegg_pathwayid=pathway_meta$V2[i],kegg_pathwayname=path[[1]]$PATHWAY_MAP,kegg_category =pathway_meta$V1[i])
  result_compound <- rbind(result_compound,compound.df)
}
  
}
result <- rbind(result_gene,result_compound) %>%
  as_tibble()
dim(result)
write.table(result,"result/KEGGpathway_metabolite_gene-v0117.txt",quote=F,row.names = F,sep="\t")

2.2 KEGG metabolite-metabolite pairs and metabolite-gene pairs

2.2.1 KEGG’s reaction and compound’s download

## the code is runing in 2024.01.15
library(KEGGREST)
library(plyr)

#source("/Users/guituantuan/Downloads/R-packages/RbioRXN-master/R/get.kegg.all.R")
#source("/Users/guituantuan/Downloads/R-packages/RbioRXN-master/R/get.kegg.byId.R")

##断掉的话就多试几次
source("https://raw.githubusercontent.com/cran/RbioRXN/master/R/get.kegg.all.R")
source("https://raw.githubusercontent.com/cran/RbioRXN/master/R/get.kegg.byId.R")

## KEGG数据库中的代谢反应和代谢物注释信息 -------------------------------------------------
#8:33-
keggAll = get.kegg.all()

saveRDS(keggAll, file = "result/keggAll-v0115.RData")
#load("result/keggAll-v0115.RData")
#keggAll <- readRDS("result/keggAll-v0115.RData")

## 查看代谢反应和代谢物的数量
dim(keggAll$reaction)
# [1] 11990    14
dim(keggAll$compound)
# [1] 19270    25


write.csv(keggAll$reaction,file = "result/keggAllreaction_v20240115.csv",row.names = F)
write.csv(keggAll$compound,file = "result/keggAllcompound_v20240115.csv",row.names = F)

2.2.2 Downloading the enzyme and its corresponding gene information from the htext,and extract the gene and the enzyme’s relationship

Download the htext file.

https://www.genome.jp/kegg-bin/get_htext?hsa01000
grep "^E" result/hsa01000.keg | sed 's/E        //g'|cut -d" " -f2 |cut -d";" -f1 >result/gene.txt
grep "^E" result/hsa01000.keg | sed 's/E        //g'|sed 's/\[EC\:/    /g' |sed 's/\] //g' |cut -f3 >result/enzyme.txt 
###sed 's/\[EC\:/    /g'后面为制表符
paste result/gene.txt result/enzyme.txt |grep -v "D-dopachrome" |grep -v "cytochrome"|grep -v "putative" >result/gene_enzyme.txt

the gene_enzyme.txt is like this

ADH5^I1.1.1.284 1.1.1.1$
ADH1A^I1.1.1.1$

2.2.3 Extracting the metabolite-gene pairs and metabolite-metabolite pairs

library(dplyr)

## Transformed the enzyme and its corresponding gene to one-to-one relationship
gene_enzyme <- data.table::fread("result/gene_enzyme.txt",header=F) %>%
  as.data.frame() %>%
  tidyr::separate_rows("V2",sep=" ") %>%
  unique() %>%
  dplyr::rename("gene_name"="V1") %>%
  dplyr::rename("enzyme"="V2")

## all the reaction in kegg
a <- read.csv("result/keggAllreaction_v20240115.csv")

# get all the metabolite and its corresponding gene from all the reaction in kegg

## the metabolite-gene pairs are exacted from every compound in the equation and the gene with the enzyme

metabolite_gene <- a %>%
  dplyr::select(c("EQUATION","ENZYME")) %>%
  tidyr::separate_rows(EQUATION,sep=" <=> ") %>%
  tidyr::separate_rows(EQUATION,sep=" ") %>%
  dplyr::filter(grepl("^C",EQUATION)) %>%
  tidyr::separate_rows(ENZYME,sep="///") %>%
  dplyr::rename("compound"="EQUATION") %>%
  dplyr::rename("enzyme"="ENZYME") %>%
  dplyr::left_join(gene_enzyme,by="enzyme") %>%
  dplyr::filter(!is.na(gene_name)) %>%
  dplyr::select(-"enzyme") %>%
  dplyr::mutate(compound=gsub("[(]n[)]","",compound)) %>%
  dplyr::mutate(compound=gsub("[(]m[)]","",compound)) %>%
  dplyr::mutate(compound=gsub("[(]n[+]1[)]","",compound)) %>%
  dplyr::mutate(compound=gsub("[(]n[+]m[)]","",compound)) %>%
  dplyr::mutate(compound=gsub("[(]n[-]1[)]","",compound)) %>%
  unique() %>%
  dplyr::mutate(src_type="metabolite") %>%
  dplyr::mutate(dest_type="gene") %>%
  dplyr::rename("src"="compound") %>%
  dplyr::rename("dest"="gene_name") %>%
  dplyr::select(c("src_type","src","dest_type","dest"))

# get all the metabolite-metabolite pairs in the reactions
## get the metabolite from the EQUATION, the metabolite-metabolite pairs are exacted from every compound in left of equation and every compound in right of equation.

metabolite_metabolite <- a %>%
  dplyr::select(c("EQUATION","ENZYME")) %>%
  tidyr::separate(EQUATION,c("a","b"),sep=" <=> ")%>%
  tidyr::separate_rows(a,sep=" ") %>%
  tidyr::separate_rows(b,sep=" ") %>%
  dplyr::filter(grepl("^C",a)) %>%
  dplyr::filter(grepl("^C",b)) %>%
  dplyr::select(c("a","b")) %>%
  unique() %>%
  dplyr::mutate(metabolite_1=gsub("[(]n[)]","",a)) %>%
  dplyr::mutate(metabolite_1=gsub("[(]n[+]m[)]","",metabolite_1)) %>%
  dplyr::mutate(metabolite_1=gsub("[(]side","",metabolite_1)) %>%
  dplyr::mutate(metabolite_1=gsub("[(]m[+]n[)]","",metabolite_1)) %>%
  dplyr::mutate(metabolite_1=gsub("[(]n[+]1[)]","",metabolite_1)) %>%
  dplyr::mutate(metabolite_1=gsub("[(]m[)]","",metabolite_1)) %>%
  dplyr::mutate(metabolite_2=gsub("[(]n[)]","",b)) %>%
  dplyr::mutate(metabolite_2=gsub("[(]n[+]m[)]","",metabolite_2)) %>%
  dplyr::mutate(metabolite_2=gsub("[(]side","",metabolite_2)) %>%
  dplyr::mutate(metabolite_2=gsub("[(]m[+]n[)]","",metabolite_2)) %>%
  dplyr::mutate(metabolite_2=gsub("[(]n[+]1[)]","",metabolite_2)) %>%
  dplyr::mutate(metabolite_2=gsub("[(]m[)]","",metabolite_2)) %>%
  dplyr::mutate(metabolite_2=gsub("[(]n[-]1[)]","",metabolite_2)) %>%
  dplyr::mutate(metabolite_2=gsub("[(]x[)]","",metabolite_2)) %>%
  dplyr::mutate(metabolite_2=gsub("[(]n[-]x[)]","",metabolite_2)) %>%
  dplyr::mutate(metabolite_2=gsub("[(]n[+]2[)]","",metabolite_2)) %>%
  dplyr::mutate(metabolite_2=gsub("[(]m[-]1[)]","",metabolite_2)) %>%
  dplyr::select(-c("a","b")) %>%
  dplyr::filter(metabolite_1 != metabolite_2) %>%
  dplyr::mutate(src_type="metabolite") %>%
  dplyr::mutate(dest_type="metabolite") %>%
  dplyr::rename("src"="metabolite_1") %>%
  dplyr::rename("dest"="metabolite_2") %>%
  dplyr::select(c("src_type","src","dest_type","dest"))


write.table(metabolite_gene,"result/KEGG_metabolite_gene.txt",quote=F,row.names = F,sep="\t")
write.table(metabolite_metabolite,"result/KEGG_metabolite_metabolite.txt",quote=F,row.names = F,sep="\t")

2.2.4 Conclusion

The number of metabolite-gene pairs is 33537; The number of metabolite-metabolite pairs is 33537. The number of metabolite is 11788; The number of gene is 2607.

2.3 Graphite

2.3.1 Download all the data from graphite

library(graphite)
library(clipper)
library(dplyr)

kpaths <- pathways("hsapiens", "kegg")
kpaths_result <- data.frame()
for (i in 1:length(kpaths)) {
  kid <- attributes(kpaths[[i]])$id
  ktitle <- attributes(kpaths[[i]])$title
  kpaths_1 <- convertIdentifiers(convertIdentifiers(kpaths[[i]], "symbol"),"KEGGCOMP")
  kpaths_result_temp <- edges(kpaths_1,"mixed") %>%
    dplyr::mutate(pathwayid=kid) %>%
    dplyr::mutate(pathway=ktitle)
  kpaths_result <- rbind(kpaths_result,kpaths_result_temp)
}
write.table(kpaths_result,"result/Graphite/gene-metabolite-kegg.txt",quote=F,sep="\t",row.names=F)


spaths <- pathways("hsapiens", "smpdb")
smpdb_result <- data.frame()
for (i in 1:length(spaths)) {
  kid <- attributes(spaths[[i]])$id
  ktitle <- attributes(spaths[[i]])$title
  smpdb_1 <- convertIdentifiers(convertIdentifiers(spaths[[i]], "symbol"),"KEGGCOMP")
  smpdb_result_temp <- edges(smpdb_1, "mixed") %>%
    dplyr::mutate(pathwayid=kid) %>%
    dplyr::mutate(pathway=ktitle)
  
  smpdb_result <- rbind(smpdb_result,smpdb_result_temp)
}
write.table(smpdb_result,"result/Graphite/gene-metabolite-smpdb.txt",quote=F,sep="\t",row.names=F)

wikipaths <- pathways("hsapiens", "wikipathways")
wikipaths_result <- data.frame()
for (i in 1:length(wikipaths)) {
  kid <- attributes(wikipaths[[i]])$id
  ktitle <- attributes(wikipaths[[i]])$title
  wikipaths_1 <- convertIdentifiers(convertIdentifiers(wikipaths[[i]], "symbol"),"KEGGCOMP")
  wikipaths_result_temp <- edges(wikipaths_1,"mixed") %>%
    dplyr::mutate(pathwayid=kid) %>%
    dplyr::mutate(pathway=ktitle)
  wikipaths_result <- rbind(wikipaths_result,wikipaths_result_temp)

}
write.table(wikipaths_result,"result/Graphite/gene-metabolite-wikipaths.txt",quote=F,sep="\t",row.names=F)


reactomepaths <- pathways("hsapiens", "reactome")
reactome_result <- data.frame()
for (i in 1:length(reactomepaths)) {
  kid <- attributes(reactomepaths[[i]])$id
  ktitle <- attributes(reactomepaths[[i]])$title
  reactome_1 <- convertIdentifiers(convertIdentifiers(reactomepaths[[i]], "symbol"),"KEGGCOMP")
  reactome_result_temp <- edges(reactome_1,"mixed") %>%
    dplyr::mutate(pathwayid=kid) %>%
    dplyr::mutate(pathway=ktitle)
  reactome_result <- rbind(reactome_result,reactome_result_temp)
}
write.table(reactome_result,"result/Graphite/gene-metabolite-reactome.txt",quote=F,sep="\t",row.names=F)

2.3.2 Download the information of metabolism pathways


### wikipathway
rev all.txt|cut -d"_" -f2|rev >all.1.txt
cat /dev/null > test.txt
cat all.1.txt |while read line
do
grep TERM homo/*${line}_*.gpml |cut -d">" -f2|cut -d"<" -f1|  tr -s "\n" ";"|sed -e 's/;$//g' | sed "s/^/${line}
        /" >>test.txt
done

grep -i metabolic test.txt >test_metabolic.txt


### smpdb
wget https://smpdb.ca/downloads/smpdb_pathways.csv.zip

### reactome
#R-HSA-1430728 is the metabolism, then choose the hierarchical is lower than it

### kegg
#choose the metabolism pathway

2.3.3 Extract the data of metabolism

library(dplyr)

kegg_data_metabolism <- data.table::fread("result/Graphite/gene-metabolite-kegg.txt") %>%
  as.data.frame() %>%
  dplyr::filter(pathway %in% unique(kegg_pathway$PATHWAY)) %>%
  dplyr::select(-c("direction","type")) %>%
  unique() %>%
  dplyr::filter(src_type =="KEGGCOMP" | dest_type=="KEGGCOMP") %>%
  dplyr::mutate(src_new = ifelse(src_type=="KEGGCOMP",src,dest),
                dest_new = ifelse(src_type=="KEGGCOMP",dest,src),
                src_type_new=ifelse(src_type=="KEGGCOMP",src_type,dest_type),
                dest_type_new=ifelse(src_type=="KEGGCOMP",dest_type,src_type)) %>%
  dplyr::mutate(src_new1=ifelse(src_type=="KEGGCOMP" & dest_type=="KEGGCOMP",
                                ifelse(src_new>dest_new,src_new,dest_new),
                                src_new)) %>%
  dplyr::mutate(dest_new1=ifelse(src_type=="KEGGCOMP" & dest_type=="KEGGCOMP",
                                 ifelse(src_new>dest_new,dest_new,src_new),
                                 dest_new)) %>%
  dplyr::select(-c("src_type","src","dest_type","dest","src_new","dest_new")) %>%
  dplyr::rename("src_type"="src_type_new","src"="src_new1","dest_type"="dest_type_new","dest"="dest_new1") %>%
  dplyr::select(c("src_type","src","dest_type","dest","pathwayid","pathway")) %>%
  unique()
write.table(kegg_data_metabolism,"result/Graphite/gene-metabolite-kegg_metabolism.txt",quote=F,sep="\t",row.names=F)

### smpdb
metabolism_pathway <- read.csv("result/Graphite/smpdb_pathways.csv") %>%
  dplyr::filter(Subject=="Metabolic")

smpdb_data <- data.table::fread("result/Graphite/gene-metabolite-smpdb.txt") %>%
  as.data.frame()
smpdb_metabolism <-  smpdb_data %>%
  dplyr::filter(pathway %in% metabolism_pathway$Name) %>%
  dplyr::filter(src_type=="KEGGCOMP"|dest_type=="KEGGCOMP") %>%
  dplyr::filter(!grepl("De Novo",pathway)) %>%
  dplyr::mutate(pathway_new=ifelse(grepl("Phosphatidylcholine Biosynthesis",pathway),
                                   "Phosphatidylcholine Biosynthesis",pathway)) %>%
  dplyr::mutate(pathway_new=ifelse(grepl("Cardiolipin Biosynthesis",pathway_new),
                                   "Cardiolipin Biosynthesis",pathway_new)) %>%
  dplyr::mutate(pathway_new=ifelse(grepl("Phosphatidylethanolamine Biosynthesis",pathway_new),
                                   "Phosphatidylethanolamine Biosynthesis",pathway_new)) %>%
  dplyr::mutate(pathway_new=ifelse(grepl("Mitochondrial Beta-Oxidation",pathway_new),
                                   "Mitochondrial Beta-Oxidation",pathway_new)) %>%
  dplyr::select(-"pathway") %>%
  dplyr::rename("pathway"="pathway_new") %>%
  dplyr::mutate(src_new = ifelse(src_type=="KEGGCOMP",src,dest),
                       dest_new = ifelse(src_type=="KEGGCOMP",dest,src),
                       src_type_new=ifelse(src_type=="KEGGCOMP",src_type,dest_type),
                       dest_type_new=ifelse(src_type=="KEGGCOMP",dest_type,src_type)) %>%
  dplyr::mutate(src_new1=ifelse(src_type=="KEGGCOMP" & dest_type=="KEGGCOMP",
                                ifelse(src_new>dest_new,src_new,dest_new),
                                src_new)) %>%
  dplyr::mutate(dest_new1=ifelse(src_type=="KEGGCOMP" & dest_type=="KEGGCOMP",
                                 ifelse(src_new>dest_new,dest_new,src_new),
                                 dest_new)) %>%
  dplyr::select(-c("src_type","src","dest_type","dest","src_new","dest_new")) %>%
  dplyr::rename("src_type"="src_type_new","src"="src_new1","dest_type"="dest_type_new","dest"="dest_new1") %>%
  dplyr::select(c("src_type","src","dest_type","dest","pathwayid","pathway")) %>%
  unique()

### reactome

## metabolism pathway ###
reactome_metabolism_pathwayid <- data.table::fread("result/Graphite/ReactomePathwaysRelation.txt",header=F) %>%
  as.data.frame() %>%
  dplyr::filter(V1=="R-HSA-1430728")

reactome_metabolism_data <- data.table::fread("result/Graphite/gene-metabolite-reactome.txt") %>%
  as.data.frame() %>%
  dplyr::filter(pathwayid %in% reactome_metabolism_pathwayid$V2) %>%
  dplyr::filter(src_type=="KEGGCOMP"|dest_type=="KEGGCOMP") %>%
  dplyr::mutate(src_new = ifelse(src_type=="KEGGCOMP",src,dest),
                dest_new = ifelse(src_type=="KEGGCOMP",dest,src),
                src_type_new=ifelse(src_type=="KEGGCOMP",src_type,dest_type),
                dest_type_new=ifelse(src_type=="KEGGCOMP",dest_type,src_type)) %>%
  dplyr::mutate(src_new1=ifelse(src_type=="KEGGCOMP" & dest_type=="KEGGCOMP",
                               ifelse(src_new>dest_new,src_new,dest_new),
                               src_new)) %>%
  dplyr::mutate(dest_new1=ifelse(src_type=="KEGGCOMP" & dest_type=="KEGGCOMP",
                                ifelse(src_new>dest_new,dest_new,src_new),
                                dest_new)) %>%
  dplyr::select(-c("src_type","src","dest_type","dest","src_new","dest_new")) %>%
  dplyr::rename("src_type"="src_type_new","src"="src_new1","dest_type"="dest_type_new","dest"="dest_new1") %>%
  dplyr::select(c("src_type","src","dest_type","dest","pathwayid","pathway")) %>%
  unique()
write.table(reactome_metabolism_data,"result/Graphite/gene-metabolite-reactome_metabolism.txt",quote=F,sep="\t",row.names=F)

### wikipathways

wikipathway_metabolism_id <- data.table::fread("wikipath/test_metabolic.txt",header=F) %>%
  as.data.frame()

wikipathway_data <- data.table::fread("result/Graphite/gene-metabolite-wikipaths.txt") %>%
  as.data.frame() %>%
  dplyr::filter(pathwayid %in% wikipathway_metabolism_id$V1) %>%
  dplyr::filter(src_type=="KEGGCOMP"|dest_type=="KEGGCOMP") %>%
  dplyr::mutate(src_new = ifelse(src_type=="KEGGCOMP",src,dest),
                dest_new = ifelse(src_type=="KEGGCOMP",dest,src),
                src_type_new=ifelse(src_type=="KEGGCOMP",src_type,dest_type),
                dest_type_new=ifelse(src_type=="KEGGCOMP",dest_type,src_type)) %>%
  dplyr::mutate(src_new1=ifelse(src_type=="KEGGCOMP" & dest_type=="KEGGCOMP",
                                ifelse(src_new>dest_new,src_new,dest_new),
                                src_new)) %>%
  dplyr::mutate(dest_new1=ifelse(src_type=="KEGGCOMP" & dest_type=="KEGGCOMP",
                                 ifelse(src_new>dest_new,dest_new,src_new),
                                 dest_new)) %>%
  dplyr::select(-c("src_type","src","dest_type","dest","src_new","dest_new")) %>%
  dplyr::rename("src_type"="src_type_new","src"="src_new1","dest_type"="dest_type_new","dest"="dest_new1") %>%
  dplyr::select(c("src_type","src","dest_type","dest","pathwayid","pathway")) %>%
  unique()
write.table(wikipathway_data,"result/Graphite/gene-metabolite-wikipathway_metabolism.txt",quote=F,sep="\t",row.names=F)

2.3.4 Combine the data from database kegg, wikipathway, reactome, smpdb and then uniq the data

cat gene-metabolite-*metabolism.txt |cut -f1-4,6|sort|uniq >result.graphite.txt

2.4 BiGG

2.4.1 Down all the models in the BiGG

curl 'http://bigg.ucsd.edu/api/v2/models'

Then, choice the model from Homo sapiens,then in reserve model is iAT_PLT_636, iAB_RBC_283, RECON1, Recon3D.

Get all the reactions names in the 4 human models.

curl 'http://bigg.ucsd.edu/api/v2/models/iAT_PLT_636/reactions' >result/BiGG/iAT_PLT_636.reactions
curl 'http://bigg.ucsd.edu/api/v2/models/iAB_RBC_283/reactions' >result/BiGG/iAB_RBC_283.reactions
curl 'http://bigg.ucsd.edu/api/v2/models/RECON1/reactions' >result/BiGG/RECON1.reactions
curl 'http://bigg.ucsd.edu/api/v2/models/Recon3D/reactions' >result/BiGG/Recon3D.reactions

Change the json reaction names to txt file.

tt<-jsonlite::stream_in(file("result/BiGG/iAB_RBC_283.reactions"),pagesize = 100)
write.table(tt$results[[1]],"result/BiGG/iAB_RBC_283.reactions.txt",quote=F,row.names=F,col.names=F)

2.4.2 Download the every reaction

curl 'http://bigg.ucsd.edu/api/v2/models/iAT_PLT_636/reactions/10FTHF6GLUtm'    >result/BiGG/iAT_PLT_636/reaction/json/10FTHF6GLUtm.txt
curl 'http://bigg.ucsd.edu/api/v2/models/iAT_PLT_636/reactions/10FTHF7GLUtl'    >result/BiGG/iAT_PLT_636/reaction/json/10FTHF7GLUtl.txt

Get the information in every reaction.

The R script that change the reaction information in json to txt.

args <- commandArgs(T)

library(dplyr)

mydata <- paste0("result/BiGG/",args[1],"/reaction/json/",args[2],".txt")

recon1<-jsonlite::stream_in(file(mydata),pagesize = 100)

metabolite_biggid <- recon1$metabolites[[1]]$bigg_id
metabolites <- recon1$metabolites[[1]]$name
compartment_bigg_id <- recon1$metabolites[[1]]$compartment_bigg_id

gene <- recon1$results[[1]]$genes[[1]]$name

subsystem <- recon1$results[[1]]$subsystem

model=args[1]

if (length(gene)>0) {

  dd <- data.frame(metabolite_biggid1=paste(metabolite_biggid,compartment_bigg_id,sep="_"),src_type="metabolite",src=metabolites,metabolite_biggid2=NA,dest_type="gene",dest=paste(gene,collapse=";"),subsystems=subsystem,models=model)

#  write.table(dd,paste0(args[1],"_txt/",args[2],".txt"),quote=F,sep="\t",row.names=F)
}else {
  dd <- data.frame()
}

if (length(unique(recon1$metabolites[[1]]$stoichiometry))>1) {

metabolites_stoichiometry1 <- recon1$metabolites[[1]] %>%
  dplyr::filter(stoichiometry>0)

metabolites_stoichiometry2 <- recon1$metabolites[[1]] %>%
  dplyr::filter(stoichiometry<0)

result <- data.frame()
for (i in 1:nrow(metabolites_stoichiometry1)) {
  for (j in 1:nrow(metabolites_stoichiometry2)) {
    temp <- data.frame(metabolite_biggid1=paste(metabolites_stoichiometry1$bigg_id[i],metabolites_stoichiometry1$compartment_bigg_id[i],sep="_"),src_type="metabolite",src=metabolites_stoichiometry1$name[i],
                       metabolite_biggid2=paste(metabolites_stoichiometry2$bigg_id[j],metabolites_stoichiometry2$compartment_bigg_id[j],sep="_"),dest_type="metabolite",dest=metabolites_stoichiometry2$name[j],subsystems=subsystem,models=model)
    result <- rbind(result,temp)
  }
}

result_final <- rbind(dd,result)

write.table(result_final,paste0("result/BiGG/",args[1],"/reaction/txt/",args[2],".txt"),quote=F,sep="\t",row.names=F)
}else {
print(0)
}

Run the R script that change the reaction information in json to txt in batch.

cat iAB_RBC_283.reaction.txt |while read line
do
  Rscript reaction_json2txt.R iAB_RBC_283 $line
done

cat iAT_PLT_636.reaction.txt |while read line
do
  Rscript reaction_json2txt.R iAT_PLT_636 $line
done

cat RECON1.reaction.txt |while read line
do
  Rscript reaction_json2txt.R RECON1 $line
done

cat Recon3D.reaction.txt |while read line
do
  Rscript reaction_json2txt.R Recon3D $line
done

Combine all the reactions info include the gene-metabolite pair,the metabolite-metabolite pair,subsystem,model.

cat result/BiGG/iAB_RBC_283/reaction/txt/*|grep -v metabolite_biggid1 >result/BiGG/cat_reaction_info_iAB_RBC_283.txt
cat result/BiGG/iAT_PLT_636/reaction/txt/*|grep -v metabolite_biggid1 >result/BiGG/cat_reaction_info_iAT_PLT_636.txt
cat result/BiGG/Recon3D/reaction/txt/*|grep -v metabolite_biggid1 >result/BiGG/cat_reaction_info_Recon3D.txt
cat result/BiGG/RECON1/reaction/txt/*|grep -v metabolite_biggid1 >result/BiGG/cat_reaction_info_RECON1.txt

2.4.3 Exact all the metabolites names in every model in BiGG

curl 'http://bigg.ucsd.edu/api/v2/models/iAB_RBC_283/metabolites' >result/BiGG/iAB_RBC_283_metabolite.json
curl 'http://bigg.ucsd.edu/api/v2/models/iAT_PLT_636/metabolites' >result/BiGG/iAT_PLT_636_metabolite.json
curl 'http://bigg.ucsd.edu/api/v2/models/Recon3D/metabolites' >result/BiGG/Recon3D_metabolite.json
curl 'http://bigg.ucsd.edu/api/v2/models/RECON1/metabolites' >result/BiGG/RECON1_metabolite.json

Change the json to txt.

args=commandArgs(T)
recon1<-jsonlite::stream_in(file(paste0("result/BiGG/",args[1],"_metabolite.json")),pagesize = 100)

aa <- recon1$results[[1]]
write.table(aa$bigg_id,paste0("result/BiGG/",args[1],"_metabolite.txt"),row.names = F,col.names=F,sep="\t",quote=F)

Use the script.

Rscript metabolitename_json2txt.R iAB_RBC_283
Rscript metabolitename_json2txt.R iAT_PLT_636
Rscript metabolitename_json2txt.R RECON1
Rscript metabolitename_json2txt.R Recon3D

Download every metabolite information.

curl 'http://bigg.ucsd.edu/api/v2/models/iAB_RBC_283/metabolites/13dpg_c'       >result/BiGG/iAB_RBC_283_metabolites/13dpg_c.json
curl 'http://bigg.ucsd.edu/api/v2/models/iAB_RBC_283/metabolites/23dpg_c'       >result/BiGG/iAB_RBC_283_metabolites/23dpg_c.json
curl 'http://bigg.ucsd.edu/api/v2/models/iAB_RBC_283/metabolites/2kmb_c'        >result/BiGG/iAB_RBC_283_metabolites/2kmb_c.json
curl 'http://bigg.ucsd.edu/api/v2/models/iAB_RBC_283/metabolites/2pg_c' >result/BiGG/iAB_RBC_283_metabolites/2pg_c.json

The R script that exact the KEGG ID in metabolite file.

args <- commandArgs(T)
mydata <- paste0("result/BiGG/",args[1],"/metabolite/json/",args[2],".json")
# "iAB_RBC_283_metabolites/glu__L_c.json"
recon1<-jsonlite::stream_in(file(mydata),pagesize = 100)
kegg_id <- paste(recon1$database_links$`KEGG Compound`[[1]]$id,collapse = ";")
name <- recon1$name

result <- data.frame(name=name,kegg_id=kegg_id,source=args[1])
write.table(result,paste0("result/BiGG/",args[1],"/metabolite/txt/",args[2],".txt"),quote=F,row.names = F,sep="\t")

Run the R script that exact the KEGG ID in the metabolite file.

cat result/BiGG/iAB_RBC_283_metabolite.txt|while read line
do
  Rscript metabolite_json2txt.R iAB_RBC_283 ${line}
done

cat result/BiGG/iAT_PLT_636_metabolite.txt |while read line
do
  Rscript metabolite_json2txt.R iAT_PLT_636 ${line}
done

cat result/BiGG/RECON1_metabolite.txt|while read line
do
  Rscript metabolite_json2txt.R RECON1 ${line}
done

cat result/BiGG/Recon3D_metabolite.txt|while read line
do
  Rscript metabolite_json2txt.R Recon3D ${line}
done

Combine all the metabolite in 1 file for every model.

cat result/BiGG/iAB_RBC_283/metabolite/txt/*_metabolite.txt|grep -v kegg_id >result/BiGG/iAB_RBC_283_metabolite_all.txt
cat result/BiGG/iAT_PLT_636/metabolite/txt/*_metabolite.txt |grep -v kegg_id >result/BiGG/iAT_PLT_636_metabolite_all.txt
cat result/BiGG/RECON1/metabolite/txt/*_metabolite.txt|grep -v kegg_id >result/BiGG/RECON1_metabolite_all.txt
cat result/BiGG/Recon3D/metabolite/txt/*_metabolite.txt|grep -v kegg_id >result/BiGG/Recon3D_metabolite_all.txt

2.4.4 The R script that change the metabolite name to kegg id in the final output

args <- commandArgs(T)

library(dplyr)

metabolite_info <- data.table::fread(paste0("result/BiGG/",args[1],"_metabolite_all.txt"),header=F) %>%
  as.data.frame()

gene_metabolite_pairs <- data.table::fread(paste0("result/BiGG/cat_reaction_info_",args[1],".txt"),header=F) %>%
  as.data.frame() %>%
  dplyr::left_join(metabolite_info,by=c("V3"="V1")) %>%
  dplyr::left_join(metabolite_info,by=c("V6"="V1")) %>%
  dplyr::mutate(src=ifelse(V2.y=="" | is.na(V2.y),
                           V3.x,V2.y)) %>%
  dplyr::mutate(dest=ifelse(V2=="" | is.na(V2),
                            V6,V2)) %>%
  dplyr::select(c("V2.x","src","V5","dest","V7","V8")) %>%
  dplyr::rename("src_type"="V2.x","dest_type"="V5","subsystems"="V7","model"="V8") %>%
  tidyr::separate_rows(src,sep=";") %>%
  tidyr::separate_rows(dest,sep=";") %>%
  dplyr::filter(src != dest) %>%
  unique()
write.table(gene_metabolite_pairs,paste0("result/BiGG/result_",args[1],".txt"),quote=F,row.names = F,sep="\t")

Use the R script that change the metabolite name to KEGG ID.

Rscript result.R iAB_RBC_283
Rscript result.R iAT_PLT_636
Rscript result.R RECON1
Rscript result.R Recon3D
cat result_*.txt |cut -f1-5 |sort|uniq >result/BiGG/result.BiGG.txt

2.4.5 Combine the data from BiGG and graphite

 cat result/BiGG/result.BiGG.txt ../graphite/result.graphite.txt |sed s/KEGGCOMP/metabolite/g |sed s/SYMBOL/gene/g |sort|uniq >gene-metabolite_BiGG_graphite.txt

dat <- data.table::fread("/Users/guituantuan/Desktop/projects/database/gene-metabolite/gene-metabolite_BiGG_graphite.txt") %>%
  as.data.frame() %>%
  dplyr::filter(keggId!=gene) %>%
  dplyr::mutate(keggId_new=ifelse(src_type=="metabolite" & dest_type=="metabolite",
                                ifelse(keggId>gene,keggId,gene),
                                keggId)) %>%
  dplyr::mutate(gene_new=ifelse(src_type=="metabolite" & dest_type=="metabolite",
                                 ifelse(keggId>gene,gene,keggId),
                                 gene)) %>%
  dplyr::select(-c("keggId","gene")) %>%
  dplyr::rename("keggId"="keggId_new","gene"="gene_new") %>%
  dplyr::select(c("src_type","keggId","dest_type","gene","subsystems"))

dat$subsystems <- stringr::str_to_title(dat$subsystems)

kegg_pathway1 <- kegg_pathway %>%
  dplyr::mutate(pathway1=stringr::str_to_title(PATHWAY)) %>%
  dplyr::select(c("pathway1","pathway_type")) %>%
  dplyr::rename("subsystems"="pathway1") %>%
  unique()

pathway_type_1 <- data.table::fread("pathway_type.txt",header=F) %>%
  as.data.frame()

names(pathway_type_1) <- c("subsystems","pathway_type")
aa <- rbind(kegg_pathway1,pathway_type_1)

dat2 <- dat %>%
  dplyr::mutate(subsystems=ifelse(subsystems=="Urea Cycle/Amino Group Metabolism","Urea Cycle",subsystems)) %>%
  dplyr::mutate(subsystems=ifelse(subsystems=="Nucleotides","Nucleotide Interconversion",subsystems)) %>%
  dplyr::mutate(subsystems=ifelse(subsystems=="Citric Acid Cycle","TCA Cycle",subsystems)) %>%
  dplyr::mutate(subsystems=ifelse(subsystems=="Citrate Cycle (Tca Cycle)","TCA Cycle",subsystems)) %>%
  dplyr::mutate(subsystems=ifelse(subsystems=="The Citric Acid (Tca) Cycle And Respiratory Electron Transport","TCA Cycle",subsystems)) %>%
  dplyr::mutate(subsystems=ifelse(grepl("Vitamin",subsystems),"Vitamin Metabolism",subsystems)) %>%
  dplyr::mutate(subsystems=ifelse(grepl("Tca Cycle",subsystems),"TCA Cycle",subsystems)) %>%
  dplyr::mutate(subsystems=ifelse(grepl("Amino Acid",subsystems),"Amino Acid Metabolism",subsystems)) %>%
  dplyr::mutate(subsystems=ifelse(grepl("Coa ",subsystems),"CoA Metabolism",subsystems)) %>%
  dplyr::mutate(subsystems=ifelse(grepl("Fatty Acid",subsystems),"Fatty Acid Metabolism",subsystems)) %>%
  dplyr::mutate(subsystems=ifelse(subsystems=="Fatty Acid Oxidation","Fatty Acid Metabolism",subsystems)) %>%
  dplyr::mutate(subsystems=ifelse(subsystems=="Aminosugar Metabolism","Amino Sugar Metabolism",subsystems)) %>%
  unique() %>%
  dplyr::left_join(aa,by="subsystems")


write.table(dat2,"/Users/guituantuan/Desktop/projects/database/gene-metabolite/gene-metabolite_BiGG_graphite_uniq.txt",
            quote=F,row.names=F,sep="\t")