前者:
# Build a long-format gene -> GO term table with one row per (gene, term)
# pair. Each row of `gterms` supplies a gene ID in `query` and a
# comma-separated string of GO terms in `GOs`.
#
# The per-gene pieces are stored in a preallocated list and bound once at the
# end; calling rbind() inside the loop re-copies the growing table on every
# iteration.
gene2go_pieces <- vector("list", nrow(gterms))
# seq_len() gives an empty sequence for a zero-row `gterms`, whereas
# 1:nrow(gterms) would iterate over c(1, 0).
for (row in seq_len(nrow(gterms))) {
  gene_terms <- str_split(gterms[row, "GOs"], ",", simplify = FALSE)[[1]]
  gene_id <- gterms[row, "query"][[1]]
  # tibble() replaces the deprecated data_frame(); the gene ID is repeated
  # once per GO term so both columns have the same length.
  tmp <- tibble(gene = rep(gene_id, length(gene_terms)), term = gene_terms)
  gene2go_pieces[[row]] <- tmp
}
# Append the new rows to whatever `gene2go` already holds, in row order.
gene2go <- do.call(rbind, c(list(gene2go), gene2go_pieces))
# Query IDs taken from the `query` column of `egg`; printed for inspection.
gene_ids <- egg$query
gene_ids
# Logical mask of rows that carry a GO annotation. is.na() is tested
# explicitly because `NA != ""` evaluates to NA, and an NA in a logical index
# selects an all-NA element instead of dropping the row.
eggnog_lines_with_go <- !is.na(egg$GOs) & egg$GOs != ""
# Split each retained comma-separated GO string into a character vector;
# the result is a list with one element per retained row.
eggnog_annoations_go <- str_split(egg$GOs[eggnog_lines_with_go], ",")
后者:
# Vectorized construction of the gene -> GO term table: each retained gene ID
# is repeated once per GO term it carries, and the per-gene term vectors are
# flattened in the same order, so the two columns line up row by row.
# lengths() always returns an integer vector; sapply(x, length) returns a
# list when `x` is empty.
gene_to_go <- data.frame(
  gene = rep(
    gene_ids[eggnog_lines_with_go],
    times = lengths(eggnog_annoations_go)
  ),
  term = unlist(eggnog_annoations_go)
)
head(gene_to_go)