R语言中绘制火山图

 

001、

# Simulate a differential-expression table ----
# Fix the RNG seed so every run draws the same values.
set.seed(123)
# Gene labels "Gene 1" ... "Gene 1000" (paste() separates with a space).
genes <- paste("Gene", seq_len(1000))
# log2 fold changes: 1000 draws from a normal distribution, mean 0, sd 2.
logFC <- rnorm(n = 1000, mean = 0, sd = 2)
# p-values: 1000 uniform draws on runif()'s default interval, 0 to 1.
pvalue <- runif(n = 1000)
# Echo the three raw vectors in full for inspection.
genes
logFC
pvalue

# Bundle the vectors into one data frame; columns take the vectors' names.
data <- data.frame(genes, logFC, pvalue)
head(data)

# Flag each gene: "Significant" requires both |logFC| > 1 and p < 0.05.
data$significant <- with(
  data,
  ifelse(abs(logFC) > 1 & pvalue < 0.05, "Significant", "Not Significant")
)
head(data)
# Draw the basic volcano plot ----
# ggplot2 provides ggplot() and every layer used below; attach it before the
# first plot so the script also runs in a fresh R session.
library(ggplot2)

# Colours are named by class label so the mapping does not depend on the
# alphabetical order of the labels or on both classes being present.
ggplot(data, aes(x = logFC, y = -log10(pvalue), color = significant)) +
  geom_point(alpha = 0.6, size = 1.5) +  # alpha sets transparency, size the point size
  scale_color_manual(
    values = c("Not Significant" = "grey", "Significant" = "red")
  ) +  # significant genes in red, the rest in grey
  theme_minimal() +  # plain, minimal theme
  labs(title = "Volcano Plot", x = "Log2 Fold Change", y = "-Log10 P-value") +  # title and axis labels
  theme(plot.title = element_text(hjust = 0.5))  # centre the title


# Same volcano plot, now with dashed guide lines at the cutoffs used to flag
# significance (|logFC| = 1 and p = 0.05).
# Colours are named by class label so each label keeps its colour regardless
# of label ordering or of a class being absent from the data.
ggplot(data, aes(x = logFC, y = -log10(pvalue), color = significant)) +
  geom_point(alpha = 0.6, size = 1.5) +
  scale_color_manual(
    values = c("Not Significant" = "grey", "Significant" = "red")
  ) +
  theme_minimal() +
  labs(title = "Volcano Plot", x = "Log2 Fold Change", y = "-Log10 P-value") +
  theme(plot.title = element_text(hjust = 0.5)) +
  geom_vline(xintercept = c(-1, 1), linetype = "dashed", color = "blue") +  # vertical lines at the logFC cutoffs
  geom_hline(yintercept = -log10(0.05), linetype = "dashed", color = "blue")  # horizontal line at the p-value cutoff


# Split genes by direction of change ----
# A gene that misses either cutoff (p >= 0.05 or |logFC| <= 1) is labelled
# "Not Significant"; every remaining gene is labelled by the sign of logFC.
data$expression <- with(
  data,
  ifelse(
    pvalue >= 0.05 | abs(logFC) <= 1,
    "Not Significant",
    ifelse(logFC > 1, "Upregulated", "Downregulated")
  )
)

# Preview the first rows to check the new column.
head(data)


# Volcano plot coloured by direction of change.
# Colours are named by class label: with an unnamed vector the colours are
# assigned in label order, so a missing class would shift the remaining
# colours onto the wrong labels.
ggplot(data, aes(x = logFC, y = -log10(pvalue), color = expression)) +
  geom_point(alpha = 0.6, size = 1.5) +  # point transparency and size
  scale_color_manual(
    values = c(
      "Downregulated" = "blue",
      "Not Significant" = "grey",
      "Upregulated" = "red"
    )
  ) +  # down = blue, not significant = grey, up = red
  theme_minimal() +  # minimal theme
  labs(title = "Volcano Plot", x = "Log2 Fold Change", y = "-Log10 P-value") +  # title and axis labels
  theme(plot.title = element_text(hjust = 0.5)) +  # centre the title
  geom_vline(xintercept = c(-1, 1), linetype = "dashed", color = "black") +  # logFC cutoff lines
  geom_hline(yintercept = -log10(0.05), linetype = "dashed", color = "black")  # p-value cutoff line


# 安装 ggrepel 包
# install.packages("ggrepel")

# 加载 ggrepel 包
library(ggrepel)

# Keep only the up- and down-regulated genes; these are the ones labelled.
significant_genes <- data[data$expression != "Not Significant", ]

# Volcano plot with gene-name labels ----
# Colours are named by class label so each label keeps its colour even if a
# class is absent from the data.
ggplot(data, aes(x = logFC, y = -log10(pvalue), color = expression)) +
  geom_point(alpha = 0.6, size = 1.5) +
  scale_color_manual(
    values = c(
      "Downregulated" = "blue",
      "Not Significant" = "grey",
      "Upregulated" = "red"
    )
  ) +
  theme_minimal() +
  labs(title = "Volcano Plot", x = "Log2 Fold Change", y = "-Log10 P-value") +
  theme(plot.title = element_text(hjust = 0.5)) +
  geom_vline(xintercept = c(-1, 1), linetype = "dashed", color = "black") +
  geom_hline(yintercept = -log10(0.05), linetype = "dashed", color = "black") +
  # geom_text_repel() keeps labels from overlapping so gene names stay legible;
  # it draws labels only for the rows of significant_genes.
  geom_text_repel(data = significant_genes, aes(label = genes),
                  size = 3, box.padding = 0.3, point.padding = 0.3)


# Volcano plot where point size and opacity carry extra information.
# Colours are named by class label so each label keeps its colour even if a
# class is absent from the data.
ggplot(data, aes(x = logFC, y = -log10(pvalue), color = expression)) +
  geom_point(aes(size = -log10(pvalue), alpha = abs(logFC))) +  # map size and transparency to the data
  # size = -log10(pvalue): the more significant a point, the larger it is.
  # alpha = abs(logFC): the larger the fold change, the more opaque the point.
  scale_color_manual(
    values = c(
      "Downregulated" = "blue",
      "Not Significant" = "grey",
      "Upregulated" = "red"
    )
  ) +
  theme_minimal() +
  labs(title = "Volcano Plot", x = "Log2 Fold Change", y = "-Log10 P-value") +
  theme(plot.title = element_text(hjust = 0.5)) +
  geom_vline(xintercept = c(-1, 1), linetype = "dashed", color = "black") +
  geom_hline(yintercept = -log10(0.05), linetype = "dashed", color = "black")


# Add a simulated condition column: each row is randomly assigned
# "Condition A" or "Condition B".
data$condition <- sample(c("Condition A", "Condition B"), nrow(data), replace = TRUE)

# One volcano panel per condition.
# Colours are named by class label so each label keeps its colour even if a
# class is absent from the data.
ggplot(data, aes(x = logFC, y = -log10(pvalue), color = expression)) +
  geom_point(alpha = 0.6, size = 1.5) +
  scale_color_manual(
    values = c(
      "Downregulated" = "blue",
      "Not Significant" = "grey",
      "Upregulated" = "red"
    )
  ) +
  theme_minimal() +
  labs(title = "Volcano Plot", x = "Log2 Fold Change", y = "-Log10 P-value") +
  theme(plot.title = element_text(hjust = 0.5)) +
  geom_vline(xintercept = c(-1, 1), linetype = "dashed", color = "black") +
  geom_hline(yintercept = -log10(0.05), linetype = "dashed", color = "black") +
  facet_wrap(~ condition)  # split into panels by condition

# 安装 plotly 包
# install.packages("plotly")

# 加载 plotly 包
library(plotly)

# Build the static volcano plot and keep it in `p` for conversion below.
# Colours are named by class label so each label keeps its colour even if a
# class is absent from the data.
p <- ggplot(data, aes(x = logFC, y = -log10(pvalue), color = expression)) +
  geom_point(alpha = 0.6, size = 1.5) +
  scale_color_manual(
    values = c(
      "Downregulated" = "blue",
      "Not Significant" = "grey",
      "Upregulated" = "red"
    )
  ) +
  theme_minimal() +
  labs(title = "Volcano Plot", x = "Log2 Fold Change", y = "-Log10 P-value") +
  theme(plot.title = element_text(hjust = 0.5)) +
  geom_vline(xintercept = c(-1, 1), linetype = "dashed", color = "black") +
  geom_hline(yintercept = -log10(0.05), linetype = "dashed", color = "black")

# Convert the ggplot object into an interactive plotly figure.
ggplotly(p)

 。

 

 

source:

01、https://blog.csdn.net/zegeai/article/details/142934945

 

posted @ 2025-03-19 12:53  小鲨鱼2018  阅读(73)  评论(0)    收藏  举报