发表级ggplot绘图流程和技巧 | 论文发表

The R Graph Gallery - R绘图代码库

themes - https://www.r-graph-gallery.com/ggplot2-package.html#themes

 

要开始修饰以前的核心图片,准备发表论文了。

把之前比较raw的图修饰格式,统一生成高清晰图片,准备放入paper中。

会慢慢补充所有常见的绘图代码。

 

一个raw image的代码:

p1 <- ggplot(oxidation.df, aes(x=group, y=score, color=group)) + 
        geom_boxplot() + 
        geom_jitter(shape=16, position=position_jitter(0.2)) +
        labs(title = "Fatty acid metabolism")
p1

  

第一步:修改df里的标签

oxidation.df$group <- plyr::mapvalues(oxidation.df$group, 
                                      from = c("GFP- early","GFP+ early","GFP- late","GFP+ late"),
                                      to = c("HhOFF early", "HhON early", "HhOFF late", "HhON late"))

  

第二步:修改标签顺序

oxidation.df$group <- factor(oxidation.df$group, levels = c("HhOFF early", "HhON early", "HhOFF late", "HhON late"))

  

第三部:精修格式主题字体

主题

常用的主题:https://www.r-graph-gallery.com/ggplot2-package.html#themes

  • theme_bw - 去掉了灰白背景,加了边框,最常用
  • theme_classic - 只留下了加粗的左下边框,最经典,适合实验图
  • egg::theme_article - 只有四周的边框,最适合发表文章,缺点:图例间隔太小
  • theme_minimal - 只留下了grid,没有边框
  • theme_minimal_hgrid - 只留下了hgrid
  • theme_void - 只留下了图例,适合tSNE图
theme_bw()
theme_void()
# remove grid
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

 

修改title

labs(x = "", y = "Pathway score\n", title = "Fatty acid metabolism")

xy轴标签字体大小

theme(axis.text.x  = element_text(face="plain", angle=30, size = 14, color = "black", vjust=0.6),
        axis.text.y  = element_text(size = 10),
        axis.title.y = element_text(size = 14))

去掉多余的图例

theme(legend.position = "none")

 

填充颜色

library(RColorBrewer)
scale_fill_manual(values=brewer.pal(9,"Paired"))
scale_color_manual(values=brewer.pal(9,"Paired")[c(3,4,5,6)])

 

其他

限制xy坐标范围

scale_x_continuous(limits = c(0,2.5))
scale_y_continuous(limits = c(0,2.5))

  

一组数据的比较

library(ggpubr)
stat_compare_means(label.y = 2.3, label.x = 1, size=5)

  

多组数据的比较

library(ggpubr)
my_comparisons <- list(c("HhOFF early", "HhON early"), c("HhOFF late", "HhON late"))

stat_compare_means(method = "anova", label.y = 1.29) + # global
stat_compare_means(comparisons = my_comparisons, label.y = 1, label = "p.signif") + # paired
scale_y_continuous(limits = c(-0.52, 1.3))

  

代码汇总

tmp$group <- plyr::mapvalues(tmp$group, from = c("GFP- early","GFP+ early","GFP- late","GFP+ late"),
                                      to = c("HhOFF early", "HhON early", "HhOFF late", "HhON late"))

tmp$group <- factor(tmp$group, levels = c("HhOFF early", "HhON early", "HhOFF late", "HhON late"))

library(ggpubr)
my_comparisons <- list(c("HhOFF early", "HhON early"), c("HhOFF late", "HhON late"))

options(repr.plot.width=4, repr.plot.height=4)
p1 <- ggplot(tmp, aes(x=group, y=score, color=group)) + 
        geom_boxplot() + 
        theme_bw() +
        labs(x = "", y = "Pathway score\n", title = "Fatty acid metabolism") +
        geom_jitter(shape=16, position=position_jitter(0.2)) +
        theme(legend.position = "none") + 
        theme(axis.text.x  = element_text(face="plain", angle=30, size = 14, color = "black", vjust=0.6),
        axis.text.y  = element_text(size = 10),
        axis.title.y = element_text(size = 14)) +
        # scale_fill_manual(values=brewer.pal(9,"Paired"))
        scale_color_manual(values=brewer.pal(9,"Paired")[c(3,4,5,6)]) +
        stat_compare_means(method = "anova", label.y = 1.29) + # global
        stat_compare_means(comparisons = my_comparisons, label.y = 1, label = "p.signif") + # paired
        scale_y_continuous(limits = c(-0.52, 1.3))
p1

  

多图拼接

options(repr.plot.width=8, repr.plot.height=9)
cowplot::plot_grid(p1,p2,p3,p4,ncol = 2)

  

PDF出图

ggsave(filename = "HhOFF HhON metabolic pathways.pdf", width = 8, height = 9)

  

有些图不能这么保存,比如heatmap,这时就要用到pdf函数

# traditional save
pdf("manuscript/HSCR.cluster.heatmap.pdf", width=8, height=7)
p
dev.off()

  

 

lnkscape里修改文字【对齐,上下标等等】

 

 

OK, 一个准发表级的图就制作好了,可能还需要精修。


 

其余细节

 

点的类型

# change the border of point
geom_point(shape = 21, colour = "black", fill = "white", size = 5, stroke = 5)

 

把点拟合成线

stat_smooth(method = "loess", size = 1.1, se = F, span = 0.2)

 

图例,比如改legend title,改点大小,去掉图例

labs(x = "\nTranscriptional level",y = "\nPost-transcriptional level", title = "", color = "Clinical score")
# change legend dot size
guides(colour = guide_legend(override.aes = list(size=10)))
# ggplot remove legend title
theme(legend.title = element_blank())
# position
theme(llegend.text = element_text(size = 12), legend.position = c(0.8, 0.75))

 

标题格式,比如居中

theme(plot.title = element_text(hjust = 0.5, size = 18))

  

去掉边框,轴线,刻度;去掉右上边框

# empty border, ticks, text
theme(panel.border = element_blank(), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), axis.line = element_blank()) +
labs(x = "",y = "", title = "") +
theme(axis.title=element_blank(), axis.text=element_blank(), axis.ticks=element_blank())
# remove top and right border
theme(axis.line = element_line(colour = "black"), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), panel.border = element_blank(), panel.background = element_blank())

 

坐标轴,比如修改起点,范围

# force y start from 0
scale_y_continuous(expand = c(0, 0), limits = c(0, NA))

  

去掉画布中的网格线条

# just remove inside grid
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

  

添加文本

# add text
annotate("text", label = "Wilcoxon test\nP-value = 1.48e-12", x = 0.5, y =2, size = 6, colour = "black")

 

添加背景色

# add background color to mark different region
geom_rect(xmin=0, xmax=2.5, ymin=-2, ymax=-1, fill="#4DAF4A", alpha=1, color=NA)

  

修改填充颜色

# color
scale_color_manual(values=brewer.pal(9,"Set1")[c(1:5,7:9)])

 

精准控制圈图的两种alpha,比如venn图

scale_color_manual(values = sample.colors) +
scale_fill_manual(values = alpha(sample.colors, .2))

  

快速统计分析

# quick statistic testing
# Wilcoxon test
wilcox.test(subset(time.df,GeneSet=="Common risk")$Time,
subset(time.df,GeneSet=="L-HSCR specific")$Time, alternative = "two.sided")

 

# packages
# significance
https://github.com/const-ae/ggsignif

分组计算,如取mean

# quick data process
# get group mean
weather %>% group_by(city) %>% summarise(mean_temperature = mean(temperature))

  

小数点保留,科学计数法

# format decimals
formatC(0.46, format = "e", digits = 1)
library(scales)
scientific(0.46, digits = 2)

  

查看默认的颜色 - 画图的结果数据

# see the colors in ggplot
# To see what colors are used to make your plot you can use function ggplot_build() and then look at data part of this object (in column colour are codes).
ggplot_build(p)$data

  

其他图种

配对的箱线图、柱状图、折线图 - 用于比较case和control

data

这里想加点需要用另一个函数geom_dotplot

    lineage	lineage.sub	stage	S.Score	G2M.Score	cc.score
    <chr>	<fct>	<chr>	<dbl>	<dbl>	<dbl>
ctrl_AAACCTGAGACATAAC	NP	NPlate	Control	-0.8162696	-0.98076576	-0.8162696
ctrl_AAACCTGCAAGTAATG	BP	BP	Control	0.3118349	-0.05584626	0.3118349
ctrl_AAACCTGCATGCTAGT	GP	GP	Control	0.4443853	0.27702244	0.4443853
# http://www.sthda.com/english/articles/24-ggpubr-publication-ready-plots/76-add-p-values-and-significance-levels-to-ggplots/
library(ggpubr)

options(repr.plot.width=5, repr.plot.height=4)
p <- ggplot(cc.df, aes(x=lineage.sub, y=cc.score, fill=stage)) +
  geom_boxplot(position=position_dodge(1)) +
  geom_dotplot(binaxis='y', stackdir='center', position = "dodge", dotsize=0.15, binwidth=1/25, binpositions="all") +
  theme_bw() +
  labs(x = "", y = "Proliferation score\n", title = "") +
  theme(axis.text.x  = element_text(face="plain", angle=0, size = 14, color = "black", vjust=0.6),
        axis.text.y  = element_text(size = 10),
        axis.title.y = element_text(size = 14)) +
  scale_fill_manual(values=c("blue","red")) +
  stat_compare_means(aes(group = stage), label = "p.signif", label.y = 4) +
  theme(legend.title = element_blank())
p

  

封装好的函数

ggbarplot(ToothGrowth, x = "dose", y = "len", add = "mean_se",
          color = "supp", palette = "jco", 
          position = position_dodge(0.8))+
  stat_compare_means(aes(group = supp), label = "p.signif", label.y = 29)
ggline(ToothGrowth, x = "dose", y = "len", add = "mean_se",
          color = "supp", palette = "jco")+
  stat_compare_means(aes(group = supp), label = "p.signif", 
                     label.y = c(16, 25, 29))

  

热图 - 最直观

# heatmap
https://jokergoo.github.io/ComplexHeatmap-reference/book/

 

小提琴图marker - 分布

stacked violin plot for visualizing single-cell data in Seurat

参见:mouse/singleCell/case/Kif7_ENCC/Kif7-integration/integration_public_and_Kif7.ipynb 

 

Venn韦恩图/UpSetR图 - 交集

R绘制韦恩图 | Venn图 | UpSetR图

 

Beeswarm Plot 蜂群图 - 序列数据展开

https://github.com/eclarke/ggbeeswarm

#With different beeswarm point distribution priority
dat <- data.frame(x=rep(1:3,c(20,40,80)))
dat$y <- rnorm(nrow(dat),dat$x)
dat$z <- 1
ggplot(dat, aes(z,y)) + 
    geom_beeswarm(size=2,priority='descending', cex=3) + 
    ggtitle('Descending') + 
    scale_x_continuous(expand=expansion(add=c(0.5, 0.5)))

  

我的代码

set.seed(49)
library(ggplot2)
library(ggbeeswarm)

pca_HSCR2$z <- 1
pca_HSCR2$pseudotime <- -pca_HSCR2$X2

options(repr.plot.width=6, repr.plot.height=4)
ggplot(pca_HSCR2, aes(x=z, y=pseudotime, fill=severity, color=severity)) + 
    geom_beeswarm(size=1.2,priority='ascending', cex=1.4) + 
    # ggtitle('ascending') + # Descending
    scale_x_continuous(expand=expansion(add=c(0.5, 0.5))) +
    coord_flip() +
    theme_void() +
    scale_color_manual(values=severity.colors)

  

基因模块在pseudotime表达的line图

参见:mouse/singleCell/case/Kif7_ENCC/Kif7/Kif7_basic_analysis.ipynb

 

火山图/对角线图 - 特殊散点图  

参考:mouse/singleCell/case/Kif7_ENCC/Kif7-integration/Ezh2_analysis.ipynb

# prepare data
log2FC <- data.frame(gene=rownames(HSCR.DEG.log2FC.df.final), S_log2FC=S.log2FC, L_log2FC=L.log2FC)

# add color label
log2FC$color <- "none"
log2FC[log2FC$gene %in% c("HDAC1"),]$color <- "red"

# the genes want to be labeled
label.genes <- c('RAMP2', 'HEY1', 'STAMBP', 'CCNB1IP1', 'LMOD3', 'NUP107', 'HEY2', 'FOXO1', 'CRLF1', 'ZFP36L2', 'NR2F2', 'TUBB3', 
                'ZNF385A', 'TMEM14C', 'FLNA', 'TFAP2A', 'SOX11', 'HDAC1', 'GLI3', 'BCL11A')
label.df <- subset(log2FC, gene %in% label.genes)

options(repr.plot.width=4.5, repr.plot.height=5)
library(ggplot2)
library("ggrepel")
# Basic scatter plot
ggplot(log2FC, aes(x=S_log2FC, y=L_log2FC, color=color)) + # , color=coregene
    geom_hline(yintercept=0) +
    geom_vline(xintercept=0) +
    geom_abline(intercept = 0, slope = 1, color="black", linetype="dashed", size=1) +
    geom_point(size=0.5) +
    geom_point(data = label.df, size=2, color = "red") +
    theme_bw() +
    labs(x = "\nLog2FC in S-HSCR",y = "Log2FC in L-HSCR", title = "") +
    theme(legend.title=element_blank()) +
    # Change fontface. Allowed values : 1(normal), 2(bold), 3(italic), 4(bold.italic)
    geom_text_repel(data=label.df, aes(label = gene), size = 3.5, fontface=3, color="red",
                    box.padding = 0.4, max.overlaps = Inf) +
    theme(legend.position = "none", 
        axis.text  = element_text(size = 10),
        # axis.text.y  = element_text(size = 10),
        axis.title = element_text(size = 16, face="plain")) +
    scale_x_continuous(limits = c(-8, 8)) +
    scale_y_continuous(limits = c(-8, 8)) +
    scale_color_manual(values=c("grey","red"))

  

 

 

 

 



 

posted @ 2021-02-04 18:41  Life·Intelligence  阅读(145)  评论(0编辑  收藏
TOP