R语言中merge函数
001、合并取交集
# Example 001: inner join (intersection) on key columns with different names.

# First test data frame, keyed by `name1`.
name1 <- c("aa", "bb", "cc", "dd")
gender <- c("m", "m", "m", "f")
age <- c(13, 14, 14, 15)
dat1 <- data.frame(name1, gender, age)

# Second test data frame, keyed by `name2`.
name2 <- c("xx", "bb", "yy", "dd")
math <- c(89, 67, 87, 68)
eng <- c(87, 88, 68, 65)
dat2 <- data.frame(name2, math, eng)

# Show the test data frames.
dat1
dat2

# Name the key column of each data frame separately; only rows whose key
# occurs in both data frames ("bb" and "dd") are kept.
merge(dat1, dat2, by.x = "name1", by.y = "name2")

002、合并取交集(两个数据框的键列同名时,直接使用by)
# Example 002: inner join (intersection) when both key columns share a name.

# First test data frame, keyed by `name`.
name <- c("aa", "bb", "cc", "dd")
gender <- c("m", "m", "m", "f")
age <- c(13, 14, 14, 15)
dat1 <- data.frame(name, gender, age)

# Second test data frame; `name` is deliberately reused so that both data
# frames end up with an identically named key column.
name <- c("xx", "bb", "yy", "dd")
math <- c(89, 67, 87, 68)
eng <- c(87, 88, 68, 65)
dat2 <- data.frame(name, math, eng)

# Show the test data frames.
dat1
dat2

# When the key column has the same name in both data frames, `by` alone
# is enough.
merge(dat1, dat2, by = "name")

003、合并取并集
# Example 003: full outer join (union of the keys of both data frames).

# First test data frame, keyed by `name1`.
name1 <- c("aa", "bb", "cc", "dd")
gender <- c("m", "m", "m", "f")
age <- c(13, 14, 14, 15)
dat1 <- data.frame(name1, gender, age)

# Second test data frame, keyed by `name2`.
name2 <- c("xx", "bb", "yy", "dd")
math <- c(89, 67, 87, 68)
eng <- c(87, 88, 68, 65)
dat2 <- data.frame(name2, math, eng)

# Show the test data frames.
dat1
dat2

# all = TRUE keeps the rows of both data frames; cells without a match are
# filled with NA. TRUE is spelled out because T is an ordinary variable that
# can be reassigned.
merge(dat1, dat2, by.x = "name1", by.y = "name2", all = TRUE)

004、仅对其中一个数据框取并集
(1)、dat1并集
# Example 004 (1): left join, keeping every row of dat1.

# First test data frame, keyed by `name1`.
name1 <- c("aa", "bb", "cc", "dd")
gender <- c("m", "m", "m", "f")
age <- c(13, 14, 14, 15)
dat1 <- data.frame(name1, gender, age)

# Second test data frame, keyed by `name2`.
name2 <- c("xx", "bb", "yy", "dd")
math <- c(89, 67, 87, 68)
eng <- c(87, 88, 68, 65)
dat2 <- data.frame(name2, math, eng)

# Show the test data frames.
dat1
dat2

# all.x = TRUE keeps all rows of dat1; rows found only in dat2 are dropped
# (all.y = FALSE). Columns coming from dat2 are NA where there is no match.
merge(dat1, dat2, by.x = "name1", by.y = "name2", all.x = TRUE, all.y = FALSE)

(2)、dat2并集
# Example 004 (2): right join, keeping every row of dat2.

# First test data frame, keyed by `name1`.
name1 <- c("aa", "bb", "cc", "dd")
gender <- c("m", "m", "m", "f")
age <- c(13, 14, 14, 15)
dat1 <- data.frame(name1, gender, age)

# Second test data frame, keyed by `name2`.
name2 <- c("xx", "bb", "yy", "dd")
math <- c(89, 67, 87, 68)
eng <- c(87, 88, 68, 65)
dat2 <- data.frame(name2, math, eng)

# Show the test data frames.
dat1
dat2

# all.y = TRUE keeps all rows of dat2; rows found only in dat1 are dropped
# (all.x = FALSE). Columns coming from dat1 are NA where there is no match.
merge(dat1, dat2, by.x = "name1", by.y = "name2", all.x = FALSE, all.y = TRUE)

005、两个数据框没有同名列且不指定by时,返回笛卡尔积(所有行两两组合)
# Example 005: merge() without `by` on data frames sharing no column names.

# First test data frame; its columns are name1, gender, age.
name1 <- c("aa", "bb", "cc", "dd")
gender <- c("m", "m", "m", "f")
age <- c(13, 14, 14, 15)
dat1 <- data.frame(name1, gender, age)

# Second test data frame; its columns are name2, math, eng.
name2 <- c("xx", "bb", "yy", "dd")
math <- c(89, 67, 87, 68)
eng <- c(87, 88, 68, 65)
dat2 <- data.frame(name2, math, eng)

# Show the test data frames.
dat1
dat2

# With no common column names and no `by` given, there is nothing to match
# on, so the result is the Cartesian product: every row of dat1 paired with
# every row of dat2 (4 x 4 = 16 rows).
merge(dat1, dat2)

006、是否对合并后的数据排序
# Example 006: controlling whether the merged result is sorted by the key.

# First test data frame, keyed by `name1` (keys intentionally unsorted).
name1 <- c("xx", "dd", "yy", "bb")
gender <- c("m", "m", "m", "f")
age <- c(13, 14, 14, 15)
dat1 <- data.frame(name1, gender, age)

# Second test data frame, keyed by `name2` (keys intentionally unsorted).
name2 <- c("xx", "yy", "cc", "bb")
math <- c(89, 67, 87, 68)
eng <- c(87, 88, 68, 65)
dat2 <- data.frame(name2, math, eng)

# Show the test data frames.
dat1
dat2

# sort = TRUE (the default) orders the result by the key column.
merge(dat1, dat2, by.x = "name1", by.y = "name2", sort = TRUE)

# sort = FALSE skips the sorting step; the documentation describes the
# resulting row order as unspecified, so do not rely on it.
merge(dat1, dat2, by.x = "name1", by.y = "name2", sort = FALSE)

参考:https://blog.csdn.net/chongbaikaishi/article/details/115740560

浙公网安备 33010602011771号