# Notes on reshape2, tidyr and dplyr

# reshape2 ----

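# getwd()  # idea: try an analysis of subway ridership data
#rwdata <- read.table("附件 1 线网乘客O-D数据.txt",sep = ",")
#rwdata1 <-as.matrix(rwdata)  # convert it to a matrix
#typeof("rwdata1")  # meant to check the data's type (as written it inspects the string "rwdata1")
#is.matrix(rwdata1)  # check whether the data is a matrix
#?heatmap
#heatmap(rwdata1)  # heatmap() needs an all-numeric matrix, but this one contains time values
#rwdata1[1,]=1  # overwrite the first row
#head(rwdata1)
#one <- c( 1, 1, 1, 1,1 )
# melt() turns wide data into long data; cast turns long data back into wide data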
# Show the current working directory.
getwd()

# Unfinished idea: tabulate the card tap timestamps by hour of day.
# (进站刷卡时刻 = tap-in time, 出站刷卡时刻 = tap-out time)
#time<-as.POSIXct(rwdata.txt$进站刷卡时刻,出站刷卡时刻)
#hour<-strftime(time,"%H")
#table(hour)

# Two small data frames whose key columns (k1, k2) contain NA values,
# used as inputs for the merge() calls below.
x <- data.frame(
  k1 = c(NA, NA, 3, 4, 5),
  k2 = c(1, NA, NA, 4, 5),
  data = seq_len(5)
)
y <- data.frame(
  k1 = c(NA, 2, NA, 4, 5),
  k2 = c(NA, NA, 3, 4, 5),
  data = seq_len(5)
)

# Join on a single key column, then on both key columns at once.
merge(x = x, y = y, by = "k1")
merge(x = x, y = y, by = c("k1", "k2"))
# Install reshape2 only when it is missing, so re-running the script does not
# reinstall the package (and hit the network) every time.
if (!requireNamespace("reshape2", quietly = TRUE)) {
  install.packages("reshape2")
}
library("reshape2")

# Help pages for the reshaping functions used below.
?melt
?dcast
?acast
# acast() returns a matrix; dcast() returns a data.frame.

# Lower-case the column names. The assignment creates a modified copy of the
# built-in airquality data in the workspace.
names(airquality) <- tolower(names(airquality))
head(airquality)
# NOTE: the script used to call read.table() here with no arguments. That call
# always fails because `file` has no default, which stopped the script, so it
# is disabled.
# read.table()

# Wide -> long. No id.vars are given and every airquality column is numeric,
# so all columns are melted as measure variables.
aql <- melt(airquality)
head(aql)
# Keep month and day as identifier columns and stack the remaining ones.
aql <- melt(airquality, id.vars = c("month", "day"))
head(aql)

# Long -> wide: one row per month/day pair, one column per variable.
aqw <- dcast(aql, month + day ~ variable)
head(aqw)

head(airquality)
# Each month/variable cell now holds several values. No aggregation function
# is supplied here, so dcast() falls back to counting them (length).
dcast(aql, month ~ variable)
# Aggregate each cell with the mean, ignoring missing values.
dcast(aql, month ~ variable, fun.aggregate = mean, na.rm = TRUE)
# plyr supplies .(), which quotes the expression passed to `subset`.
library(plyr)
# Mean of the ozone variable only.
dcast(aql, month ~ variable, fun.aggregate = mean, na.rm = TRUE,
      subset = .(variable == "ozone"))
# Means for month 5 only (month is numeric, so compare against a number).
dcast(aql, month ~ variable, fun.aggregate = mean, na.rm = TRUE,
      subset = .(month == 5))

# tidyr ----

# Install tidyr and dplyr only when they are missing, so re-running the script
# does not reinstall them every time.
for (pkg in c("tidyr", "dplyr")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}
library("tidyr")  # tidyr: tools for bringing data into a tidy layout
library("dplyr")

# First 10 rows and first 3 columns (mpg, cyl, disp) of mtcars.
tdata <- mtcars[1:10, 1:3]
# Add the row names to the data as a regular column called `name`.
tdata <- data.frame(name = rownames(tdata), tdata)

# gather() turns wide data into long data.
# (tidyr now documents gather()/spread() as superseded by pivot_longer()/
# pivot_wider(); they are kept here because this section demonstrates them.)
gather(tdata, key = "Key", value = "Value", cyl, disp, mpg)
# cyl:disp selects the same columns as listing cyl, disp.
gather(tdata, key = "Key", value = "Value", cyl:disp)
# Gather mpg and cyl; -disp explicitly leaves disp out.
gdata <- gather(tdata, key = "Key", value = "Value", mpg, cyl, -disp)

# spread() turns long data back into wide data.
spread(gdata, key = "Key", value = "Value")

# separate() splits one column into several. Without `sep`, it splits on
# runs of non-alphanumeric characters.
df <- data.frame(x = c(NA, "a.b", "a.d", "b.c"))
separate(df, col = x, into = c("A", "B"))
df <- data.frame(x = c(NA, "a.b-c", "a-d", "b-c"))
# Keep the split result so that unite() below can glue A and B back together.
separated <- separate(df, x, into = c("A", "B"), sep = "-")
separated
# unite() is the inverse of separate(). It must be given a data frame that
# actually has columns A and B; the previous call passed the unrelated
# data frame `x`, which has neither column and therefore failed.
unite(separated, col = "AB", A, B, sep = "-")

# dplyr ----

library(dplyr)
ls("package:dplyr")  # list the objects exported by dplyr

# Row-wise verbs.
dplyr::filter(iris, Sepal.Length > 7)  # rows with Sepal.Length > 7
dplyr::distinct(rbind(iris[1:10, ], iris[1:15, ]))  # drop duplicated rows
dplyr::slice(iris, 10:15)  # pick rows by position
dplyr::sample_n(iris, 10)  # draw 10 random rows
dplyr::sample_frac(iris, 0.1)  # draw a random 10% of the rows
dplyr::arrange(iris, Sepal.Length)  # sort by Sepal.Length (ascending by default)
dplyr::arrange(iris, desc(Sepal.Length))  # sort in descending order

# Summaries over the whole table.
summarise(iris, avg = mean(Sepal.Length))  # mean of Sepal.Length
summarise(iris, sum = sum(Sepal.Length))  # total of Sepal.Length

# The pipe %>% feeds the output of one call into the next call as its input
# (RStudio shortcut: Ctrl+Shift+M).
head(mtcars, 20) %>% tail()  # take the first 20 rows, then the tail of those
dplyr::group_by(iris, Species)  # group the rows by Species
iris %>%
  group_by(Species) %>%
  summarise(avg = mean(Sepal.Width)) %>%
  arrange(avg)

# mutate() adds new columns.
dplyr::mutate(iris, new = Sepal.Length + Petal.Length)

# Joins combine two data frames on a shared key column.
# TRUE/FALSE are used instead of T/F because T and F are ordinary variables
# that can be reassigned.
a <- data.frame(x1 = c("A", "B", "C"), x2 = c(1, 2, 3))
b <- data.frame(x1 = c("A", "B", "D"), x3 = c(TRUE, FALSE, TRUE))
dplyr::left_join(a, b, by = "x1")  # left join: every row of a
dplyr::full_join(a, b, by = "x1")  # full join: every row of a and of b
dplyr::semi_join(a, b, by = "x1")  # rows of a that have a match in b
dplyr::anti_join(a, b, by = "x1")  # rows of a that have no match in b

# mutate() adds new columns while keeping the existing ones. The car names
# live in the row names, so copy them into a regular column `Model`.
# (An earlier `first <- slice(mtcars, 1:20)` stood here; it was overwritten
# below before ever being used, so it has been dropped.)
mtcars <- mutate(mtcars, Model = rownames(mtcars))
# Author's note: mutate() kept the variables but not the row names, so they
# had to be re-attached by hand with the two lines below.
# NOTE(review): this may depend on the installed dplyr version - confirm.
#Rownames <- rownames(datasets::mtcars)
#rownames(mtcars) <- Rownames

# Two overlapping row ranges to demonstrate the set operations.
first <- slice(mtcars, 1:20)
second <- slice(mtcars, 10:30)
intersect(first, second)  # rows present in both
union_all(first, second)  # all rows of both, duplicates kept
union(first, second)  # all rows of both, duplicates removed
setdiff(first, second)  # rows of first that are not in second
setdiff(second, first)  # rows of second that are not in first

# Reset mtcars to the original built-in data.
mtcars <- datasets::mtcars
Model <- rownames(mtcars)
# transmute() adds the new column and drops all existing ones. mtcars has no
# Model column at this point, so `Model` refers to the vector defined above.
# After this line the workspace copy of mtcars holds only the Model column.
mtcars <- transmute(mtcars, Model)
# Posted 2021-04-14 20:12 by KONGQer (blog footer: 56 views, 0 comments)