R语言实战 - 基本数据管理(3)
8. 数据排序
> leadership$age
[1] 32 45 25 39 NA
> newdata <- leadership[order(leadership$age),]
> newdata
manager testDate country gender age item1 item2 item3 item4 item5
3 3 2008-10-01 UK F 25 3 5 5 5 2
1 1 2008-10-24 US M 32 5 4 5 5 5
4 4 2008-10-12 UK M 39 3 3 4 NA NA
2 2 2008-10-28 US F 45 3 5 2 5 5
5 5 2009-05-01 UK F NA 2 2 1 2 1
stringAsFactors agecat
3 FALSE Young
1 FALSE Young
4 FALSE Young
2 FALSE Young
5 FALSE <NA>
>
>
> attach(leadership)
The following objects are masked _by_ .GlobalEnv:
age, country, gender, manager
> newdata <- leadership[order(gender, age),]
> detach(leadership)
> newdata
manager testDate country gender age item1 item2 item3 item4 item5
3 3 2008-10-01 UK F 25 3 5 5 5 2
2 2 2008-10-28 US F 45 3 5 2 5 5
5 5 2009-05-01 UK F NA 2 2 1 2 1
1 1 2008-10-24 US M 32 5 4 5 5 5
4 4 2008-10-12 UK M 39 3 3 4 NA NA
stringAsFactors agecat
3 FALSE Young
2 FALSE Young
5 FALSE <NA>
1 FALSE Young
4 FALSE Young
>
> attach(leadership)
The following objects are masked _by_ .GlobalEnv:
age, country, gender, manager
> newdata <- leadership[order(gender, -age),]
> detach(leadership)
> newdata
manager testDate country gender age item1 item2 item3 item4 item5
5 5 2009-05-01 UK F NA 2 2 1 2 1
2 2 2008-10-28 US F 45 3 5 2 5 5
3 3 2008-10-01 UK F 25 3 5 5 5 2
4 4 2008-10-12 UK M 39 3 3 4 NA NA
1 1 2008-10-24 US M 32 5 4 5 5 5
stringAsFactors agecat
5 FALSE <NA>
2 FALSE Young
3 FALSE Young
4 FALSE Young
1 FALSE Young
>
9. 数据集的合并
9.1 添加列
> patientID <- c(1, 2, 3, 4)
> age <- c(25, 34, 28, 52)
> status <- c("poor", "improved", "excellent", "poor")
> gender <- c("F", "M", "M", "F")
> dataframeA <- data.frame(patientID, gender)
> dataframeA
patientID gender
1 1 F
2 2 M
3 3 M
4 4 F
> dataframeB <- data.frame(patientID, age, status)
> dataframeB
patientID age status
1 1 25 poor
2 2 34 improved
3 3 28 excellent
4 4 52 poor
> total <- merge(dataframeA, dataframeB, by="ID")
Error in fix.by(by.x, x) : 'by' must specify a uniquely valid column
> total <- merge(dataframeA, dataframeB, by="patientID")
> total
patientID gender age status
1 1 F 25 poor
2 2 M 34 improved
3 3 M 28 excellent
4 4 F 52 poor
> total <- merge(dataframeA, dataframeB, by=c("gender", "age"))
Error in fix.by(by.x, x) : 'by' must specify a uniquely valid column
> total <- merge(dataframeA, dataframeB, by=c("patientID", "age"))
Error in fix.by(by.x, x) : 'by' must specify a uniquely valid column
>
> total <- cbind(dataframeA, dataframeB)
> total
patientID gender patientID age status
1 1 F 1 25 poor
2 2 M 2 34 improved
3 3 M 3 28 excellent
4 4 F 4 52 poor
>
9.2 添加行
> total <- rbind(dataframeA, dataframeB)
Error in rbind(deparse.level, ...) : numbers of columns of arguments do not match
10. 数据集取子集
10.1 选入(保留)变量
> newdata <- leadership[, c(6:10)]
> newdata
item1 item2 item3 item4 item5
1 5 4 5 5 5
2 3 5 2 5 5
3 3 5 5 5 2
4 3 3 4 NA NA
5 2 2 1 2 1
>
>
> myvars <- c("item1","item2","item3","item4","item5")
> newdata <- leadership[myvars]
> newdata
item1 item2 item3 item4 item5
1 5 4 5 5 5
2 3 5 2 5 5
3 3 5 5 5 2
4 3 3 4 NA NA
5 2 2 1 2 1
>
>
> myvar <- paste("item", 1:5, seq="")
> myvar
[1] "item 1 " "item 2 " "item 3 " "item 4 " "item 5 "
> myvar <- paste("item", 1:5, sep="")
> myvar
[1] "item1" "item2" "item3" "item4" "item5"
> newdata <- leadership[myvar]
> newdata
item1 item2 item3 item4 item5
1 5 4 5 5 5
2 3 5 2 5 5
3 3 5 5 5 2
4 3 3 4 NA NA
5 2 2 1 2 1
>
10.2 剔除(丢弃)变量
> myvars <- names(leadership) %in% c("item3", "item4")
> myvars
[1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE TRUE FALSE FALSE FALSE
> newdata <- leadership[!myvars]
> newdata
manager testDate country gender age item1 item2 item5 stringAsFactors
1 1 2008-10-24 US M 32 5 4 5 FALSE
2 2 2008-10-28 US F 45 3 5 5 FALSE
3 3 2008-10-01 UK F 25 3 5 2 FALSE
4 4 2008-10-12 UK M 39 3 3 NA FALSE
5 5 2009-05-01 UK F NA 2 2 1 FALSE
agecat
1 Young
2 Young
3 Young
4 Young
5 <NA>
>
>
> names(leadership)
[1] "manager" "testDate" "country" "gender"
[5] "age" "item1" "item2" "item3"
[9] "item4" "item5" "stringAsFactors" "agecat"
>
> newdata <- leadership[c(-8,-9)]
> newdata
manager testDate country gender age item1 item2 item5 stringAsFactors
1 1 2008-10-24 US M 32 5 4 5 FALSE
2 2 2008-10-28 US F 45 3 5 5 FALSE
3 3 2008-10-01 UK F 25 3 5 2 FALSE
4 4 2008-10-12 UK M 39 3 3 NA FALSE
5 5 2009-05-01 UK F NA 2 2 1 FALSE
agecat
1 Young
2 Young
3 Young
4 Young
5 <NA>
> leadership$item3 <- leadership$item4 <- NULL
> leadership
manager testDate country gender age item1 item2 item5 stringAsFactors
1 1 2008-10-24 US M 32 5 4 5 FALSE
2 2 2008-10-28 US F 45 3 5 5 FALSE
3 3 2008-10-01 UK F 25 3 5 2 FALSE
4 4 2008-10-12 UK M 39 3 3 NA FALSE
5 5 2009-05-01 UK F NA 2 2 1 FALSE
agecat
1 Young
2 Young
3 Young
4 Young
5 <NA>
>
10.3 选入观测
> newdata <- leadership[1:3,]
> newdata
manager testDate country gender age item1 item2 item5 stringAsFactors
1 1 2008-10-24 US M 32 5 4 5 FALSE
2 2 2008-10-28 US F 45 3 5 5 FALSE
3 3 2008-10-01 UK F 25 3 5 2 FALSE
agecat
1 Young
2 Young
3 Young
> newdata <- leadership[which(leadership$gender=="M" & leadership$age > 30),]
> newdata
manager testDate country gender age item1 item2 item5 stringAsFactors
1 1 2008-10-24 US M 32 5 4 5 FALSE
4 4 2008-10-12 UK M 39 3 3 NA FALSE
agecat
1 Young
4 Young
> attach(leadership)
The following objects are masked _by_ .GlobalEnv:
age, country, gender, manager
> newdata1 <- leadership[which(gender=='M' & age > 30),]
> detach(leadership)
> newdata1
manager testDate country gender age item1 item2 item5 stringAsFactors
2 2 2008-10-28 US F 45 3 5 5 FALSE
agecat
2 Young
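注意:上面 newdata1 的结果并不正确(本应与前面的 newdata 一样返回第 1、4 行)。attach() 的提示已经说明 age、gender 等对象被 .GlobalEnv 中的同名对象屏蔽,因此这里实际参与比较的是 9.1 节中创建的全局向量 gender 和 age(其中只有第 2 个元素满足 gender=='M' & age > 30),而不是 leadership 中的列。建议改用 leadership$gender 这样的写法,或使用 with(leadership, ...),避免使用 attach()。
>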
> leadership$date <- as.Date(leadership$date, "%m/%d/%y")
Error in as.Date.default(leadership$date, "%m/%d/%y") :
do not know how to convert 'leadership$date' to class “Date”
> leadership$testDate <- as.Date(leadership$testDate, "%m/%d/%y")
> startdate <- as.Date("2009-01-01")
> enddate <- as.Date("2009-10-31")
> newdate <- leadership[which(leadership$testDate >= startdate & leadership$testDate <= enddate),]
> newdate
manager testDate country gender age item1 item2 item5 stringAsFactors
5 5 2009-05-01 UK F NA 2 2 1 FALSE
agecat
5 <NA>
>
10.4 subset() 函数
> leadership
manager testDate country gender age item1 item2 item5 stringAsFactors
1 1 2008-10-24 US M 32 5 4 5 FALSE
2 2 2008-10-28 US F 45 3 5 5 FALSE
3 3 2008-10-01 UK F 25 3 5 2 FALSE
4 4 2008-10-12 UK M 39 3 3 NA FALSE
5 5 2009-05-01 UK F NA 2 2 1 FALSE
agecat
1 Young
2 Young
3 Young
4 Young
5 <NA>
> newdata <- subset(leadership, age >= 35 | age < 24, select=c(item1, item2, item5))
> newdata
item1 item2 item5
2 3 5 5
4 3 3 NA
>
> newdata <- subset(leadership, gender=="M" & age > 25, select=gender:item5)
> newdata
gender age item1 item2 item5
1 M 32 5 4 5
4 M 39 3 3 NA
>
10.5 随机抽样
> leadership
manager testDate country gender age item1 item2 item5 stringAsFactors
1 1 2008-10-24 US M 32 5 4 5 FALSE
2 2 2008-10-28 US F 45 3 5 5 FALSE
3 3 2008-10-01 UK F 25 3 5 2 FALSE
4 4 2008-10-12 UK M 39 3 3 NA FALSE
5 5 2009-05-01 UK F NA 2 2 1 FALSE
agecat
1 Young
2 Young
3 Young
4 Young
5 <NA>
>
> mysample <- leadership[sample(1:nrow(leadership), 3, replace=FALSE),]
> mysample
manager testDate country gender age item1 item2 item5 stringAsFactors
4 4 2008-10-12 UK M 39 3 3 NA FALSE
2 2 2008-10-28 US F 45 3 5 5 FALSE
1 1 2008-10-24 US M 32 5 4 5 FALSE
agecat
4 Young
2 Young
1 Young
>
posted on 2017-09-08 19:58 你的踏板车要滑向哪里 阅读(1335) 评论(0) 收藏 举报
浙公网安备 33010602011771号