R语言实战 - 基本数据管理(1)
1. 一个示例
> manager <- c(1,2,3,4,5)
> date <- c("10/24/08", "10/28/08", "10/1/08", "10/12/08", "5/1/09")
> country <- c("US", "US", "UK", "UK", "UK")
> gender <- c("M", "F", "F", "M", "F")
> age <- c(32, 45, 25, 39, 99)
> q1 <- c(5, 3, 3, 3, 2)
> q2 <- c(4, 5, 5, 3, 2)
> q3 <- c(5, 2, 5, 4, 1)
> q4 <- c(5, 5, 5, NA, 2)
> q5 <- c(5, 5, 2, NA, 1)
> leadership <- data.frame(manager, date, country, gender, age, q1, q2,
+ q3, q4, q5, stringAsFactors=FALSE)
> leadership
manager date country gender age q1 q2 q3 q4 q5 stringAsFactors
1 1 10/24/08 US M 32 5 4 5 5 5 FALSE
2 2 10/28/08 US F 45 3 5 2 5 5 FALSE
3 3 10/1/08 UK F 25 3 5 5 5 2 FALSE
4 4 10/12/08 UK M 39 3 3 4 NA NA FALSE
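5 5 5/1/09 UK F 99 2 2 1 2 1 FALSE
注：上面 data.frame() 调用中的参数名 stringAsFactors 拼写有误，正确写法是 stringsAsFactors（少了一个 s）；R 因此把它当作一个新的数据列，所以输出中多出了一列值全为 FALSE 的 stringAsFactors。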
2. 创建新变量
> mydata <- data.frame(x1=c(2,2,6,4), x2=c(3,4,2,8))
> mydata
x1 x2
1 2 3
2 2 4
3 6 2
4 4 8
> mydata$sumx <- mydata$x1 + mydata$x2
> mydata$sumx
[1] 5 6 8 12
> mydata$mean <- (mydata$x1 + mydata$x2)/2
> mydata$mean
[1] 2.5 3.0 4.0 6.0
> attach(mydata)
> mydata$sumx <- x1 + x2
> mydata$sumx
[1] 5 6 8 12
> mydata$mean <- (x1 + x2)/2
> mydata$mean
[1] 2.5 3.0 4.0 6.0
> detach(mydata)
> mydata
x1 x2 sumx mean
1 2 3 5 2.5
2 2 4 6 3.0
3 6 2 8 4.0
4 4 8 12 6.0
> mydata <- transform(mydata, sumx = x1+x2, meanx = (x1+x2)/2)
> mydata
x1 x2 sumx mean meanx
1 2 3 5 2.5 2.5
2 2 4 6 3.0 3.0
3 6 2 8 4.0 4.0
4 4 8 12 6.0 6.0
3. 变量的重编码
> leadership$age[leadership$age == 99] <- NA
> leadership$agecat[leadership$age > 75] <- "Elder"
> leadership$agecat[leadership$age>=55 $
+ leadership$age<=75] <- "Middle Aged"
Error: unexpected '<=' in:
"leadership$agecat[leadership$age>=55 $
leadership$age<="
> leadership$agecat[leadership$age >= 55 & leadership$age <=75] <- "Middle Aged"
> leadership$agecat[leadership$age < 55] <- "Young"
>
> leadership <- within(leadership, {
+ agecat <- NA
+ agecat[age > 75] <- "Elder"
+ agecat[age >= 55 & age <= 75] <- "Middle Aged"
+ agecat[age < 55] <- "Young"})
> leadership
manager date country gender age q1 q2 q3 q4 q5 stringAsFactors agecat
1 1 10/24/08 US M 32 5 4 5 5 5 FALSE Young
2 2 10/28/08 US F 45 3 5 2 5 5 FALSE Young
3 3 10/1/08 UK F 25 3 5 5 5 2 FALSE Young
4 4 10/12/08 UK M 39 3 3 4 NA NA FALSE Young
5 5 5/1/09 UK F NA 2 2 1 2 1 FALSE <NA>
4. 变量的重命名
> fix(leadership)
>
> library(reshape)
Error in library(reshape) : there is no package called ‘reshape’
> install.packages("reshape")
Installing package into ‘C:/Users/WZhong/Documents/R/win-library/3.4’
(as ‘lib’ is unspecified)
also installing the dependencies ‘Rcpp’, ‘plyr’
trying URL 'https://mirror.lzu.edu.cn/CRAN/bin/windows/contrib/3.4/Rcpp_0.12.12.zip'
Content type 'application/zip' length 3319142 bytes (3.2 MB)
downloaded 3.2 MB
trying URL 'https://mirror.lzu.edu.cn/CRAN/bin/windows/contrib/3.4/plyr_1.8.4.zip'
Content type 'application/zip' length 1220105 bytes (1.2 MB)
downloaded 1.2 MB
trying URL 'https://mirror.lzu.edu.cn/CRAN/bin/windows/contrib/3.4/reshape_0.8.7.zip'
Content type 'application/zip' length 128278 bytes (125 KB)
downloaded 125 KB
package ‘Rcpp’ successfully unpacked and MD5 sums checked
package ‘plyr’ successfully unpacked and MD5 sums checked
Warning: unable to move temporary installation ‘C:\Users\WZhong\Documents\R\win-library\3.4\file2cd073ad6c49\plyr’ to ‘C:\Users\WZhong\Documents\R\win-library\3.4\plyr’
package ‘reshape’ successfully unpacked and MD5 sums checked
The downloaded binary packages are in
C:\Users\WZhong\AppData\Local\Temp\RtmpiKKe8J\downloaded_packages
> library(reshape)
Error: package or namespace load failed for ‘reshape’ in loadNamespace(i, c(lib.loc, .libPaths()), versionCheck = vI[[i]]):
there is no package called ‘plyr’
> install.packages(plyr)
Error in install.packages(plyr) : object 'plyr' not found
> install.packages("plyr")
Installing package into ‘C:/Users/WZhong/Documents/R/win-library/3.4’
(as ‘lib’ is unspecified)
trying URL 'https://mirror.lzu.edu.cn/CRAN/bin/windows/contrib/3.4/plyr_1.8.4.zip'
Content type 'application/zip' length 1220105 bytes (1.2 MB)
downloaded 1.2 MB
package ‘plyr’ successfully unpacked and MD5 sums checked
Warning: unable to move temporary installation ‘C:\Users\WZhong\Documents\R\win-library\3.4\file2cd057b1e2f\plyr’ to ‘C:\Users\WZhong\Documents\R\win-library\3.4\plyr’
The downloaded binary packages are in
C:\Users\WZhong\AppData\Local\Temp\RtmpiKKe8J\downloaded_packages
> library(plyr)
Error in library(plyr) : there is no package called ‘plyr’
> library(reshape)
Error: package or namespace load failed for ‘reshape’ in loadNamespace(i, c(lib.loc, .libPaths()), versionCheck = vI[[i]]):
there is no package called ‘plyr’
> plyr
Error: object 'plyr' not found
> library(plyr
+ )
Error in library(plyr) : there is no package called ‘plyr’
> install.packages("plyr")
Installing package into ‘C:/Users/WZhong/Documents/R/win-library/3.4’
(as ‘lib’ is unspecified)
trying URL 'https://mirror.lzu.edu.cn/CRAN/bin/windows/contrib/3.4/plyr_1.8.4.zip'
Content type 'application/zip' length 1220105 bytes (1.2 MB)
downloaded 1.2 MB
package ‘plyr’ successfully unpacked and MD5 sums checked
The downloaded binary packages are in
C:\Users\WZhong\AppData\Local\Temp\RtmpiKKe8J\downloaded_packages
> library(plyr)
> library(reshape)
Attaching package: ‘reshape’
The following objects are masked from ‘package:plyr’:
rename, round_any
> leadership <- rename(leadership, c(manager="managerID", date="testDate"))
> leadreship
Error: object 'leadreship' not found
> leadership
managerID testDate country gender age q1 q2 q3 q4 q5 stringAsFactors agecat
1 1 10/24/08 US M 32 5 4 5 5 5 FALSE Young
2 2 10/28/08 US F 45 3 5 2 5 5 FALSE Young
3 3 10/1/08 UK F 25 3 5 5 5 2 FALSE Young
4 4 10/12/08 UK M 39 3 3 4 NA NA FALSE Young
5 5 5/1/09 UK F NA 2 2 1 2 1 FALSE <NA>
> names(leadership)[2] <- "testDate"
> names(leadership)
[1] "managerID" "testDate" "country" "gender"
[5] "age" "q1" "q2" "q3"
[9] "q4" "q5" "stringAsFactors" "agecat"
> names(leadership)[1] <- "manager"
> names(leadership)
[1] "manager" "testDate" "country" "gender"
[5] "age" "q1" "q2" "q3"
[9] "q4" "q5" "stringAsFactors" "agecat"
> names(leadership)[6:10] <- c("item1", "item2", "item3", "item4", "item5")
> names(leadership)
[1] "manager" "testDate" "country" "gender"
[5] "age" "item1" "item2" "item3"
[9] "item4" "item5" "stringAsFactors" "agecat"
>
posted on 2017-09-05 23:35 你的踏板车要滑向哪里 阅读(338) 评论(0) 收藏 举报
浙公网安备 33010602011771号