# 使用R语言实现批量录入文件并进行主成分分析
# (Batch-read data files in R and perform principal component analysis.)
# Read every file in ./input and assemble them into one data frame -------
# Each tab-separated file is assumed to share the same rows:
# column 1 = identifiers, column 2 = measured values. The first file
# contributes both columns; each later file contributes only column 2.
File_names <- dir("input")
#print(File_names)
# file.path() builds a portable relative path; the previous hard-coded
# Windows path ("D:/R working directory/input/") broke on other machines
# and was inconsistent with dir("input") above, which is already relative.
Address <- file.path("input", File_names)
#print(Address)
n <- length(Address)
print(n)
# Guard: with no input files the script cannot proceed.
if (n == 0) stop("no files found in 'input/'", call. = FALSE)
# Seed the result with the first file (identifier + value columns) ...
Data_set <- read.table(Address[1], sep = "\t")
# ... then append the value column of each remaining file.
# seq_len(n)[-1] is empty when n == 1, unlike 2:n which yields c(2, 1)
# and would try to read a nonexistent second file.
for (i in seq_len(n)[-1]) {
  Add_dataset <- read.table(Address[i], sep = "\t")
  # Column (i + 1) is new because the first file occupies columns 1-2.
  Data_set[, (i + 1)] <- Add_dataset[, 2]
}
# The code above is the core of the data input. Two approaches are shown:
# direct column assignment (used) and cbind (commented out); the first
# runs faster than the second.
#fix(Data_set)
# Note: every statement inside the loop body executes on each iteration.
# If Data_set were created inside the loop, it would be reset on every
# iteration and produce an unexpected result.
# Standardize the measurement columns ------------------------------------
# Column 1 holds identifiers, so it is dropped before scaling; scale()
# centers each remaining column to mean 0 and rescales it to sd 1.
z <- scale(Data_set[, -1])
# Transpose so that each original file becomes a row (an observation)
# and each original row becomes a column (a variable).
Counter_z <- t(z)
#fix(Counter_z)
# Label the variables with the identifiers from the first column.
colnames(Counter_z) <- Data_set[[1]]
#fix(Counter_z)
# PCA (principal component analysis) -------------------------------------
library(psych)
# To choose how many components to keep, uncomment the scree plot below
# and inspect it before fixing nfactors.
#fa.parallel(Counter_z, fa = "pc", n.iter = 100,
#            show.legend = FALSE, main = "Scree plot with parallel analysis")
# Extract two principal components and compute per-observation scores.
pc <- principal(Counter_z, nfactors = 2, scores = TRUE)
print(head(pc$scores))
# 浙公网安备 33010602011771号  (scraped web-page footer, not part of the
# script; commented out so the file parses)