CM.py

导航

使用R语言实现批量录入文件并进行主成分分析

# Build the list of input files.
# The paths are built relative to the same "input" directory that dir() lists,
# so the files that are listed are exactly the files that are read. (Previously
# the listing was relative to the working directory while the read used a
# hard-coded "D:/R working directory/input/" prefix, which only agreed when the
# working directory happened to be that folder.)
File_names <- dir("input")
# print(File_names)
# file.path() returns character(0) for an empty listing, whereas paste() would
# produce a single bogus path consisting of the prefix alone.
Address <- file.path("input", File_names)
# print(Address)
n <- length(Address)
print(n)
if (n == 0) {
  stop("No input files found in directory 'input'", call. = FALSE)
}

# Indices of the files after the first one. seq_len(n)[-1] is empty when there
# is only one file; 2:n would count down to c(2, 1) and try to read Address[2],
# which is NA.
Cycle <- seq_len(n)[-1]

# The first file seeds the data frame; it must be read outside the loop,
# otherwise Data_set would be reset on every iteration.
Data_set <- read.table(Address[1], sep = "\t")
for (i in Cycle) {
  Add_dataset <- read.table(Address[i], sep = "\t")
  # print(Add_dataset[, 2])
  # Store the second column of file i as column i + 1.
  # NOTE(review): this assumes the first file has exactly two columns
  # (labels + values) -- confirm against the input format.
  Data_set[, (i + 1)] <- Add_dataset[, 2]
  # Alternative (slower): Data_set <- cbind(Data_set, Add_dataset[, 2])
}

# The code above is the core of the data input. Two ways of appending a column are provided (indexed assignment and the commented-out cbind); the first one runs faster than the second.
#fix(Data_set)
# Attention: every statement inside the loop body is executed again on each iteration. If you initialise Data_set inside the loop,
# Data_set will be reset on every iteration and you will get an unexpected result.
# Standardise the data: apply scale() to every column except the first
# (the first column supplies the labels used below).
z <- scale(Data_set[, -1])

# Transpose the standardised matrix, then label its columns with the first
# column of Data_set (these become column names, not row names).
Counter_z <- t(z)
# fix(Counter_z)
colnames(Counter_z) <- Data_set[[1]]
# fix(Counter_z)
# Principal component analysis (PCA)
library(psych)

# Optional: draw a scree plot with parallel analysis to decide how many
# principal components should be chosen.
# fa.parallel(Counter_z, fa = "pc", n.iter = 100,
#             show.legend = FALSE, main = "Scree plot with parallel analysis")

# Extract two components, keep the component scores, and show the first rows.
pc <- principal(r = Counter_z, nfactors = 2, scores = TRUE)
print(head(pc$scores, n = 6L))

posted on 2016-04-26 00:22  CM.py  阅读(132)  评论(0)    收藏  举报