test

Lending Club(lend_club)是全球最大的 P2P 平台。
本文基于 R 语言对其数据做简单分析。

  1. rm(list=ls()) # remove every object that ls() lists in the current workspace
  2. gc() # run garbage collection to release memory
  • step1 
    考虑到后续分析 
    将数据导入sqlserver,用到SSIS 
    如图

 
 **此处有坑**

  • step2 
    连接sqlserver,并将数据读入R。
  1. plot
    
    # Total of sumamount for each (m, y) pair, i.e. per month and year.
    lend_club4<- lend_club3%>%
      group_by(m,y)%>%
      summarise(total_m=sum(sumamount))
    
    # Print the summary, then preview its first rows.
    lend_club4
    head(lend_club4)
    Source: local data frame [6 x 3]
    Groups: m [2]
    
          m     y   total_m
      (chr) (chr)     (dbl)
    1    01  2008  32256329
    2    01  2009  28523635
    3    01  2010  63082946
    4    01  2011 171186425
    5    01  2012 297667575
    6    02  2008  20596688
    

      

      

  • step3
  1. library(ggplot2) # provides qplot() and facet_wrap() used below
  2. qplot(date_1,sumamount,data=lend_club1,geom="line") # time-series line plot of the daily loan amount

  1. p<-qplot(month_day,sumamount,data=lend_club1) # loan amount plotted against month_day
  2. p+facet_wrap(~year) # daily loan amount for 2007-2012, one panel per year

  1. library(tidyr) # separate() and unite()
  2. library(dplyr) # group_by(), summarise() and the %>% pipe
  3. lend_club2<-separate(lend_club1,date_1,c("y","m","d"),sep="-") # split date_1 on "-" into y, m and d columns
  4. head(lend_club2) # preview the first rows
  5. sumamount y m d year month_day
  6. 1 2000 2007 05 26 2007 05/26
  7. 2 47400 2007 05 27 2007 05/27
  8. 3 23900 2007 05 28 2007 05/28
  9. 4 121050 2007 05 29 2007 05/29
  10. 5 87500 2007 05 30 2007 05/30
  11. 6 46500 2007 05 31 2007 05/31
  1. lend_club3<-unite(lend_club2,"y_m",y,m,sep="-",remove = FALSE) # add y_m ("y-m") while keeping the y and m columns; FALSE spelled out because F can be reassigned
  2. head(lend_club3) # preview the first rows
  3. sumamount y_m y m d year month_day
  4. 1 2000 2007-05 2007 05 26 2007 05/26
  5. 2 47400 2007-05 2007 05 27 2007 05/27
  6. 3 23900 2007-05 2007 05 28 2007 05/28
  7. 4 121050 2007-05 2007 05 29 2007 05/29
  8. 5 87500 2007-05 2007 05 30 2007 05/30
  9. 6 46500 2007-05 2007 05 31 2007 05/31
  10. qplot(m,sumamount,data=lend_club3,geom=c("boxplot"))+facet_wrap(~year) # box plot of loan amount per month, one panel per year (2007-2012); qplot() is closed before the facet layer is added

  1. lend_club4<- lend_club3%>% # total of sumamount for each (m, y) pair, i.e. per month and year
  2. group_by(m,y)%>%
  3. summarise(total_m=sum(sumamount))
  4. lend_club4 # print the summary
  5. head(lend_club4) # preview its first rows
  6. Source: local data frame [6 x 3]
  7. Groups: m [2]
  8. m y total_m
  9. (chr) (chr) (dbl)
  10. 1 01 2008 32256329
  11. 2 01 2009 28523635
  12. 3 01 2010 63082946
  13. 4 01 2011 171186425
  14. 5 01 2012 297667575
  15. 6 02 2008 20596688
  1. 折线图 分面
  2. p<-qplot(m,total_m,data=lend_club4)+geom_smooth(aes(group=y,colour=y),method = "lm") # monthly totals with one linear-model fit line per year, coloured by year

折线图 分面

  1. p<-qplot(m,total_m,data=lend_club4)+geom_smooth(aes(group=y,colour=y)) # monthly totals with one smoothed line per year (geom_smooth's default method)

  1. p+facet_wrap(~y) # draw the plot with one panel per year

  1. lend<-read.csv("C:\\Users\\liyi\\Desktop\\lend_club.csv") # read the CSV with read.csv's default header handling
  2. lend1<-read.csv("C:\\Users\\liyi\\Desktop\\lend_club.csv",header = FALSE) # re-read without a header row, so columns are named V1..Vn
  3. lend1<-lend1[-1,] # drop row 1 (presumably the header text, since header = FALSE read it as data)
  4. head(lend1)
  5. lend1<-lend1[,c(1,3,9)] # keep only columns 1, 3 and 9
  6. myvar<-c("amount","year","employment")
  7. names(lend1)<-myvar # from here on the columns are amount/year/employment, not V1/V3/V9
  8. head(lend1)
  9. str(lend1)
  10. lend1$amountnew<-as.numeric(as.character(lend1$amount)) # convert via character so a factor yields its labels, not its integer codes
  11. library(sqldf)
  12. lend2<-sqldf('select sum(amountnew) as total_amount, year, employment
  13. from lend1
  14. group by year, employment') # uses the renamed columns and the numeric amountnew; V1/V3/V9 no longer exist here
  15. q<-qplot(employment,amountnew,data = lend1,geom=c("boxplot"),colour=employment)+facet_wrap(~year) # box plot of amountnew per employment value, one panel per year
  16. q<- q+theme(axis.text.x=element_text(angle=90,hjust=1,colour="black"),legend.position='none') # rotate x labels 90 degrees and hide the legend
  17. q<- q+scale_y_continuous(limits = c(0, 100000)) # limit the y axis to 0-100000
  18. q

posted @ 2016-07-31 06:53  li_volleyball  阅读(171)  评论(0)  编辑  收藏  举报