R语言进阶数据展现-4
图例:
# Monthly rainfall for four cities on one chart, followed by two alternative
# legend layouts: one placed by keyword, one placed at explicit coordinates.
rain <- read.csv("cityrain.csv")

# Shared by the plot and both legends so labels and colours cannot drift apart.
city_labels <- c("Tokyo", "Berlin", "New York", "London")
city_cols <- c("black", "red", "orange", "purple")

plot(rain$Tokyo, type = "b", lwd = 2,
     xaxt = "n",  # suppress the default x axis; a labelled one is added below
     ylim = c(0, 300), col = city_cols[1],
     xlab = "Month", ylab = "Rainfall (mm)",
     main = "Monthly Rainfall in major cities")
# One tick per row, labelled with the Month column. seq_along() replaces
# 1:length(), which would produce c(1, 0) for an empty column.
axis(1, at = seq_along(rain$Month), labels = rain$Month)
lines(rain$Berlin, col = city_cols[2], type = "b", lwd = 2)
lines(rain$NewYork, col = city_cols[3], type = "b", lwd = 2)
lines(rain$London, col = city_cols[4], type = "b", lwd = 2)

# Legend placed by keyword. Accepted keywords: "bottomright", "bottom",
# "bottomleft", "left", "topleft", "top", "topright", "right", "center".
legend("topright", legend = city_labels,
       lty = 1, lwd = 2,
       pch = 21,              # point symbol shown in the legend
       col = city_cols,
       ncol = 2,              # lay the entries out in two columns
       bty = "n",             # no box around the legend
       cex = 0.8,
       text.col = city_cols,  # colour of each label
       inset = 0.01)          # offset of the legend from the plot edge

# Legend placed at explicit user coordinates (x = 1, y = 300).
legend(1, 300, legend = city_labels,
       lty = 1, lwd = 2, pch = 21, col = city_cols,
       horiz = TRUE,          # lay the entries out in a single row
       bty = "n",
       bg = "yellow",         # NOTE: has no visible effect while bty = "n"
       cex = 1,
       text.col = city_cols)
标注在线的边上:
# Line chart of GDP percentage change for five countries, with each line
# labelled in the right margin next to its last value instead of a legend.
gdp <- read.table("gdp_long.txt", header = TRUE)
library(RColorBrewer)  # colour palettes

countries <- c("Canada", "France", "Germany", "Britain", "USA")
pal <- brewer.pal(5, "Set1")  # five colours from the "Set1" palette
# Vertical nudge applied to each margin label; USA is moved down by 2 so it
# does not overlap a neighbouring label.
label_shift <- c(0, 0, 0, 0, -2)

# Widen the right margin so the labels fit, and draw an L-shaped box.
# The previous settings are kept in old_par; call par(old_par) once nothing
# more will be added to this plot.
old_par <- par(mar = par("mar") + c(0, 0, 0, 2), bty = "l")

plot(Canada ~ Year, data = gdp, type = "l", lwd = 2, lty = 1, ylim = c(30, 60),
     col = pal[1], main = "Percentage change in GDP", ylab = "")

for (k in seq_along(countries)) {
  series <- gdp[[countries[k]]]
  if (k > 1) {
    # The first country was already drawn by plot() above.
    lines(gdp$Year, series, col = pal[k], lwd = 2)
  }
  # side = 4 is the right margin; `at` is a y value, so the label sits at
  # the height of the series' last observation. las = 2 writes the text
  # perpendicular to the axis.
  mtext(side = 4, at = series[length(series)] + label_shift[k],
        text = countries[k], col = pal[k], line = 0.3, las = 2)
}
网格线
画完图以后,直接使用
# Add grid lines to the plot that is currently on the device.
grid()  # let R choose the grid automatically

# Customised grid: nx / ny give the number of grid cells in the x and y
# directions; NA suppresses the lines for that direction.
grid(
  nx = NA,
  ny = 8,
  lwd = 1,       # line width
  lty = 2,       # line type
  col = "blue"   # line colour
)
特定的x/y值画标注线
# Reference lines at specific x / y values on the current plot.
abline(v = 9)                          # vertical line at x = 9
abline(h = 150, col = "red", lty = 2)  # horizontal line at y = 150, with colour and line type set
迷你图(Sparklines)
# Sparklines: one small, axis-free line per city (columns 2 to 5), stacked
# vertically, with the city name on the left, its mean on the right, and the
# minimum / maximum points highlighted.
rain <- read.csv("cityrain.csv")

# Split the device into 4 rows x 1 column and set the margins. The previous
# settings are saved so the layout does not leak into later plots.
old_par <- par(mfrow = c(4, 1), mar = c(5, 7, 4, 2), omi = c(0.2, 2, 0.2, 2))

for (i in 2:5) {
  city_rain <- rain[, i]
  # The mean must be taken over the column vector: mean() of a one-column
  # data frame (rain[i]) returns NA with a warning.
  city_mean <- mean(city_rain)

  # No annotations and no axes: just the line.
  plot(city_rain, ann = FALSE, axes = FALSE, type = "l",
       col = "gray", lwd = 2)
  # Left margin: the city name, placed at the height of the mean.
  mtext(side = 2, at = city_mean, text = names(rain)[i],
        las = 2, col = "black")
  # Right margin: the mean value, rounded to one decimal for readability.
  mtext(side = 4, at = city_mean, text = round(city_mean, 1),
        las = 2, col = "black")
  points(which.min(city_rain), min(city_rain), pch = 19, col = "blue")  # minimum
  points(which.max(city_rain), max(city_rain), pch = 19, col = "red")   # maximum
}

par(old_par)  # restore the single-panel layout and the original margins
画数据内部两个变量之间的关系
# Plot a derived quantity (Berlin minus London rainfall) directly, with a
# zero line to show which city was wetter in each month.
rain <- read.csv("cityrain.csv")
plot(rain$Berlin - rain$London,  # the expression is passed straight to plot()
     type = "l", lwd = 2,
     xaxt = "n", col = "blue",
     xlab = "Month", ylab = "Difference in Rainfall (mm)",
     main = "Difference in Rainfall between Berlin and London (Berlin-London)")
# seq_along() replaces 1:length(), which would produce c(1, 0) for an
# empty column.
axis(1, at = seq_along(rain$Month), labels = rain$Month)
abline(h = 0, col = "red")
画某一函数表达式的图
# Plot the curve of a function given by a formula.
x <- seq_len(100)
y <- x^3 - 6 * x^2 + 5 * x + 10
# expression() makes the title render as typeset mathematics.
plot(
  y ~ x,
  type = "l",
  main = expression(f(x) == x^3 - 6 * x^2 + 5 * x + 10)
)
时间序列图
处理时间格式
# Convert the text dates to real date classes; R cannot treat them as dates
# until they are parsed.
sales <- read.csv("dailysales.csv")
# NOTE(review): "%y" is a two-digit year; confirm it matches the file,
# since another snippet in this post parses the same column with "%Y".
date_fmt <- "%d/%m/%y"
d1 <- as.Date(sales$date, date_fmt)
d2 <- strptime(sales$date, date_fmt)
data.class(d1)
# [1] "Date"
data.class(d2)
# [1] "POSIXt"
# (output as recorded in the original post; current R versions are expected
# to report "POSIXlt" here instead - verify on your installation.)
# This class also carries hours, minutes and seconds.
具体见 R 帮助文档 ?DateTimeClasses
标注时间在x轴
# Three ways to put dates on the x axis of a daily sales series.
sales <- read.csv("dailysales.csv")

# 1. as.Date(): the result can be used on the right-hand side of a formula,
#    so the plot(y ~ x) form works.
sale_days <- as.Date(sales$date, "%d/%m/%y")
plot(sales$units ~ sale_days, type = "l",
     xlab = "Date", ylab = "Units Sold")

# 2. strptime(): per the original author's note, its result cannot be used
#    in a formula, so the plot(x, y) form is used instead.
# NOTE(review): this call parses with "%Y" (four-digit year) while the other
# two use "%y" (two-digit year); check which one matches the data file.
sale_times <- strptime(sales$date, "%d/%m/%Y")
plot(sale_times, sales$units, type = "l",
     xlab = "Date", ylab = "Units Sold")

# 3. zoo(): values and index are bundled in one object, so there is no
#    separate x and y to pass.
library(zoo)
plot(zoo(sales$units, as.Date(sales$date, "%d/%m/%y")))
既有日期,又有时间的情况
# Time series whose timestamps carry both a date and a time of day.
air <- read.csv("openair.csv")

# Parse to POSIXct so the "%H:%M" part is kept. as.Date() would discard the
# time of day, which stacks every observation of a day on one x position and
# gives zoo() a non-unique index.
# NOTE(review): tz = "GMT" follows the aggregation snippet later in this
# post; confirm the time zone of the source data.
obs_time <- as.POSIXct(air$date, format = "%d/%m/%Y %H:%M", tz = "GMT")

plot(air$nox ~ obs_time, type = "l",
     xlab = "Time", ylab = "Concentration (ppb)",
     main = "Time trend of Oxides of Nitrogen")

# Same series as a zoo object (zoo must already be attached).
plot(zoo(air$nox, obs_time),
     xlab = "Time", ylab = "Concentration (ppb)",
     main = "Time trend of Oxides of Nitrogen")
使用不同的形式把时间标注在x轴(前面的情况中,x轴是由R自动设置的)
# Custom x-axis labelling: suppress the automatic axis and draw one tick per
# month, labelled like "Jan-2003".
air <- read.csv("openair.csv")
plot(air$nox ~ as.Date(air$date, "%d/%m/%Y %H:%M"), type = "l",
     xaxt = "n",  # do not draw the default x axis
     xlab = "Time", ylab = "Concentration (ppb)",
     main = "Time trend of Oxides of Nitrogen")

# POSIXlt exposes the day of the month as $mday.
xlabels <- strptime(air$date, format = "%d/%m/%Y %H:%M")
# Keep observations that fall on the 1st of a month. which() drops rows
# whose date failed to parse, and unique() leaves a single tick per month
# even when several observations share that day. The ticks are converted to
# Date so they match the Date x axis used by the plot.
month_starts <- unique(as.Date(xlabels[which(xlabels$mday == 1)]))
axis.Date(1, at = month_starts, format = "%b-%Y")
在某个时间点添加标记线
# Mark specific dates on a time-series plot with vertical lines.
air <- read.csv("openair.csv")
obs_day <- as.Date(air$date, "%d/%m/%Y %H:%M")
plot(air$nox ~ obs_day, type = "l",
     xlab = "Time", ylab = "Concentration (ppb)",
     main = "Time trend of Oxides of Nitrogen")

# A single marker.
abline(v = as.Date("25/12/2003", "%d/%m/%Y"))

# One marker per year. For a two-year spacing, use by = "2 years".
markers <- seq(
  from = as.Date("25/12/1998", "%d/%m/%Y"),
  to = as.Date("25/12/2004", "%d/%m/%Y"),
  by = "year"
)
abline(v = markers, col = "red")
注意,在一个时间序列中,如果有缺失,R不会留白,而是会把缺失前后的点连起来。需要把缺失段的y值设置为NA以避免这一情况。
根据不同时间段的平均值
# Average the NOx series over calendar periods (weeks, then days) and plot
# the averages against the real start date of each period.
air <- read.csv("openair.csv")
air$date <- as.POSIXct(strptime(air$date, format = "%d/%m/%Y %H:%M", tz = "GMT"))
obs_day <- as.Date(air$date, tz = "GMT")

# Weekly means. Each observation is keyed by the Sunday that starts its week
# ($wday is 0 on Sundays), so the x value of every mean is an actual date.
# Evenly spacing nrow(means) points over the whole span, as before, misplaces
# every point after a gap in the data.
week_start <- obs_day - as.POSIXlt(obs_day)$wday
means <- aggregate(air["nox"], by = list(date = week_start),
                   FUN = mean, na.rm = TRUE)  # na.rm: ignore missing readings
plot(means$date, means$nox, type = "l")

# Daily means, keyed by calendar day.
means <- aggregate(air["nox"], by = list(date = obs_day),
                   FUN = mean, na.rm = TRUE)
plot(means$date, means$nox, type = "l",
     xlab = "Time", ylab = "Concentration (ppb)",
     main = "Daily Average Concentrations of Oxides of Nitrogen")
画股票图
# Packages for the stock charts. Install only when missing, so re-running the
# script does not download and reinstall them every time.
# quantmod home page: http://www.quantmod.com
for (pkg in c("quantmod", "tseries")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}
library(quantmod)
library(tseries)
tseries library:
# Compare three stocks' closing prices using tseries::get.hist.quote().
# The function retrieves historical quotes from an online provider (the
# original note mentions yahoo/OANDA - check the providers supported by your
# tseries version). "Cl" and "Vol" request the closing price and the volume.
aapl <- get.hist.quote(instrument = "aapl", quote = c("Cl", "Vol"))
goog <- get.hist.quote(instrument = "goog", quote = c("Cl", "Vol"))
msft <- get.hist.quote(instrument = "msft", quote = c("Cl", "Vol"))

plot(msft$Close, main = "Stock Price Comparison", ylim = c(0, 800),
     col = "red", type = "l", lwd = 0.5, pch = 19, cex = 0.6,
     xlab = "Date", ylab = "Stock Price (USD)")
lines(goog$Close, col = "blue", lwd = 0.5)
lines(aapl$Close, col = "gray", lwd = 0.5)
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
legend("top", horiz = TRUE,
       legend = c("Microsoft", "Google", "Apple"),
       col = c("red", "blue", "gray"), lty = 1, bty = "n")
quantmod package
# quantmod: download the quotes, then draw bar and candlestick charts.
# Yahoo is the default source. The data are stored in an object named after
# the symbol, which is why AAPL can be used directly below.
getSymbols("AAPL", src = "yahoo")
barChart(AAPL)
candleChart(AAPL, theme = "white")  # candlestick chart

浙公网安备 33010602011771号