1st_pandas

笔记内容来自up主莫烦
https://www.bilibili.com/video/BV1Ex411L7oT?p=9

import pandas as pd
import numpy as np

"""
1. 创建连续的日期索引(DatetimeIndex) pd.date_range(start,end,periods,freq)
dates = pd.date_range('20160101',periods=6)


2. pd.DataFrame(data,index,columns,dtype,copy)的创建

1)
# 这里要注意 随机生成数据np.random.randn(6,4)
df = pd.DataFrame(np.random.randn(6,4),index=dates,columns=[1,2,3,4])

print(df)
1 2 3 4
2016-01-01 0.232239 -2.057311 1.471347 -1.006878
2016-01-02 0.644637 -1.303135 -0.457582 -0.847513
2016-01-03 0.049504 -0.297996 0.640345 0.841291
2016-01-04 -0.208046 -1.093770 -1.206976 0.977323
2016-01-05 -0.952440 0.886028 1.401906 -0.898003
2016-01-06 0.287711 1.075616 -1.715452 0.669161

2) 不带 index col 名 ,则默认 index col 为数字
df1 = pd.DataFrame(np.arange(12).reshape(3,4))
0 1 2 3
0 0 1 2 3
1 4 5 6 7
2 8 9 10 11

3)用字典创建DataFrame 标量值会自动广播,行数(index)与最长的列表对齐
df2 = pd.DataFrame({'A':[1,2,3,4],'B':2,'C':3})
A B C
0 1 2 3
1 2 2 3
2 3 2 3
3 4 2 3

3.DataFrame的属性访问

1)print(df.index) # 输出 row 的名字
DatetimeIndex(['2016-01-01', '2016-01-02', '2016-01-03', '2016-01-04',
'2016-01-05', '2016-01-06'],
dtype='datetime64[ns]', freq='D')

2)print(df.columns) # 输出 col 的名字
Int64Index([1, 2, 3, 4], dtype='int64')

3)print(df.values) # 输出 data
[[ 0.23223861 -2.05731101 1.47134695 -1.00687769]
[ 0.64463711 -1.30313538 -0.45758167 -0.84751345]
[ 0.04950445 -0.29799635 0.64034504 0.84129072]
[-0.20804606 -1.09376993 -1.2069759 0.97732253]
[-0.95243955 0.88602791 1.40190587 -0.89800295]
[ 0.28771058 1.07561617 -1.71545208 0.66916143]]

4)print(df.describe()) # 默认只统计数值列,忽略 date,str 等非数值列
1 2 3 4
count 6.000000 6.000000 6.000000 6.000000
mean 0.008934 -0.465095 0.022265 -0.044103
std 0.548574 1.253790 1.352495 0.963071
min -0.952440 -2.057311 -1.715452 -1.006878
25% -0.143658 -1.250794 -1.019627 -0.885381
50% 0.140872 -0.695883 0.091382 -0.089176
75% 0.273843 0.590022 1.211516 0.798258
max 0.644637 1.075616 1.471347 0.977323

4. pd.DataFrame()的排序
1) df.sort_index() 只根据 行/列的标签(index/columns)排序,不看数据值
print(df.sort_index(axis=1,ascending=False))
4 3 2 1
2016-01-01 -1.006878 1.471347 -2.057311 0.232239
2016-01-02 -0.847513 -0.457582 -1.303135 0.644637
2016-01-03 0.841291 0.640345 -0.297996 0.049504
2016-01-04 0.977323 -1.206976 -1.093770 -0.208046
2016-01-05 -0.898003 1.401906 0.886028 -0.952440
2016-01-06 0.669161 -1.715452 1.075616 0.287711

print(df.sort_index(axis=0,ascending=False))

1 2 3 4
2016-01-06 0.287711 1.075616 -1.715452 0.669161
2016-01-05 -0.952440 0.886028 1.401906 -0.898003
2016-01-04 -0.208046 -1.093770 -1.206976 0.977323
2016-01-03 0.049504 -0.297996 0.640345 0.841291
2016-01-02 0.644637 -1.303135 -0.457582 -0.847513
2016-01-01 0.232239 -2.057311 1.471347 -1.006878

2)df.sort_values 根据 某一行/列的大小来排序
print(df.sort_values(by=3,ascending=False))
1 2 3 4
2016-01-01 0.232239 -2.057311 1.471347 -1.006878
2016-01-05 -0.952440 0.886028 1.401906 -0.898003
2016-01-03 0.049504 -0.297996 0.640345 0.841291
2016-01-02 0.644637 -1.303135 -0.457582 -0.847513
2016-01-04 -0.208046 -1.093770 -1.206976 0.977323
2016-01-06 0.287711 1.075616 -1.715452 0.669161


print(df.sort_values(by='20160101',axis=1,ascending=False))
3 1 4 2
2016-01-01 1.471347 0.232239 -1.006878 -2.057311
2016-01-02 -0.457582 0.644637 -0.847513 -1.303135
2016-01-03 0.640345 0.049504 0.841291 -0.297996
2016-01-04 -1.206976 -0.208046 0.977323 -1.093770
2016-01-05 1.401906 -0.952440 -0.898003 0.886028
2016-01-06 -1.715452 0.287711 0.669161 1.075616
"""
# Runnable walk-through of the pandas basics summarised in the notes above:
# building a Series / DataFrame, inspecting attributes, and sorting.
#
# NOTE: a bare ``pd.date_range()`` call used to sit here. With no arguments
# pandas raises ValueError (three of start/end/periods/freq are required),
# which aborted the script before anything was printed, so it was removed.
# The real call is the one that builds ``dates`` below.

# A Series holding a missing value (NaN); dtype is forced to float.
s = pd.Series([1, 3, 5, np.nan, 44, 1], dtype=float)
print(s)

# Six consecutive calendar days starting at 2016-01-01 (default freq is 'D').
dates = pd.date_range('20160101', periods=6)
print(dates)

# 6x4 frame of standard-normal samples, indexed by the dates above and with
# the integers 1..4 as column labels.
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=[1, 2, 3, 4])
print(df)

# Without explicit index/columns, both default to 0..n-1.
df1 = pd.DataFrame(np.arange(12).reshape(3, 4))
print(df1)

# From a dict: the scalar values for 'B' and 'C' are broadcast to the
# length of the list given for 'A'.
df2 = pd.DataFrame({'A': [1, 2, 3, 4], 'B': 2, 'C': 3})
print(df2)

# Per-column dtypes.
print(df.dtypes)

# Row labels.
print(df.index)
# Column labels.
print(df.columns)
# Underlying data as a NumPy array.
print(df.values)

# Summary statistics (count/mean/std/min/quartiles/max) for each column.
print(df.describe())

# Sort by column labels, descending.
print(df.sort_index(axis=1, ascending=False))
# Sort by row labels, descending.
print(df.sort_index(axis=0, ascending=False))

# Sort rows by the values in column 3, descending.
print(df.sort_values(by=3, ascending=False))

# Sort columns by the values in the 2016-01-01 row, descending
# (axis=1 means ``by`` names a row label rather than a column).
print(df.sort_values(by='20160101', axis=1, ascending=False))
posted @ 2020-05-18 16:04  ChevisZhang  阅读(130)  评论(0编辑  收藏  举报