numpy and pandas

import numpy as np
# Basic ndarray attributes.
# The dtype is an explicit fixed-width type: the `np.int` alias was removed in
# NumPy 1.24 and raises AttributeError on current releases.
array = np.array([[1, 2, 3],
                  [4, 5, 6]], dtype=np.int32)
print(array)
print(array.ndim)   # number of dimensions
print(array.size)   # total number of elements
print(array.shape)  # size along each dimension: shape[0] is 2, shape[1] is 3
print(array.dtype)  # element data type
print(len(array))   # length of the first axis, i.e. the number of rows
print(array.reshape(1, 6))  # same elements arranged in a new shape
a = np.arange(4)  # half-open range: starts at 0 and stops before 4
print(a)
print(a < 2)  # element-wise comparison produces a boolean mask

输出:

[[1 2 3]
 [4 5 6]]
2
6
(2, 3)
int32
2
[[1 2 3 4 5 6]]
[0 1 2 3]
[ True  True False False]
# NumPy: element-wise product versus matrix product
a = np.ones((2, 2))
b = np.array([[1, 2], [3, 4]])

elementwise = a * b            # multiplies entries at matching positions
matrix_via_function = np.dot(a, b)  # matrix multiplication, function form
matrix_via_method = a.dot(b)        # matrix multiplication, method form

for result in (elementwise, matrix_via_function, matrix_via_method):
    print(result)

输出

[[1. 2.]
 [3. 4.]]
[[4. 6.]
 [4. 6.]]
[[4. 6.]
 [4. 6.]]
# Reductions along a chosen axis of a random 2x4 matrix
a = np.random.random((2, 4))
print(a)

row_totals = a.sum(axis=1)     # axis=1 runs across each row: one sum per row
column_minima = a.min(axis=0)  # axis=0 runs down each column: one minimum per column
row_maxima = a.max(axis=1)     # one maximum per row

print(row_totals)
print(column_minima)
print(row_maxima)

输出

[[0.64231375 0.18094835 0.1069843  0.99325735]
 [0.58826183 0.45672993 0.31294667 0.56790276]]
[1.92350376 1.92584119]
[0.58826183 0.18094835 0.1069843  0.56790276]
[0.99325735 0.58826183]
# Statistics, ordering and reshaping helpers on a 3x4 matrix holding 2..13
A = np.arange(2, 14).reshape((3, 4))
print(A)

results = (
    np.mean(A),        # mean of all elements
    np.median(A),      # median of all elements
    np.argmin(A),      # position of the smallest value in the flattened array
    np.argmax(A),      # position of the largest value in the flattened array
    np.cumsum(A),      # running total over the flattened array
    np.diff(A),        # difference between neighbouring values within each row
    np.sort(A),        # every row sorted independently
    np.transpose(A),   # transpose
    A.T,               # shorthand for the transpose
    np.clip(A, 3, 5),  # values below 3 become 3, values above 5 become 5
)
for result in results:
    print(result)

# Iterating over A.T visits the columns of A one at a time.
print(*A.T, sep="\n")
print(A.flatten())
# A.flat is an iterator over every element of A.
print(*A.flat, sep="\n")

输出

[[ 2  3  4  5]
 [ 6  7  8  9]
 [10 11 12 13]]
7.5
7.5
0
11
[ 2  5  9 14 20 27 35 44 54 65 77 90]
[[1 1 1]
 [1 1 1]
 [1 1 1]]
[[ 2  3  4  5]
 [ 6  7  8  9]
 [10 11 12 13]]
[[ 2  6 10]
 [ 3  7 11]
 [ 4  8 12]
 [ 5  9 13]]
[[ 2  6 10]
 [ 3  7 11]
 [ 4  8 12]
 [ 5  9 13]]
[[3 3 4 5]
 [5 5 5 5]
 [5 5 5 5]]
[ 2  6 10]
[ 3  7 11]
[ 4  8 12]
[ 5  9 13]
[ 2  3  4  5  6  7  8  9 10 11 12 13]
2
3
4
5
6
7
8
9
10
11
12
13
# pandas 做个性化筛选
import pandas as pd
import numpy as np

# Build a 6x4 frame of 0..23 indexed by six consecutive days.
dates = pd.date_range('20201004', periods=6)
df = pd.DataFrame(np.arange(24).reshape((6, 4)), index=dates, columns=['A', 'B', 'C', 'D'])

# print(df['A'],df.A)  # select a column by name
# print(df[0:3])

# loc: label-based selection
print(df.loc['2020-10-05'])
print(df.loc[:, ['A', 'D']])

# iloc: position-based selection
print(df.iloc[[1, 3, 5], 1:3])

# Overwrite individual cells
df.iloc[2, 2] = 555
df.loc['2020-10-05', 'A'] = 999
print(df)

# Conditional assignment goes through a single .loc call. The chained form
# `df.B[mask] = 0` assigns into an intermediate object, which triggers
# SettingWithCopyWarning and leaves df untouched under Copy-on-Write.
df.loc[df.A > 16, 'B'] = 0
print(df)

# Handling NaN values.
# NaN is a float, so make the affected columns float up front instead of
# relying on implicit int -> float upcasting during assignment (recent pandas
# releases warn about that upcasting).
df = df.astype({'C': float, 'D': float})
df.iloc[2, 3] = np.nan
df.iloc[1, 2] = np.nan
print(df)

print(df.dropna(axis=0, how='any'))  # how={'any', 'all'}: drop rows with any / all NaN
print(df.fillna(value=0))            # replace NaN with 0
print(df.isnull())                   # boolean frame marking the NaN cells
print(df.isnull().values.any())      # True if at least one cell is NaN

输出

A    4
B    5
C    6
D    7
Name: 2020-10-05 00:00:00, dtype: int32
             A   D
2020-10-04   0   3
2020-10-05   4   7
2020-10-06   8  11
2020-10-07  12  15
2020-10-08  16  19
2020-10-09  20  23
             B   C
2020-10-05   5   6
2020-10-07  13  14
2020-10-09  21  22
              A   B    C   D
2020-10-04    0   1    2   3
2020-10-05  999   5    6   7
2020-10-06    8   9  555  11
2020-10-07   12  13   14  15
2020-10-08   16  17   18  19
2020-10-09   20  21   22  23
              A   B    C   D
2020-10-04    0   1    2   3
2020-10-05  999   0    6   7
2020-10-06    8   9  555  11
2020-10-07   12  13   14  15
2020-10-08   16  17   18  19
2020-10-09   20   0   22  23
              A   B      C     D
2020-10-04    0   1    2.0   3.0
2020-10-05  999   0    NaN   7.0
2020-10-06    8   9  555.0   NaN
2020-10-07   12  13   14.0  15.0
2020-10-08   16  17   18.0  19.0
2020-10-09   20   0   22.0  23.0
             A   B     C     D
2020-10-04   0   1   2.0   3.0
2020-10-07  12  13  14.0  15.0
2020-10-08  16  17  18.0  19.0
2020-10-09  20   0  22.0  23.0
              A   B      C     D
2020-10-04    0   1    2.0   3.0
2020-10-05  999   0    0.0   7.0
2020-10-06    8   9  555.0   0.0
2020-10-07   12  13   14.0  15.0
2020-10-08   16  17   18.0  19.0
2020-10-09   20   0   22.0  23.0
                A      B      C      D
2020-10-04  False  False  False  False
2020-10-05  False  False   True  False
2020-10-06  False  False  False   True
2020-10-07  False  False  False  False
2020-10-08  False  False  False  False
2020-10-09  False  False  False  False
True

 

posted @ 2020-10-08 15:39  kkzhang  阅读(118)  评论(0)  编辑  收藏  举报