reindex(
labels=None,
index=None,
columns=None,
axis=None,
method=None,
copy=True,
level=None,
fill_value=nan,
limit=None,
tolerance=None,
)
Docstring:
Conform DataFrame to new index with optional filling logic.
or Conform Series to new index with optional filling logic.
Parameters
----------
index : array-like, optional
New labels / index to conform to, should be specified using
keywords. Preferably an Index object to avoid duplicating data.
method : {None, 'backfill'/'bfill', 'pad'/'ffill', 'nearest'}
Method to use for filling holes in reindexed DataFrame.
Please note: this is only applicable to DataFrames/Series with a
monotonically increasing/decreasing index.
* None (default): don't fill gaps
* pad / ffill: Propagate last valid observation forward to next
valid.
* backfill / bfill: Use next valid observation to fill gap.
* nearest: Use nearest valid observations to fill gap.
copy : bool, default True
Return a new object, even if the passed indexes are the same.
level : int or name
Broadcast across a level, matching Index values on the
passed MultiIndex level.
fill_value : scalar, default np.NaN
Value to use for missing values. Defaults to NaN, but can be any
"compatible" value.
limit : int, default None
Maximum number of consecutive elements to forward or backward fill.
tolerance : optional
Maximum distance between original and new labels for inexact
matches. The values of the index at the matching locations most
satisfy the equation ``abs(index[indexer] - target) <= tolerance``.
Tolerance may be a scalar value, which applies the same tolerance
to all values, or list-like, which applies variable tolerance per
element. List-like includes list, tuple, array, Series, and must be
the same size as the index and its dtype must exactly match the
index's type.
import numpy as np
import pandas as pd
from pandas import Series, DataFrame
np.random.seed(666)
# series reindex
s1 = Series([1, 2, 3, 4], index=['A', 'B', 'C', 'D'])
print(s1)
'''
A 1
B 2
C 3
D 4
dtype: int64
'''
# 重新指定 index, 多出来的index,可以使用fill_value 填充
print(s1.reindex(index=['A', 'B', 'C', 'D', 'E'], fill_value = 10))
'''
A 1
B 2
C 3
D 4
E 10
dtype: int64
'''
s2 = Series(['A', 'B', 'C'], index = [1, 5, 10])
print(s2)
'''
1 A
5 B
10 C
dtype: object
'''
# 修改索引,
# 将s2的索引增加到15个
# 如果新增加的索引值不存在,默认为 Nan
print(s2.reindex(index=range(15)))
'''
0 NaN
1 A
2 NaN
3 NaN
4 NaN
5 B
6 NaN
7 NaN
8 NaN
9 NaN
10 C
11 NaN
12 NaN
13 NaN
14 NaN
dtype: object
'''
# ffill : foreaward fill 向前填充,
# 如果新增加索引的值不存在,那么按照前一个非nan的值填充进去
print(s2.reindex(index=range(15), method='ffill'))
'''
0 NaN
1 A
2 A
3 A
4 A
5 B
6 B
7 B
8 B
9 B
10 C
11 C
12 C
13 C
14 C
dtype: object
'''
# reindex dataframe
df1 = DataFrame(np.random.rand(25).reshape([5, 5]), index=['A', 'B', 'D', 'E', 'F'], columns=['c1', 'c2', 'c3', 'c4', 'c5'])
print(df1)
'''
c1 c2 c3 c4 c5
A 0.700437 0.844187 0.676514 0.727858 0.951458
B 0.012703 0.413588 0.048813 0.099929 0.508066
D 0.200248 0.744154 0.192892 0.700845 0.293228
E 0.774479 0.005109 0.112858 0.110954 0.247668
F 0.023236 0.727321 0.340035 0.197503 0.909180
'''
# 为 dataframe 添加一个新的索引
# 可以看到 自动 扩充为 nan
print(df1.reindex(index=['A', 'B', 'C', 'D', 'E', 'F']))
''' 自动填充为 nan
c1 c2 c3 c4 c5
A 0.700437 0.844187 0.676514 0.727858 0.951458
B 0.012703 0.413588 0.048813 0.099929 0.508066
C NaN NaN NaN NaN NaN
D 0.200248 0.744154 0.192892 0.700845 0.293228
E 0.774479 0.005109 0.112858 0.110954 0.247668
F 0.023236 0.727321 0.340035 0.197503 0.909180
'''
# 扩充列, 也是一样的
print(df1.reindex(columns=['c1', 'c2', 'c3', 'c4', 'c5', 'c6']))
'''
c1 c2 c3 c4 c5 c6
A 0.700437 0.844187 0.676514 0.727858 0.951458 NaN
B 0.012703 0.413588 0.048813 0.099929 0.508066 NaN
D 0.200248 0.744154 0.192892 0.700845 0.293228 NaN
E 0.774479 0.005109 0.112858 0.110954 0.247668 NaN
F 0.023236 0.727321 0.340035 0.197503 0.909180 NaN
'''
# 减小 index
print(s1.reindex(['A', 'B']))
''' 相当于一个切割效果
A 1
B 2
dtype: int64
'''
print(df1.reindex(index=['A', 'B']))
''' 同样是一个切片的效果
c1 c2 c3 c4 c5
A 0.601977 0.619927 0.251234 0.305101 0.491200
B 0.244261 0.734863 0.569936 0.889996 0.017936
'''
# 对于一个 serie 来说,可以使用 drop,来丢掉某些 index
print(s1.drop('A'))
''' 就只剩下 三个了
B 2
C 3
D 4
dtype: int64
'''
# dataframe drop(A) 直接去掉一行
print(df1.drop('A', axis=0))
''' axis 默认 是 行
c1 c2 c3 c4 c5
B 0.571883 0.254364 0.530883 0.295224 0.352663
D 0.858452 0.379495 0.593284 0.786078 0.949718
E 0.556276 0.643187 0.808664 0.289422 0.501041
F 0.737993 0.286072 0.332714 0.873371 0.421615
'''
print(df1.drop('c1', axis=1))
''' 将 c1 的列 去掉
c2 c3 c4 c5
A 0.326681 0.247832 0.601982 0.145905
B 0.373961 0.393819 0.439284 0.926706
D 0.558490 0.617851 0.461280 0.373102
E 0.030434 0.566498 0.383103 0.739243
F 0.982220 0.989826 0.957863 0.411514
'''