import numpy as np
import pandas as pd
# Demo: bin column "B" into four percentile-based intervals with pd.cut.
left = pd.DataFrame(
    {
        "A": [1.0, np.nan, 3.0, 4.0],
        "B": [1.0, 2.0, 3.0, 4.0],
    }
)

# Percentile ranks 0, 25, 50, 75, 100 -> five edges delimiting four bins.
quantile_ranks = np.linspace(0, 100, 5)
# nanpercentile skips NaN entries when computing the bin edges.
d = np.nanpercentile(left["B"], quantile_ranks)

separator = "*" * 100

print(quantile_ranks)
print(d)
print(separator)
print(type(d))
print(separator)

# pd.cut uses right-closed bins and, by default, excludes the lowest edge.
# Because d[0] is the column minimum, that row would fall outside every bin
# and come back as NaN; include_lowest=True makes the first bin cover it.
binned = pd.cut(left["B"], d, include_lowest=True)
print(binned)
print(separator)

# labels=False returns the integer position of each value's bin instead of
# the interval itself.
bin_codes = pd.cut(left["B"], d, labels=False, include_lowest=True)
print(bin_codes)
# Output recorded from a run using pd.cut's default include_lowest=False
# (the minimum value 1.0 equals the excluded lowest bin edge, so row 0 is NaN):
#
# [ 0. 25. 50. 75. 100.]
# [1. 1.75 2.5 3.25 4. ]
# ****************************************************************************************************
# <class 'numpy.ndarray'>
# ****************************************************************************************************
# 0 NaN
# 1 (1.75, 2.5]
# 2 (2.5, 3.25]
# 3 (3.25, 4.0]
# Name: B, dtype: category
# Categories (4, interval[float64]): [(1.0, 1.75] < (1.75, 2.5] < (2.5, 3.25] < (3.25, 4.0]]
# ****************************************************************************************************
# 0 NaN
# 1 1.0
# 2 2.0
# 3 3.0
# Name: B, dtype: float64