import pandas as pd
from typing import Union, List
from copy import deepcopy
def deduct_by_sort(basedf: pd.DataFrame, sortby: List[str], ascending: List[bool],
deductdf: pd.DataFrame, key: Union[str, List[str]], deductfield:str) -> pd.DataFrame:
"""
按排序后扣减每行的数量.
df = pd.DataFrame([
{'item': 'A', 'qty': 10},
{'item': 'A', 'qty': 20},
{'item': 'B', 'qty': 100},
{'item': 'B', 'qty': 200},
{'item': 'C', 'qty': 400},
{'item': 'D', 'qty': 400},
])
df1 = pd.DataFrame([
{'item': 'A', 'qty': 14},
{'item': 'B', 'qty': 140},
{'item': 'C', 'qty': 30},
])
# 结果
item qty
0 A 0
1 A 16
2 B 0
3 B 160
4 C 370
5 D 400
"""
def validate(keys: list, df: pd.DataFrame):
if not set(keys).issubset(set(list(df.columns))):
raise KeyError('df必须包含所有的key')
keylist = key if isinstance(key, list) else [key]
for tmpdf in [basedf, deductdf]:
validate(keylist, tmpdf)
basedf = basedf.sort_values(by=sortby, ascending=ascending)
basedf_not_in_deductdf = deepcopy(basedf)
for k in keylist:
basedf_not_in_deductdf = basedf_not_in_deductdf[~basedf_not_in_deductdf[k].isin(deductdf[k])]
deductdf = deductdf.groupby(key, as_index=False)[[deductfield]].sum()
r = pd.DataFrame()
for _, row in deductdf.iterrows():
balance = row[deductfield]
for _, hrow in basedf.iterrows():
a = True
for k in keylist:
if hrow[k] != row[k]:
a = False
break
if a:
if balance >= hrow[deductfield]:
balance = balance - hrow[deductfield]
hrow[deductfield] = 0
else:
hrow[deductfield] = hrow[deductfield] - balance
balance = 0
r = pd.concat([r, pd.DataFrame([hrow])], axis=0)
r = pd.concat([r, basedf_not_in_deductdf], axis=0)
return r