import typing as t
import pandas as pd
import logging
import json
from datetime import timedelta
import datetime as dt

# 花呗逾期 (Huabei overdue repayment)

def rule_bad_alipay_hb_overdue(df1: pd.DataFrame, **kwargs):
    """Flag Alipay accounts whose Huabei auto-repayment ran on consecutive days.

    Auto-repayment rows are de-duplicated per day and split into segments
    wherever two neighbouring days are more than one day apart. A segment
    that spans at least two distinct days is counted as one overdue episode.

    Args:
        df1: Transactions with ``flowType``, ``ymd``, ``tradeDesc``,
            ``tradeAmount`` and ``tradeNo`` columns. The frame is not modified.
        **kwargs: Ignored; accepted so all rules share one call signature.

    Returns:
        ``None`` when no episode is found, otherwise a dict with ``desc``
        (summary text) and ``tradeNos`` (every auto-repayment row, sorted by
        ``ymd``).
    """
    overdue_descs = ["花呗自动还款", "花呗|信用购自动还款"]
    is_alipay = df1["flowType"] == "ALIPAY"
    is_auto_repay = df1["tradeDesc"].isin(overdue_descs)
    # Copy so the column assignment below never touches the caller's frame.
    overdue_records = df1[is_alipay & is_auto_repay].copy()
    if overdue_records.empty:
        return None

    overdue_records["ymd"] = pd.to_datetime(overdue_records["ymd"])
    overdue_records = overdue_records.sort_values(by="ymd")
    all_trade_nos = overdue_records["tradeNo"].tolist()

    # One row per calendar day; a gap of more than one day starts a new segment.
    daily = overdue_records.drop_duplicates(subset="ymd")
    day_gap = daily["ymd"].diff()
    segment_id = (day_gap > pd.Timedelta(days=1)).cumsum().rename("segment")

    segment_info = []
    for _, group in daily.groupby(segment_id):
        if len(group) > 1:
            segment_info.append({
                'start_time': group['ymd'].iloc[0].strftime("%Y-%m-%d"),
                'end_time': group['ymd'].iloc[-1].strftime("%Y-%m-%d"),
                'max_amount': round(group['tradeAmount'].max(), 2),
            })
    if not segment_info:
        return None

    total_overdue_times = len(segment_info)
    total_overdue_amount = round(sum(info['max_amount'] for info in segment_info), 2)
    last_overdue_amount = round(segment_info[-1]['max_amount'], 2)
    overall_start_time = segment_info[0]['start_time']
    overall_end_time = segment_info[-1]['end_time']
    return dict(
        desc=f"用户花呗账户在{overall_start_time}至{overall_end_time}内逾期{total_overdue_times}次,逾期总额{total_overdue_amount}元,最近逾期金额{last_overdue_amount}元,请重点关注用户履约能力。",
        tradeNos=all_trade_nos,
    )

# 还款压力 (Repayment pressure)

def rule_bad_high_repayment_stress(df1: pd.DataFrame, **kwargs) -> t.Optional[t.Dict]:
    """Flag users who repaid more than 5000 within the last 30 days of data.

    The 30-day window ends at the latest ``ymd`` in the frame. Huabei
    auto-repayments, rows with an empty ``tradeFunc``, suspected IOU-platform
    hits, credit-report query vendors and phone-rental platforms are excluded.
    The remaining rows count as repayments when ``tradeDesc`` contains "还款"
    or ``platformType`` is "网贷"; refunds among them are subtracted.

    Args:
        df1: Transaction frame; it is not modified.
        **kwargs: Ignored.

    Returns:
        ``None`` when the net amount is not above 5000, otherwise a dict with
        ``desc`` and ``tradeNos`` (newest ``tradeTime`` first).
    """
    credit_query_pattern = '天创信用|贝融助手|天下信用|鹰眼查询|信诚信大数据查询|影子金服'

    # assign() returns a new frame, so the caller's 'ymd' column stays as it was.
    frame = df1.assign(ymd=pd.to_datetime(df1['ymd']))
    window_start = frame['ymd'].max() - timedelta(days=30)
    recent = frame[frame['ymd'] >= window_start]

    # na=False: a missing counterparty/description must not break the mask.
    keep = (
        (recent['tradeDesc'] != '花呗自动还款')
        & (recent['tradeFunc'] != '')
        & (recent['specialTradehHitReason'] != '命中疑似网贷(借条)平台')
        & ~recent['counterparty'].str.contains(credit_query_pattern, na=False)
        & ~recent['tradeDesc'].str.contains(credit_query_pattern, na=False)
        & (recent['platformType'] != '租机')
    )
    recent = recent[keep]

    is_repayment = (
        recent['tradeDesc'].str.contains('还款', na=False)
        | (recent['platformType'] == '网贷')
    )
    repayments = recent[is_repayment]
    refunded = repayments[repayments['tradeDesc'].str.contains('退款', na=False)]['tradeAmount'].sum()
    amount = repayments['tradeAmount'].sum() - refunded
    if not amount > 5000:
        return None

    amount = round(amount, 2)
    repayments = repayments.sort_values(by='tradeTime', ascending=False)
    return dict(
        desc=f"用户在最近一个月共还款{amount}元,建议结合收入进行评估。",
        tradeNos=repayments['tradeNo'].tolist(),
    )

# 汽车 (Car ownership)

def rule_good_car_self(df1: pd.DataFrame, **kwargs):
    """Infer car ownership from car-related spending.

    Expense rows whose counterparty contains a car keyword (and no exclusion
    keyword) are kept; fuel purchases of 100 or less are dropped. If any
    remaining counterparty looks like a rental business the rule does not
    fire. The rule fires when at least two distinct car keywords each occur
    in three or more rows.

    Args:
        df1: Transaction frame; it is not modified.
        **kwargs: Ignored.

    Returns:
        ``None`` or a dict with ``desc`` and ``tradeNos`` (newest first).
    """
    car_keywords = ["停车", "加油站", "加油卡", "洗车", "高速", "ETC", "4S店", "石油", "石化", "行车记录仪", "车贷", "汽车金融", "二手车", "车险", "车行", "汽车", "团油"]
    exclude_keywords = ["电动车", "摩托车", "踏板", "电瓶车", "外卖", "车站", "便利店", "饭店", "酒店", "超市", "餐饮店", "南站", "北站", "西站", "东站", "总站", "客运", "公共", "针车"]
    rental_keywords = ["租车", "租赁", "出租", "小N车行"]
    fuel_keywords = ['加油站', "加油卡", '石油', "石化", "团油"]

    # Step 1: expense rows that mention a car keyword and no exclusion keyword.
    expenses = df1[df1['tradeType'] == "支出"]
    party = expenses['counterparty']
    is_car = party.str.contains('|'.join(car_keywords), na=False)
    is_excluded = party.str.contains('|'.join(exclude_keywords), na=False)
    car_df = expenses[is_car & ~is_excluded]

    # Step 2: small fuel purchases (<= 100) are not treated as car evidence.
    is_fuel = car_df['counterparty'].str.contains('|'.join(fuel_keywords), na=False)
    car_df = car_df[~(is_fuel & (car_df['tradeAmount'] <= 100))]

    # Any rental-looking counterparty means the user is assumed not to own a car.
    if car_df['counterparty'].str.contains('|'.join(rental_keywords), na=False).any():
        return None

    # Count how many rows mention each car keyword.
    hits = car_df['counterparty'].apply(
        lambda name: [kw for kw in car_keywords if kw in name]
    )
    keyword_counts = hits.explode().value_counts()

    # Need at least two keywords that each appear three times or more.
    qualifying_keywords = keyword_counts[keyword_counts >= 3].index.tolist()
    if len(qualifying_keywords) < 2:
        return None

    car_df = car_df.sort_values(by='tradeTime', ascending=False)
    return dict(
        desc="用户账单内存在多条用车类的消费记录,可根据用车消费记录判断用户车辆情况。",
        tradeNos=car_df['tradeNo'].tolist(),
    )

# 房产 (Real estate)

# 改进版本 (Improved version)

import pandas as pd
import re
import typing as t
def extract_name_from_desc(desc: str) -> str:
    """Return the name embedded in a dash-separated transaction description.

    The name is the text after the first ``-`` and before the second one
    (if any): with one dash it is everything after it, with two or more
    dashes it is the part between the first two.

    Args:
        desc: Transaction description. Non-string values (e.g. NaN) yield ``''``.

    Returns:
        The extracted name, or ``''`` when ``desc`` contains no dash.
    """
    if not isinstance(desc, str):
        return ''
    parts = desc.split('-')
    return parts[1] if len(parts) >= 2 else ''

def is_matching_name(name: str, extracted_name: str) -> bool:
    """Tell whether two (possibly masked) names can refer to the same person.

    The names must have the same length. ``*`` marks a masked character and
    matches anything; the masked positions may differ between the two names,
    so a ``*`` on either side is accepted. Every position where both names
    are unmasked must be identical.

    Args:
        name: Account holder name, possibly masked.
        extracted_name: Name taken from a transaction description, possibly masked.

    Returns:
        ``True`` when the names are compatible, ``False`` otherwise.
    """
    if len(name) != len(extracted_name):
        return False
    return all(
        c1 == c2 or c1 == '*' or c2 == '*'
        for c1, c2 in zip(name, extracted_name)
    )

def rule_good_home_self(df: pd.DataFrame, **kwargs) -> t.Optional[t.Dict]:
    """Infer that the user owns a home from utility bills paid in their name.

    Rows whose ``tradeDesc`` mentions water, electricity or gas fees are kept
    when the name embedded in the description (see ``extract_name_from_desc``)
    is compatible with the account holder name (see ``is_matching_name``).
    The holder name is the maximum of the ``name`` column.

    Args:
        df: Transaction frame; it is not modified.
        **kwargs: Ignored.

    Returns:
        ``None`` when nothing matches, otherwise a dict with ``desc`` (amount
        paid per utility type) and ``tradeNos`` (newest first).
    """
    keywords = ['水费', '电费', '燃气费']

    # Missing names become '' so max() compares strings only.
    name = df['name'].fillna('').astype(str).max()
    # An empty frame yields NaN here, hence the isinstance check.
    if not isinstance(name, str) or not name.strip():
        return None

    # Utility-bill rows; copy because columns are added below.
    utilities = df[df['tradeDesc'].str.contains('|'.join(keywords), na=False)].copy()
    if utilities.empty:
        return None

    # Keep only the bills whose embedded name matches the holder name.
    utilities['提取姓名'] = utilities['tradeDesc'].apply(extract_name_from_desc)
    name_ok = utilities['提取姓名'].apply(lambda extracted: is_matching_name(name, extracted))
    utilities = utilities[name_ok].copy()
    if utilities.empty:
        return None

    # Tag each row with the first utility keyword found in its description.
    utilities['命中关键词'] = utilities['tradeDesc'].apply(
        lambda desc: next((keyword for keyword in keywords if keyword in desc), None)
    )

    total_amounts = utilities.groupby('命中关键词')['tradeAmount'].sum()
    if total_amounts.empty:
        return None

    desc_statements = ["用户自有至少一套房产"]
    desc_statements.extend(
        f"已缴纳{keyword}{round(amount, 2)}元"
        for keyword, amount in total_amounts.items()
    )
    utilities = utilities.sort_values(by='tradeTime', ascending=False)
    return {
        "desc": ",".join(desc_statements) + "。",
        "tradeNos": utilities['tradeNo'].tolist(),
    }

# 税收 (Tax payments)

def rule_good_tax_adjustment(df1: pd.DataFrame, **kwargs) -> t.Optional[t.Dict]:
    """Report payments made to tax authorities.

    Rows are kept when the counterparty contains "国家税务总局" or "省税务局"
    and ``tradeType`` is "支出" or "其他".

    Args:
        df1: Transaction frame; it is not modified.
        **kwargs: Ignored.

    Returns:
        ``None`` when the summed amount is not positive, otherwise a dict with
        ``desc`` and ``tradeNos`` (newest first).
    """
    is_tax_office = df1['counterparty'].str.contains("国家税务总局|省税务局", na=False)
    # The comparison operators were missing in the original (syntax error).
    is_payment = df1['tradeType'].isin(["支出", "其他"])
    tax_rows = df1[is_tax_office & is_payment]

    amount = tax_rows['tradeAmount'].sum()
    if not amount > 0:
        return None

    amount = round(amount, 2)
    tax_rows = tax_rows.sort_values(by='tradeTime', ascending=False)
    return dict(
        desc=f"用户存在补税、医保、社保等缴税,已缴纳共计{amount}元税款。",
        tradeNos=tax_rows['tradeNo'].tolist(),
    )

# 超大额资金 (Very large transactions)

def rule_bad_bigamount(df1: pd.DataFrame, **kwargs) -> t.Optional[t.Dict]:
    """Flag single transactions of 10000 or more.

    Rows with an empty ``tradeFunc`` are ignored.

    Args:
        df1: Transaction frame; it is not modified.
        **kwargs: Ignored.

    Returns:
        ``None`` when there is no such row, otherwise a dict with ``desc``
        (row count) and ``tradeNos`` (newest first).
    """
    is_large = (df1['tradeAmount'] >= 10000) & (df1['tradeFunc'] != '')
    large_rows = df1[is_large].sort_values(by='tradeTime', ascending=False)
    if large_rows.empty:
        return None

    count = len(large_rows)
    return dict(
        desc=f"用户存在共{count}次单笔消费大于1万账单记录,请根据具体消费场景判断资金用途。",
        tradeNos=large_rows['tradeNo'].tolist(),
    )

# 亲属交易占比过高 (Excessive share of income from relatives)

def rule_bad_relative(dataframe, **kwargs):
    """Flag users whose income comes largely from relatives.

    A row counts as relative income when its whitespace-stripped counterparty
    equals one of the kinship nicknames below, ``tradeDesc`` mentions a red
    packet or transfer, and ``tradeType`` is "收入". The rule fires when such
    rows make up at least 30% of all income.

    Args:
        dataframe: Transaction frame; it is not modified.
        **kwargs: Ignored; accepted so all rules share one call signature.

    Returns:
        ``None`` or a dict with ``desc`` and ``tradeNos`` (newest first).
    """
    # Kinship nicknames as they appear as counterparty remarks.
    keywords = [
        "母后","母亲大人","母上大人","母亲","妈","妈妈","老妈","妈咪","发给妈妈","发给妈","A妈","发给老妈","A妈妈","老妈子","我妈","美女妈","俺妈","孩子妈","父皇","父亲",
        "爸","爸爸","老爸","A爸爸","爸比","发给爸","我爸爸","发给爸爸","孩子爸","发给老爸","老爸。","哥","哥哥","嫂子","大哥","二哥","三哥","弟","弟弟","老弟","小老弟","小弟","三弟","姐","姐姐",
        "大姐","二姐","老姐","妹妹","老妹","妹","爷爷","奶奶",'爹','老爹','老娘','娘','爹地',"舅舅","小姨","小舅","舅妈","大舅","四舅","阿姨","大姨","二姨","老舅","三姨","老姨",
        "孩他爸","孩他妈","孩她爸","孩她妈","孩子他爸","A老爹","A老公","老公","老公❤️","发给老公","亲亲老公","老婆","老婆大人","发给老婆","老婆❤️","宝贝老婆","亲爱的老婆","A老婆","亲亲老婆❤️","男朋友",
        "娘子","妻子","亲爱的","亲爱的媳妇","媳妇","媳妇儿","媳妇❤️","发给媳妇","小爹","小妈","小妈❤️","继母","继父","小妈.","继父.","后妈.",
    ]

    # Strip into a local Series instead of rewriting the caller's column.
    party = dataframe['counterparty'].str.strip()
    is_income = dataframe['tradeType'] == '收入'
    is_gift = dataframe['tradeDesc'].str.contains("红包|转账", na=False)
    relative_transactions = dataframe[party.isin(keywords) & is_gift & is_income]
    if relative_transactions.empty:
        return None

    relative_income_total = round(relative_transactions['tradeAmount'].sum(), 2)
    total_income = round(dataframe[is_income]['tradeAmount'].sum(), 2)
    relative_income_ratio = relative_income_total / total_income if total_income > 0 else 0
    if not relative_income_ratio >= 0.3:
        return None

    relative_transactions_count = relative_transactions.shape[0]
    relative_transactions = relative_transactions.sort_values(by='tradeTime', ascending=False)
    return dict(
        desc=f"用户总收入为{total_income}元,其中亲属交易收入总金额为{relative_income_total}元,收入次数{relative_transactions_count}次,占比总收入{relative_income_ratio:.2%},请对用户收入来源注意评估。",
        tradeNos=relative_transactions['tradeNo'].tolist(),
    )

# 司法风险 (Legal risk)

import pandas as pd
def rule_bad_legal_risk(df1: pd.DataFrame, **kwargs) -> t.Optional[t.Dict]:
    """Flag transactions with courts that mention judicial matters.

    Rows are kept when the counterparty contains "法院" and ``tradeDesc``
    contains "司法"; amounts are summed per distinct description.

    Args:
        df1: Transaction frame; it is not modified.
        **kwargs: Ignored.

    Returns:
        ``None`` when no row matches, otherwise a dict with ``desc`` and
        ``tradeNos`` (newest first).
    """
    is_court = df1['counterparty'].str.contains("法院", na=False)
    is_judicial = df1['tradeDesc'].str.contains("司法", na=False)
    court_rows = df1[is_court & is_judicial]

    total_amounts = court_rows.groupby('tradeDesc')['tradeAmount'].sum()
    if total_amounts.empty:
        return None

    # Headline first, then one "<description><amount>元" clause per description.
    desc_statements = ["用户存在司法风险"]
    desc_statements.extend(
        f"{keyword}{round(amount, 2)}元" for keyword, amount in total_amounts.items()
    )
    court_rows = court_rows.sort_values(by='tradeTime', ascending=False)
    return dict(
        desc=",".join(desc_statements) + "。",
        tradeNos=court_rows['tradeNo'].tolist(),
    )

# 娱乐消费 (Entertainment spending)

def rule_good_entertainment_spending(df1: pd.DataFrame, **kwargs) -> t.Optional[t.Dict]:
    """Report spending at KTV, spa, massage, bar and foot-bath venues.

    Only expense rows of at least 20 are considered. Rows mentioning massage
    chairs (in counterparty or description) are excluded. A row qualifies
    when its counterparty contains a venue keyword, or when the counterparty
    is exactly "抖音生活服务商家" and the description contains one.

    Args:
        df1: Transaction frame; it is not modified.
        **kwargs: Ignored.

    Returns:
        ``None`` when the total is not above 3000, otherwise a dict with
        ``desc`` and ``tradeNos`` (newest first).
    """
    exclude_pattern = '|'.join(["按摩椅", "共享按摩"])
    venue_pattern = '|'.join(["KTV", "SPA", "按摩", "酒吧", "养生", "足道", "足浴"])
    target_counterparty = "抖音生活服务商家"

    expenses = df1[(df1['tradeAmount'] >= 20) & (df1['tradeType'] == '支出')]
    party = expenses['counterparty']
    trade_desc = expenses['tradeDesc']

    is_excluded = (
        party.str.contains(exclude_pattern, na=False)
        | trade_desc.str.contains(exclude_pattern, na=False)
    )
    # Condition 1: the counterparty itself names a venue.
    party_is_venue = party.str.contains(venue_pattern, na=False)
    # Condition 2: paid through Douyin local services, venue named in the description.
    via_douyin = (party == target_counterparty) & trade_desc.str.contains(venue_pattern, na=False)

    df_result = expenses[~is_excluded & (party_is_venue | via_douyin)]
    if df_result.empty:
        return None

    amount = df_result['tradeAmount'].sum()
    if not amount > 3000:
        return None

    amount = round(amount, 2)
    df_result = df_result.sort_values(by='tradeTime', ascending=False)
    return dict(
        desc=f"用户在足浴、按摩、KTV、酒吧等娱乐消费场景消费{len(df_result)}次,共消费{amount}元",
        tradeNos=df_result['tradeNo'].tolist(),
    )

# 消遣娱乐 (Recreation and leisure spending)

import pandas as pd
def find_first_keyword(text, keywords):
    """Return the first keyword contained in ``text``.

    Args:
        text: String to search. Anything that is not a string (``None``,
            NaN, numbers) is treated as "no match".
        keywords: Iterable of candidate substrings, checked in order.

    Returns:
        The first keyword found in ``text``, or ``None`` when there is none.
    """
    if not isinstance(text, str):
        return None
    return next((keyword for keyword in keywords if keyword in text), None)
def rule_bad_recreation_entertainment(df, **kwargs):
    """Flag heavy spending on online games, live-stream tipping, dating apps and lotteries.

    Expense rows are bucketed into four scenarios. Keyword scenarios match on
    a substring of the counterparty; the live-streaming scenario uses the
    counterparty/description pairs below. A scenario is reported when its
    total reaches that scenario's minimum amount.

    Each row is counted at most once per scenario. The original concatenated
    one slice per live-stream condition, so a row matching two conditions was
    counted twice. As a consequence, live-streaming trade numbers now come
    out in input row order rather than grouped by condition.

    Args:
        df: Transaction frame; it is not modified.
        **kwargs: Ignored; accepted so all rules share one call signature.

    Returns:
        ``None`` when no scenario qualifies, otherwise a dict with ``desc``
        and ``tradeNos`` (the rows of the qualifying scenarios, in scenario
        order).
    """
    spending = df[df['tradeType'] == '支出']
    party = spending['counterparty']
    trade_desc = spending['tradeDesc']

    def party_has(text):
        return party.str.contains(text, na=False, regex=False)

    def desc_has(text):
        return trade_desc.str.contains(text, na=False, regex=False)

    def keyword_mask(keywords):
        # True where the counterparty is a string containing any keyword.
        return party.map(
            lambda value: isinstance(value, str) and any(kw in value for kw in keywords)
        ).astype(bool)

    live_conditions = [
        party_has('抖音') & desc_has('抖币'),
        party_has('微播') & desc_has('抖币'),
        party_has('快手') & desc_has('快币'),
        party_has('YY直播') & desc_has('充值'),
        party == '九秀直播',
        party == '花椒直播',
        party == '广州虎牙信息科技有限公司',
        party == '斗鱼直播',
        party == '奇秀直播',
        party == '觅秀直播',
        party == '映客直播',
    ]
    live_mask = live_conditions[0]
    for condition in live_conditions[1:]:
        live_mask = live_mask | condition

    # (scenario label, minimum total amount, row mask)
    scenarios = [
        ("网游", 2000, keyword_mask([
            "腾讯计算机", "腾讯天游", "网易藏宝阁", "网易游戏", "网易雷火", "上海米哈游", "盛天网络",
            "巨人网络", "江西贪玩", "竞技世界", "益世界", "莉莉丝游戏", "边锋网络", "Steam"])),
        ("直播打赏", 1000, live_mask),
        ("社交软件", 1000, keyword_mask([
            "陌陌", "觅伊", "Tinder", "探探", "Soul", "世纪佳缘", "百合网", "珍爱网",
            "有缘网", "花椒直播", "Blued", "Grindr"])),
        ("博彩类", 1000, keyword_mask(["彩票", "福彩", "体彩"])),
    ]

    descriptions = []
    flagged = []
    for scenario, min_amount, mask in scenarios:
        hits = spending[mask]
        if hits.empty:
            continue
        total_amount = hits['tradeAmount'].sum()
        if total_amount >= min_amount:
            descriptions.append(f"{scenario}消费{len(hits)}次,消费总金额{total_amount:.2f}。")
            flagged.append(hits)

    if not descriptions:
        return None
    return dict(
        desc="此用户具有大量消遣娱乐消费。 " + " ".join(descriptions),
        tradeNos=pd.concat(flagged)['tradeNo'].tolist(),
    )

# 优质网购 (High-quality online shopping)

def rule_good_Shopping(df, **kwargs):
    """Report users who shop online often and at higher ticket sizes.

    Expense rows are kept when the counterparty names one of the e-commerce
    platforms below and the description does not contain cash-out, store,
    shipping or compensation wording. The rule fires with at least 10 such
    purchases of which at least 30% are 200 or more.

    Args:
        df: Transaction frame; it is not modified.
        **kwargs: Ignored; accepted so all rules share one call signature.

    Returns:
        ``None`` or a dict with ``desc`` and ``tradeNos`` (newest first).
    """
    platforms = ["淘宝", "tb", "抖音小店", "抖音生活", "拼多多", "京东商城", "京东到家", "唯品会", "苏宁易购"]

    # Copy: columns are added/rewritten below and must not leak into `df`.
    expenses = df[df['tradeType'] == "支出"].copy()
    if expenses.empty:
        return None

    expenses['type'] = expenses['counterparty'].str.extract(
        '(' + '|'.join(platforms) + ')', expand=False
    )
    # Plain reassignment: fillna(inplace=True) on a column selection does
    # not reliably write back to the frame.
    expenses['tradeDesc'] = expenses['tradeDesc'].fillna('')

    is_noise = expenses['tradeDesc'].str.contains('现金|提现|店|运费|补偿金')
    purchases = expenses[~is_noise & expenses['type'].notna()]
    if purchases.empty:
        return None

    total_count = len(purchases)
    amount_count = int((purchases['tradeAmount'] >= 200).sum())
    percentage = amount_count / total_count * 100
    if not (total_count >= 10 and percentage >= 30):
        return None

    purchases = purchases.sort_values(by='tradeTime', ascending=False)
    return dict(
        desc=f"用户在电商网购消费{total_count}次,其中单笔大于200元次数{amount_count}次,占比{percentage:.0f}%。",
        tradeNos=purchases['tradeNo'].tolist(),
    )

# 资金饥渴 (Funding shortage)

import pandas as pd
def rule_bad_funding_shortage(df1: pd.DataFrame, **kwargs) -> t.Optional[t.Dict]:
    """Flag transfers whose counterparty remark hints at cash-out or credit activity.

    Only rows whose ``tradeDesc`` is exactly "转账" are considered. A row
    matches when its counterparty contains one of the credit keywords.
    Counterparties containing "微信用户" are skipped because that generic
    label itself contains the keyword "信用".

    Args:
        df1: Transaction frame; it is not modified.
        **kwargs: Ignored.

    Returns:
        ``None`` when nothing matches, otherwise a dict with ``desc`` and
        ``tradeNos`` (newest first).
    """
    credit_keywords = ["POS", "花呗", "借呗", "白条", "信用卡", "网贷", "分付", "公积金", "资金", "债务", "套现", "回款", "信用", "信贷", "贷款", "月付", '借条', '还款', '借贷']

    def looks_like_credit(party):
        # Non-string counterparties (None / NaN) never match.
        if not isinstance(party, str) or '微信用户' in party:
            return False
        return any(keyword in party for keyword in credit_keywords)

    transfers = df1[df1['tradeDesc'] == "转账"]
    hits = transfers[transfers['counterparty'].map(looks_like_credit).astype(bool)]
    if hits.empty:
        return None

    hits = hits.sort_values(by='tradeTime', ascending=False)
    desc = "用户可能存在套现、信贷等行为。请根据具体情况进行判断。"
    return dict(desc=desc, tradeNos=hits['tradeNo'].tolist())

# 红包交易 (Red-packet transactions)

def rule_bad_red_packet(df1: pd.DataFrame, **kwargs) -> t.Optional[t.Dict]:
    """Flag days with heavy WeChat group red-packet activity.

    Rows whose ``tradeDesc`` is exactly "微信红包(群红包)" are grouped by
    calendar day of ``tradeTime``. A day is flagged when it has at least 10
    such rows (with a non-null counterparty) totalling at least 1000.

    NOTE(review): the description text speaks of spending ("支出"), but
    ``tradeType`` is not filtered here — confirm whether income rows should
    count.

    Args:
        df1: Transaction frame; it is not modified.
        **kwargs: Ignored.

    Returns:
        ``None`` when no day qualifies, otherwise a dict with ``desc`` and
        ``tradeNos`` (rows of the flagged days, newest first).
    """
    packets = df1[df1['tradeDesc'] == "微信红包(群红包)"].copy()
    if packets.empty:
        return None

    # Per-day row count and amount, broadcast back onto every row of that day.
    packets['tradeTime'] = pd.to_datetime(packets['tradeTime'])
    packets['time'] = packets['tradeTime'].dt.strftime('%Y-%m-%d')
    per_day = packets.groupby('time')
    daily_count = per_day['counterparty'].transform('count')
    daily_amount = per_day['tradeAmount'].transform('sum')

    bad_transactions = packets[(daily_count >= 10) & (daily_amount >= 1000)]
    if bad_transactions.empty:
        return None

    bad_transactions = bad_transactions.sort_values(by='tradeTime', ascending=False)
    days_involved = bad_transactions['time'].nunique()
    min_count = len(bad_transactions)
    min_amount = bad_transactions['tradeAmount'].sum()
    desc = (
        f"用户存在大量群红包交易流水,至少每天10次群红包支出记录,累计{days_involved}天、共计支出{min_count}次、累计支出金额{min_amount:.2f}元。")
    return dict(desc=desc, tradeNos=bad_transactions['tradeNo'].tolist())

# 支付宝余额宝理财 (Alipay Yu'e Bao wealth management)

def round_to_range(number):
    """Round ``number`` to a multiple of 100.

    Values below 100 are first increased by 100, so the result is never
    below 100 for non-negative input. Rounding uses the built-in ``round``
    (ties go to the even multiple).

    Args:
        number: Amount to round.

    Returns:
        The rounded amount.
    """
    if number < 100:
        number += 100
    return round(number / 100) * 100
def rule_good_alipay_yueb_transactions(df, **kwargs):
    """Estimate the remaining Yu'e Bao balance from transfers and payments.

    Only rows with a non-empty ``tradeFunc`` are used. Transfers in and out
    are recognised from ``tradeDesc``; payments made *with* Yu'e Bao are
    recognised from ``tradeFunc``. The estimate is
    ``in - out - (spent_with - received_with)`` and the rule fires when it is
    at least 1000.

    The ``==`` comparisons on ``tradeType`` were missing in the original
    (syntax errors) and are restored here.

    Args:
        df: Transaction frame; it is not modified.
        **kwargs: Ignored; accepted so all rules share one call signature.

    Returns:
        ``None`` or a dict with ``desc`` and ``tradeNos`` (transfer-in rows,
        transfer-out rows, then Yu'e Bao payment rows; a row may repeat).
    """
    funded = df[df['tradeFunc'] != '']
    yuebao = funded[funded['tradeDesc'].str.contains('余额宝', na=False)]
    if yuebao.empty:
        return None

    # Transfers into / out of Yu'e Bao, judged by the description wording.
    moved_in = yuebao[yuebao['tradeDesc'].str.contains('入|至余额宝|到余额宝|工资理财', na=False)]
    moved_out = yuebao[yuebao['tradeDesc'].str.contains('出', na=False)]
    yuebao_in = moved_in['tradeAmount'].sum()
    yuebao_out = moved_out['tradeAmount'].sum()

    # Payments where Yu'e Bao is the funding method.
    paid_with = funded['tradeFunc'].str.contains('余额宝', na=False)
    is_income = funded['tradeType'] == "收入"
    is_expense = funded['tradeType'] == "支出"
    trade_desc = funded['tradeDesc']
    yuebaoincome = funded[
        paid_with & (is_income | trade_desc.str.contains('退款', na=False))
    ]['tradeAmount'].sum()
    yuebaooutcome = funded[
        paid_with & (is_expense | trade_desc.str.contains('还款|归还|转账到银行卡', na=False))
    ]['tradeAmount'].sum()
    func_rows = funded[
        paid_with
        & (is_income | is_expense | trade_desc.str.contains('还款|归还|退款|转账到银行卡', na=False))
    ]

    overamount = yuebaooutcome - yuebaoincome
    used_amount = yuebao_in - yuebao_out - overamount
    if used_amount < 1000:
        return None

    tradeNos = (
        moved_in['tradeNo'].tolist()
        + moved_out['tradeNo'].tolist()
        + func_rows['tradeNo'].tolist()
    )
    descriptions = []
    if yuebao_in > 0:
        descriptions.append(f"转入余额宝{yuebao_in:.2f}元")
    if yuebao_out > 0:
        descriptions.append(f"从余额宝转出{yuebao_out:.2f}元")
    if overamount > 0:
        descriptions.append(f"以余额宝作为交易方式支出{overamount:.2f}元")
    if overamount < 0:
        descriptions.append(f"以余额宝作为交易方式收入{-overamount:.2f}元")
    descriptions.append(f"预计余额宝剩余金额大于{round_to_range(used_amount)}元。")
    return dict(desc=",".join(descriptions), tradeNos=tradeNos)

# 支付宝余利宝理财 (Alipay Yulibao wealth management)

def rule_good_alipay_yulib_transactions(df, **kwargs):
    """Estimate the remaining Yulibao balance from transfers in and out.

    Rows whose ``tradeDesc`` contains "余利宝" count as transfers in when the
    description also contains "入" and as transfers out when it contains
    "出". The rule fires when ``in - out`` is at least 1000.

    Args:
        df: Transaction frame; it is not modified.
        **kwargs: Ignored; accepted so all rules share one call signature.

    Returns:
        ``None`` or a dict with ``desc`` and ``tradeNos`` (transfer-in rows
        followed by transfer-out rows).
    """
    yulibao = df[df['tradeDesc'].str.contains('余利宝', na=False)]
    if yulibao.empty:
        return None

    moved_in = yulibao[yulibao['tradeDesc'].str.contains('入', na=False)]
    moved_out = yulibao[yulibao['tradeDesc'].str.contains('出', na=False)]
    yulibao_in = moved_in['tradeAmount'].sum()
    yulibao_out = moved_out['tradeAmount'].sum()

    yulibaoamount = yulibao_in - yulibao_out
    if yulibaoamount < 1000:
        return None

    descriptions = []
    if yulibao_in > 0:
        descriptions.append(f"转入余利宝{yulibao_in:.2f}元")
    if yulibao_out > 0:
        descriptions.append(f"从余利宝转出{yulibao_out:.2f}元")
    descriptions.append(f"预计余利宝剩余金额大于{round_to_range(yulibaoamount)}元。")
    tradeNos = moved_in['tradeNo'].tolist() + moved_out['tradeNo'].tolist()
    return dict(desc=",".join(descriptions), tradeNos=tradeNos)

# 零钱通理财 (WeChat Lingqiantong wealth management)

def rule_good_wechat_lqiant_transactions(df, **kwargs):
    """Estimate the remaining Lingqiantong balance from transfers and payments.

    Transfers in and out are recognised from ``tradeDesc``; payments made
    *with* Lingqiantong are recognised from ``tradeFunc`` on rows with a
    non-empty ``tradeFunc``. The estimate is
    ``in - out - (spent_with - received_with)`` and the rule fires when it is
    at least 1000.

    The ``==`` comparisons on ``tradeType`` were missing in the original
    (syntax errors) and are restored here.

    NOTE(review): transfer-in/out totals are taken from all rows, while the
    early exit only looks at rows with a non-empty ``tradeFunc`` — this
    mirrors the original; confirm whether the totals should be filtered too.

    Args:
        df: Transaction frame; it is not modified.
        **kwargs: Ignored; accepted so all rules share one call signature.

    Returns:
        ``None`` or a dict with ``desc`` and ``tradeNos`` (transfer-in rows,
        transfer-out rows, then Lingqiantong payment rows; a row may repeat).
    """
    has_func = df['tradeFunc'] != ''
    mentions = df['tradeDesc'].str.contains('零钱通', na=False)
    if not (has_func & mentions).any():
        return None

    # Transfers into / out of Lingqiantong, judged by the description wording.
    moved_in = df[mentions & df['tradeDesc'].str.contains('入', na=False)]
    moved_out = df[mentions & df['tradeDesc'].str.contains('出', na=False)]
    lqian_in = round(moved_in['tradeAmount'].sum(), 2)
    lqian_out = round(moved_out['tradeAmount'].sum(), 2)

    # Payments where Lingqiantong is the funding method.
    funded = df[has_func]
    paid_with = funded['tradeFunc'].str.contains('零钱通', na=False)
    is_income = funded['tradeType'] == "收入"
    is_expense = funded['tradeType'] == "支出"
    is_repay = funded['tradeDesc'].str.contains('还款|购买理财通', na=False)
    lqianincome = funded[paid_with & is_income]['tradeAmount'].sum()
    lqianoutcome = funded[paid_with & (is_expense | is_repay)]['tradeAmount'].sum()
    func_rows = funded[paid_with & (is_income | is_expense | is_repay)]

    overamount = lqianoutcome - lqianincome
    used_amount = lqian_in - lqian_out - overamount
    if used_amount < 1000:
        return None

    tradeNos = (
        moved_in['tradeNo'].tolist()
        + moved_out['tradeNo'].tolist()
        + func_rows['tradeNo'].tolist()
    )
    descriptions = []
    if lqian_in > 0:
        descriptions.append(f"转入零钱通{lqian_in}元")
    if lqian_out > 0:
        descriptions.append(f"从零钱通转出{lqian_out}元")
    if overamount > 0:
        descriptions.append(f"以零钱通作为交易方式支出{overamount:.2f}元")
    if overamount < 0:
        descriptions.append(f"以零钱通作为交易方式收入{-overamount:.2f}元")
    descriptions.append(f"预计零钱通剩余金额大于{round_to_range(used_amount)}元。")
    return dict(desc=",".join(descriptions), tradeNos=tradeNos)

# 基金交易 (Fund transactions)

def rule_good_fund_transactions(df, **kwargs):
    """Report fund purchases made through fund-sales companies.

    Rows whose counterparty contains "基金销售" are kept. The reported amount
    is the sum of all kept rows minus the rows whose description mentions a
    refund, sale or transfer out.

    NOTE(review): the original read a column named ``counterparty17``, which
    no other rule uses and looks like paste debris. That column is still
    honoured when present; otherwise ``counterparty`` is used. Confirm the
    real schema and drop the fallback.

    Args:
        df: Transaction frame; it is not modified.
        **kwargs: Ignored; accepted so all rules share one call signature.

    Returns:
        ``None`` when the net amount is below 500, otherwise a dict with
        ``desc`` and ``tradeNos`` (newest first).
    """
    party_column = 'counterparty17' if 'counterparty17' in df.columns else 'counterparty'
    fund_rows = df[df[party_column].str.contains('基金销售', na=False)]

    # na=False replaces the original fillna('') over the whole frame, which
    # also wrote '' into numeric columns.
    is_redeemed = fund_rows['tradeDesc'].str.contains('退款|卖出|转出', na=False)
    outmoney = fund_rows[is_redeemed]['tradeAmount'].sum()
    money = round(fund_rows['tradeAmount'].sum() - outmoney, 2)
    if not money >= 500:
        return None

    fund_rows = fund_rows.sort_values(by='tradeTime', ascending=False)
    lens = len(fund_rows)
    return dict(
        desc=f"用户存在基金交易记录,共计{lens}次交易记录,共计金额{money}元。",
        tradeNos=fund_rows['tradeNo'].tolist(),
    )

# 余额宝收益 (Yu'e Bao income)

def round_to_range1(number):
    """Format ``number`` as a ±100 range around its value rounded to a multiple of 100.

    Values below 100 are first increased by 100. Rounding uses the built-in
    ``round`` (ties go to the even multiple).

    Args:
        number: Amount to describe.

    Returns:
        A string ``"<lower>~<upper>"`` where the bounds are the rounded
        value minus and plus 100.
    """
    if number < 100:
        number += 100
    rounded_number = round(number / 100) * 100
    return f"{rounded_number - 100}~{rounded_number + 100}"
def rule_good_yuebao_transactions(df, **kwargs):
    """Estimate the Yu'e Bao balance from its income payouts.

    Rows whose ``tradeDesc`` contains both "余额宝" and "收益发放" are treated
    as income payouts. The balance estimate is the most recent payout
    multiplied by 365 and by 50, and the rule fires when that estimate is at
    least 800.

    The original expression read ``maxmoney36550`` (a ``NameError``). It is
    restored as ``maxmoney * 365 * 50`` on the assumption that the ``*``
    operators were lost in the same way as the ``==`` operators elsewhere in
    this file — confirm the intended factor.

    Args:
        df: Transaction frame; it is not modified.
        **kwargs: Ignored; accepted so all rules share one call signature.

    Returns:
        ``None`` or a dict with ``desc`` and ``tradeNos`` (newest first).
    """
    trade_desc = df['tradeDesc']
    is_payout = (
        trade_desc.str.contains('余额宝', na=False)
        & trade_desc.str.contains('收益发放', na=False)
    )
    payouts = df[is_payout]
    if payouts.empty:
        return None

    payouts = payouts.sort_values(by='tradeTime', ascending=False)
    # Most recent payout (first row after the descending sort).
    maxmoney = payouts['tradeAmount'].iloc[0]
    count = len(payouts)
    summoney = round(payouts['tradeAmount'].sum(), 2)
    avg = summoney / count

    fund = round(maxmoney * 365 * 50, 2)
    if not fund >= 800:
        return None

    fund = round_to_range1(fund)
    return dict(
        desc=f"用户存在余额宝收益,累计收益{count}次。累计收益金额{summoney}元,日均收益{avg:.2f}元,预估余额宝余额大约为{fund}元。",
        tradeNos=payouts['tradeNo'].tolist(),
    )

# 抵押贷款 (Mortgage / secured loans)

import pandas as pd
def rule_bad_mortgage(df1: pd.DataFrame, **kwargs) -> t.Optional[t.Dict]:
    """Flag transactions whose counterparty hints at a mortgage or pledge.

    A row matches when its counterparty contains one of the collateral
    keywords and its amount is at least 10.

    Args:
        df1: Transaction frame; it is not modified.
        **kwargs: Ignored.

    Returns:
        ``None`` when nothing matches, otherwise a dict with ``desc`` and
        ``tradeNos`` (newest first).
    """
    collateral_keywords = ["抵押", "房贷", "车贷", "房抵", "车抵", "押车", "押证"]

    def mentions_collateral(party):
        # Non-string counterparties (None / NaN) never match.
        return isinstance(party, str) and any(kw in party for kw in collateral_keywords)

    is_collateral = df1['counterparty'].map(mentions_collateral).astype(bool)
    hits = df1[is_collateral & (df1['tradeAmount'] >= 10)]
    if hits.empty:
        return None

    hits = hits.sort_values(by='tradeTime', ascending=False)
    desc = "用户可能存在房抵、车抵等抵押行为。请根据具体情况进行判断。"
    return dict(desc=desc, tradeNos=hits['tradeNo'].tolist())

# 自我交易 (Self-dealing between own accounts)

def rule_bad_self_trade(SJ, **kwargs):
    """Flag transfers that look like moves between the user's own accounts.

    Transfers (description contains "转账" but not "转账到银行卡") match when
    the stripped counterparty is one of the "own account" aliases below, or
    when the counterparty contains the account holder name. The holder name
    is the maximum non-null value of ``name`` among the transfer rows and is
    only used when longer than one character.

    The name is matched literally. The original passed it to
    ``str.contains`` as a regular expression, so a masked name such as
    "*三" raised ``re.error``.

    Args:
        SJ: Transaction frame; it is not modified.
        **kwargs: Ignored; accepted so all rules share one call signature.

    Returns:
        ``None`` when nothing matches, otherwise a dict with ``desc`` and
        ``tradeNos`` (alias matches first, then name-only matches).
    """
    alias_names = ["备用号", "我", "小号", "小号2", "本人小号", "自己"]

    trade_desc = SJ['tradeDesc']
    is_transfer = (
        trade_desc.str.contains("转账", na=False)
        & ~trade_desc.str.contains("转账到银行卡", na=False)
    )
    transfers = SJ[is_transfer]
    if transfers.empty:
        return None

    party = transfers['counterparty'].astype(object).where(transfers['counterparty'].notna(), None)
    party = party.map(lambda value: value.strip() if isinstance(value, str) else value)
    alias_mask = party.isin(alias_names)

    # Account holder name: largest non-null, stripped value of the column.
    names = transfers['name'].dropna().astype(str).str.strip()
    name = names.max() if not names.empty else None

    if isinstance(name, str) and len(name) > 1:
        name_mask = party.map(lambda value: isinstance(value, str) and name in value).astype(bool)
        # Alias rows first, then rows matched by name only (no duplicates).
        relevant_data = pd.concat([transfers[alias_mask], transfers[name_mask & ~alias_mask]])
    else:
        relevant_data = transfers[alias_mask]

    if relevant_data.empty:
        return None
    return dict(desc="用户疑似存在多账号自我交易", tradeNos=relevant_data['tradeNo'].tolist())

# 疑似借条 (Suspected IOU / offline lending)

def filter_transactions(dataframe):
    """Find runs of same-amount payments to one counterparty at a regular interval.

    Rows are grouped by ``(counterparty, tradeAmount)`` and sorted by
    ``ymd``. Within a group, the day gaps between neighbouring rows are
    scanned for runs of identical gaps taken from the allowed set
    (1-7, 10, 14, 15 days). A run of at least three equal gaps — i.e. four
    transactions — is kept.

    Args:
        dataframe: Frame with ``counterparty``, ``tradeAmount`` and ``ymd``
            columns. It is not modified; ``ymd`` is parsed on a copy.

    Returns:
        The rows of all qualifying runs (``ymd`` as datetime), or an empty
        frame with the input's columns when there is none.
    """
    allowed_gaps = (1, 2, 3, 4, 5, 6, 7, 10, 14, 15)
    min_equal_gaps = 3  # three equal gaps span four transactions

    # assign() returns a new frame, so the caller's 'ymd' column is untouched.
    frame = dataframe.assign(ymd=pd.to_datetime(dataframe['ymd']))

    valid_rows = []
    for _, group in frame.groupby(['counterparty', 'tradeAmount']):
        group = group.sort_values('ymd')
        # gaps[i] is the day difference between row i and row i + 1.
        gaps = group['ymd'].diff().dt.days.dropna().tolist()

        run_gap = None   # gap value of the current run
        run_len = 0      # number of consecutive equal gaps so far
        run_start = 0    # index of the run's first gap (= its first row)

        for i, gap in enumerate(gaps):
            if gap in allowed_gaps and gap == run_gap:
                run_len += 1
                continue
            # The run ends here; rows run_start..i belong to it.
            if run_len >= min_equal_gaps:
                valid_rows.append(group.iloc[run_start:i + 1])
            if gap in allowed_gaps:
                run_gap, run_len = gap, 1
            else:
                run_gap, run_len = None, 0
            run_start = i

        # A run that is still open at the end of the group.
        if run_len >= min_equal_gaps:
            valid_rows.append(group.iloc[run_start:])

    if not valid_rows:
        return pd.DataFrame(columns=dataframe.columns)
    return pd.concat(valid_rows, axis=0)

def rule_bad_rent_offline(df1, **kwargs):
    """Flag fixed-amount, regularly spaced payments that resemble offline loan repayments.

    Candidate rows are expenses above 150 paid by transfer, single red packet
    or QR-code payment. Counterparties with any repeated
    ``(amount, day)`` pair are dropped, as are counterparties with fewer than
    three candidate rows. The rest is passed to ``filter_transactions``.

    The ``==`` comparisons were missing in the original (syntax errors) and
    are restored here.

    Args:
        df1: Transaction frame; it is not modified.
        **kwargs: Ignored; accepted so all rules share one call signature.

    Returns:
        ``None`` or a dict with ``desc`` and ``tradeNos``.
    """
    payment_descs = ["转账", "微信红包(单发)", "扫二维码付款"]

    frame = df1.assign(ymd=pd.to_datetime(df1['ymd']))
    is_candidate = (
        (frame['tradeType'] == "支出")
        & (frame['tradeAmount'] > 150)
        & frame['tradeDesc'].isin(payment_descs)
    )
    payments = frame[is_candidate].copy()
    if payments.empty:
        return None

    # Rows per (counterparty, amount, day); a value above 1 is a same-day repeat.
    payments['count'] = payments.groupby(['counterparty', 'tradeAmount', 'ymd'])['counterparty'].transform('count')
    payments['amxcount'] = payments.groupby('counterparty')['count'].transform('max')
    payments = payments[payments['amxcount'] == 1].copy()
    if payments.empty:
        return None

    # Number of candidate rows per counterparty.
    payments['count1'] = payments.groupby('counterparty')['count'].transform('count')
    payments = payments[payments['count1'] >= 3]
    if payments.empty:
        return None

    filtered_df = filter_transactions(payments)
    if filtered_df.empty:
        return None
    return dict(
        desc="用户存在固定金额高频支出行为,该场景与线下借贷付款契合度较高,请留意甄别。",
        tradeNos=filtered_df['tradeNo'].tolist(),
    )

# 频繁交易 (Frequent transactions)

import pandas as pd
def frequent_transactions(dataframe, time_range, min_transactions, min_amount):
    """Find, per counterparty, the first time window with enough transactions.

    For each counterparty the rows are sorted by date (oldest first). Every
    row in turn starts a window of ``time_range`` days (both ends
    inclusive); the first window holding at least ``min_transactions`` rows
    that total at least ``min_amount`` is kept and the search for that
    counterparty stops.

    Args:
        dataframe: Frame with ``counterparty``, ``ymd`` and ``tradeAmount``
            columns. It is not modified; ``ymd`` is converted on a copy.
        time_range: Window length in days.
        min_transactions: Minimum number of rows in the window.
        min_amount: Minimum summed ``tradeAmount`` in the window.

    Returns:
        The rows of the selected windows (``ymd`` as ``datetime.date``), or
        an empty frame with the input's columns.
    """
    # assign() returns a new frame, so the caller's 'ymd' column is untouched.
    frame = dataframe.assign(ymd=pd.to_datetime(dataframe['ymd']).dt.date)
    window = timedelta(days=time_range)

    windows = []
    for _, group in frame.groupby('counterparty'):
        group = group.sort_values(by='ymd', ascending=True)
        for start_time in group['ymd'].tolist():
            end_time = start_time + window
            in_window = group[(group['ymd'] >= start_time) & (group['ymd'] <= end_time)]
            if len(in_window) >= min_transactions and in_window['tradeAmount'].sum() >= min_amount:
                windows.append(in_window)
                break  # one window per counterparty is enough

    # Concatenate once at the end instead of growing a frame inside the loop.
    if not windows:
        return pd.DataFrame(columns=dataframe.columns)
    return pd.concat(windows, axis=0)

def rule_bad_frequent_trade(df1, **kwargs):
    """Flag short bursts of frequent, high-value trades with one counterparty.

    Candidate rows are above 100 and paid by transfer, single red packet or
    QR-code payment. Counterparties with fewer than 20 candidate rows are
    dropped; the rest is passed to ``frequent_transactions`` looking for 20
    or more rows totalling at least 10000 within 7 days.

    Two ``==`` comparisons were missing in the original (syntax errors) and
    are restored here.

    Args:
        df1: Transaction frame; it is not modified.
        **kwargs: Ignored; accepted so all rules share one call signature.

    Returns:
        ``None`` or a dict with ``desc`` and ``tradeNos``.
    """
    payment_descs = ["转账", "微信红包(单发)", "扫二维码付款"]

    frame = df1.assign(ymd=pd.to_datetime(df1['ymd']))
    is_candidate = (frame['tradeAmount'] > 100) & frame['tradeDesc'].isin(payment_descs)
    trades = frame[is_candidate].copy()
    if trades.empty:
        return None

    trades['count'] = trades.groupby(['counterparty'])['counterparty'].transform('count')
    trades = trades[trades['count'] >= 20]
    if trades.empty:
        return None

    filtered_df = frequent_transactions(trades, time_range=7, min_transactions=20, min_amount=10000)
    if filtered_df.empty:
        return None
    return dict(desc="用户存在短期高频高额交易行为,请留意。", tradeNos=filtered_df['tradeNo'].tolist())

def rule_bad_short_wechat(df1, **kwargs):
    """Flag WeChat statements whose activity rate is below 70.

    The highest ``wechatactivetare`` value among WeChat rows is compared with
    the threshold. Values may be numbers or strings with a trailing percent
    sign; the original comment promised that stripping but the code compared
    the raw value. Unparseable values count as missing.

    NOTE(review): assumes the rate is on a 0-100 scale — confirm against the
    producer of this column.

    Args:
        df1: Transaction frame; it is not modified.
        **kwargs: Ignored; accepted so all rules share one call signature.

    Returns:
        ``None`` when there are no WeChat rows or the rate reaches the
        threshold, otherwise a dict with ``desc`` only.
    """
    min_active_rate = 70

    wechat = df1[df1['flowType'] == 'WECHAT']
    if wechat.empty:
        return None

    # Drop a trailing '%' and parse; numeric input passes through unchanged.
    raw_rates = wechat['wechatactivetare'].astype(str).str.strip().str.rstrip('%')
    active_rate = pd.to_numeric(raw_rates, errors='coerce').max()
    if active_rate >= min_active_rate:
        return None
    return dict(desc="微信账单活跃率较低,请留意。")

def rule_bad_short_alipay(df1, **kwargs):
    """Flag Alipay statements whose activity rate is below 25.

    The highest ``alipayactivetare`` value among Alipay rows is compared with
    the threshold. Values may be numbers or strings with a trailing percent
    sign; the original comment promised that stripping but the code compared
    the raw value. Unparseable values count as missing.

    NOTE(review): assumes the rate is on a 0-100 scale — confirm against the
    producer of this column.

    Args:
        df1: Transaction frame; it is not modified.
        **kwargs: Ignored; accepted so all rules share one call signature.

    Returns:
        ``None`` when there are no Alipay rows or the rate reaches the
        threshold, otherwise a dict with ``desc`` only.
    """
    min_active_rate = 25

    alipay = df1[df1['flowType'] == 'ALIPAY']
    if alipay.empty:
        return None

    # Drop a trailing '%' and parse; numeric input passes through unchanged.
    raw_rates = alipay['alipayactivetare'].astype(str).str.strip().str.rstrip('%')
    active_rate = pd.to_numeric(raw_rates, errors='coerce').max()
    if active_rate >= min_active_rate:
        return None
    return dict(desc="支付宝账单活跃率较低,请留意。")

# 刷流水 (Transaction-volume padding)

def rule_bad_freque_trade(df1, **kwargs):
    """Flag back-and-forth trading with one counterparty within a single day.

    Candidate rows are above 100 and have a description mentioning a
    transfer, red packet or QR-code collection/payment. A
    ``(counterparty, day)`` pair is flagged when the counterparty has more
    than 10 candidate rows overall, the pair itself has more than 10 rows,
    it contains both income and expense rows, and its total exceeds 1000.

    Args:
        df1: Transaction frame; it is not modified.
        **kwargs: Ignored; accepted so all rules share one call signature.

    Returns:
        ``None`` or a dict with ``desc`` and ``tradeNos`` (input row order).
    """
    day_key = ['counterparty', 'trade_date']

    is_candidate = (
        (df1['tradeAmount'] > 100)
        & df1['tradeDesc'].str.contains('转账|红包|二维码收款|扫二维码付款', na=False)
    )
    # Copy: helper columns are added below and must not leak into `df1`.
    trades = df1[is_candidate].copy()
    if trades.empty:
        return None

    # Counterparties with more than 10 candidate rows overall.
    trades['counterparty_count'] = trades.groupby('counterparty')['counterparty'].transform('count')
    trades = trades[trades['counterparty_count'] > 10].copy()
    if trades.empty:
        return None

    # (counterparty, day) pairs with more than 10 rows.
    trades['trade_date'] = pd.to_datetime(trades['ymd']).dt.date
    trades['transaction_count'] = trades.groupby(day_key)['tradeNo'].transform('count')
    busy = trades[trades['transaction_count'] > 10]
    if busy.empty:
        return None

    def contains_income_and_expenditure(group):
        kinds = set(group['tradeType'])
        return '收入' in kinds and '支出' in kinds

    two_way = busy.groupby(day_key).filter(contains_income_and_expenditure)
    if two_way.empty:
        return None

    # Total traded amount per (counterparty, day) must exceed 1000.
    day_total = two_way.groupby(day_key)['tradeAmount'].transform('sum')
    flagged = two_way[day_total > 1000]
    if flagged.empty:
        return None
    return {
        "desc": "用户疑似存在与同一对象短期内往来频繁交易",
        "tradeNos": flagged['tradeNo'].tolist()}

# 账单活跃时间较短 (Statement covers too short a period)

def rule_bad_wechattime(df1, **kwargs):
    """Flag WeChat statements that cover fewer than 180 days.

    Compares the highest ``wechatday`` value among WeChat rows with 180.

    Args:
        df1: Transaction frame; it is not modified.
        **kwargs: Ignored; accepted so all rules share one call signature.

    Returns:
        ``None`` when there are no WeChat rows or the value is at least 180,
        otherwise a dict with ``desc`` only. The message is wrapped in blank
        lines exactly as in the original multi-line literal.
    """
    wechat = df1[df1['flowType'] == 'WECHAT']
    if wechat.empty:
        return None

    covered_days = wechat['wechatday'].max()
    if covered_days >= 180:
        return None
    return dict(desc="\n\n微信账单时间不足6个月,请引导客户选择近一年时间账单重新投递\n\n")

def rule_bad_alitime(df1, **kwargs):
    """Flag Alipay statements that cover fewer than 180 days.

    Compares the highest ``aliday`` value among Alipay rows with 180.

    Args:
        df1: Transaction frame; it is not modified.
        **kwargs: Ignored; accepted so all rules share one call signature.

    Returns:
        ``None`` when there are no Alipay rows or the value is at least 180,
        otherwise a dict with ``desc`` only. The message is wrapped in blank
        lines exactly as in the original multi-line literal.
    """
    alipay = df1[df1['flowType'] == 'ALIPAY']
    if alipay.empty:
        return None

    covered_days = alipay['aliday'].max()
    if covered_days >= 180:
        return None
    return dict(desc="\n\n支付宝账单时间不足6个月,请引导客户选择近一年时间账单重新投递\n\n")

def rule_bad_wechatcurrent(df1, **kwargs):
    """Flag WeChat statements that do not reach into the last 7 days.

    Compares the highest ``wechatdatediff`` value among WeChat rows with 7.

    NOTE(review): presumably ``wechatdatediff`` is the number of days between
    the latest transaction and the upload date — verify against the producer.

    Args:
        df1: Transaction frame; it is not modified.
        **kwargs: Ignored; accepted so all rules share one call signature.

    Returns:
        ``None`` when there are no WeChat rows or the value is below 7,
        otherwise a dict with ``desc`` only. The message is wrapped in blank
        lines exactly as in the original multi-line literal.
    """
    wechat = df1[df1['flowType'] == 'WECHAT']
    if wechat.empty:
        return None

    days_since_last = wechat['wechatdatediff'].max()
    if days_since_last < 7:
        return None
    return dict(desc="\n\n微信账单不包含近7天数据,请引导客户选择近期数据重新投递\n\n")

def rule_bad_alipaycurrent(df1, **kwargs):
    """Flag Alipay statements that do not reach into the last 7 days.

    Compares the highest ``alipaydatediff`` value among Alipay rows with 7.

    NOTE(review): presumably ``alipaydatediff`` is the number of days between
    the latest transaction and the upload date — verify against the producer.

    Args:
        df1: Transaction frame; it is not modified.
        **kwargs: Ignored; accepted so all rules share one call signature.

    Returns:
        ``None`` when there are no Alipay rows or the value is below 7,
        otherwise a dict with ``desc`` only. The message is wrapped in blank
        lines exactly as in the original multi-line literal.
    """
    alipay = df1[df1['flowType'] == 'ALIPAY']
    if alipay.empty:
        return None

    days_since_last = alipay['alipaydatediff'].max()
    if days_since_last < 7:
        return None
    return dict(desc="\n\n支付宝账单不包含近7天数据,请引导客户选择近期数据重新投递\n\n")

def rule_bad_normalrent(df1, **kwargs):
    """Flag recent transactions tagged as IOU (private lending) hits.

    Looks at the 15 days ending at the latest ``ymd`` in the frame and keeps
    rows whose ``specialTradehHitReason`` contains "借条".

    Args:
        df1: Transaction frame; it is not modified.
        **kwargs: Ignored; accepted so all rules share one call signature.

    Returns:
        ``None`` when no row matches, otherwise a dict with ``desc`` and
        ``tradeNos``.
    """
    frame = df1.assign(ymd=pd.to_datetime(df1['ymd']))
    window_start = frame['ymd'].max() - timedelta(days=15)
    recent_transactions = frame[frame['ymd'] >= window_start]

    # na=False: rows without a hit reason simply do not match.
    is_iou = recent_transactions['specialTradehHitReason'].str.contains('借条', na=False)
    scene_transactions = recent_transactions[is_iou]
    if scene_transactions.empty:
        return None
    return dict(
        desc="用户最近存在民间借贷行为,请留意。",
        tradeNos=scene_transactions['tradeNo'].tolist(),
    )

# 定义一个字典,将规则名称映射到函数 (Dictionary mapping rule codes to rule functions)

# Registry used by run_rule_engine: rule code -> rule function.
# NOTE(review): rule_good_alipay_yulib_transactions is defined in this file
# but has no entry here — confirm whether that is intentional.
rule_functions = {
    "alipay_hb_overdue": rule_bad_alipay_hb_overdue,
    "high_repayment_stress": rule_bad_high_repayment_stress,
    "car_self": rule_good_car_self,
    "home_self": rule_good_home_self,
    "tax_adjustment": rule_good_tax_adjustment,
    "bigamount": rule_bad_bigamount,
    "relative": rule_bad_relative,
    "legal_risk": rule_bad_legal_risk,
    "entertainment_spending": rule_good_entertainment_spending,
    "recreation_entertainment": rule_bad_recreation_entertainment,
    "Shopping": rule_good_Shopping,
    "funding_shortage": rule_bad_funding_shortage,
    "red_packet": rule_bad_red_packet,
    "alipay_yueb_transactions": rule_good_alipay_yueb_transactions,
    "wechat_lqiant_transactions": rule_good_wechat_lqiant_transactions,
    "fund_transactions": rule_good_fund_transactions,
    "yuebao_transactions": rule_good_yuebao_transactions,
    "mortgage": rule_bad_mortgage,
    "self_trade": rule_bad_self_trade,
    "rent_offline": rule_bad_rent_offline,
    "frequent_trade": rule_bad_frequent_trade,
    "short_wechat": rule_bad_short_wechat,
    "short_alipay": rule_bad_short_alipay,
    "freque_trade": rule_bad_freque_trade,
    "wechattime": rule_bad_wechattime,
    "alitime": rule_bad_alitime,
    "wechatcurrent": rule_bad_wechatcurrent,
    "alipaycurrent": rule_bad_alipaycurrent,
    "normalrent": rule_bad_normalrent,
}

# 运行函数 (Rule-engine entry point)

def run_rule_engine(rules: t.List[t.Dict], df: pd.DataFrame) -> t.List[t.Dict]:
    """Run the configured rules against a transaction frame.

    Each rule dict needs ``code`` (key into ``rule_functions``), ``type``
    ("good", "bad" or anything else) and ``risk_name``; ``kwargs`` is an
    optional JSON object string (a dict is accepted as well). Every rule
    function receives its own copy of ``df``. A rule that raises is logged
    with its traceback and skipped, so one broken rule never blocks the
    others.

    Args:
        rules: Rule configurations; falsy entries are skipped.
        df: Transaction frame shared by all rules.

    Returns:
        One result dict per rule that fired, in rule order.
    """
    # Help images attached to the statement-quality rules, keyed by risk name.
    remark_urls = {
        "微信账单时间过短": "https://regai.oss-cn-chengdu.aliyuncs.com/flows/risk-label/微信不足六个月.png",
        "支付宝账单时间过短": "https://regai.oss-cn-chengdu.aliyuncs.com/flows/risk-label/支付宝不足六个月.png",
        "微信账单非近期": "https://regai.oss-cn-chengdu.aliyuncs.com/flows/risk-label/微信数据不足7天.png",
        "支付宝账单非近期": "https://regai.oss-cn-chengdu.aliyuncs.com/flows/risk-label/支付宝账单不包含7天数据.png",
    }
    type_codes = {"good": "1", "bad": "2"}  # anything else maps to "3"

    rule_results: t.List[t.Dict] = []
    for rule in rules:
        if not rule:
            continue
        try:
            rule_name = f"{rule['code']}"
            rule_func = rule_functions.get(rule_name)
            if not rule_func:
                logging.error(f"Rule function for {rule_name} not found.")
                continue

            # kwargs may be missing, None, a JSON string or already a dict;
            # anything that does not yield a dict falls back to no kwargs.
            raw_kwargs = rule.get("kwargs") or "{}"
            if isinstance(raw_kwargs, dict):
                kwargs = raw_kwargs
            else:
                try:
                    kwargs = json.loads(raw_kwargs)
                except (json.JSONDecodeError, TypeError):
                    kwargs = {}
                if not isinstance(kwargs, dict):
                    kwargs = {}

            rs = rule_func(df.copy(), **kwargs)
            if rs:
                rule_results.append(dict(
                    type=type_codes.get(rule["type"], "3"),
                    riskName=rule["risk_name"],
                    riskInterpret=rs.get("desc", ""),
                    isRisk="0",
                    tradeNo=rs.get("tradeNos", ""),
                    remark=remark_urls.get(rule["risk_name"], ""),
                ))
        except Exception as e:
            # Best effort by design: keep the traceback, move on to the next rule.
            logging.exception("Rule %r failed: %s", rule, e)
    return rule_results