from pandas_schema.validation import CustomElementValidation
def _is_missing(value):
    """Return True when *value* is None or a float NaN."""
    # NaN is the only float that differs from itself.  Comparing by value,
    # rather than by identity with one particular NaN object, also catches
    # NaN floats that are not that exact object.
    return value is None or (isinstance(value, float) and value != value)


def _is_not_blank(value):
    """Return True when *value* is present and contains non-whitespace text."""
    return not _is_missing(value) and bool(str(value).strip())


def _is_not_empty_array(value):
    """Return True when the ';'-separated *value* has a non-blank item."""
    if _is_missing(value):
        return False
    # str.split always yields at least one item (''.split(';') == ['']), so
    # the items themselves must be inspected instead of the list length.
    return any(item.strip() for item in str(value).split(';'))


# Reusable element-level validations, keyed by rule name.  Each message is
# the text reported for a cell that fails the rule.
validations = {
    "not_blank": CustomElementValidation(_is_not_blank, 'cannot be blank'),
    "not_empty_array": CustomElementValidation(
        _is_not_empty_array, 'cannot be empty array'),
}
def drop_error_rows(dfs, errors):
    """Return *dfs* without the rows that produced validation errors.

    Args:
        dfs: The validated frame; must expose ``columns`` and ``drop``.
        errors: Validation errors for *dfs*.  ``None`` or an empty
            collection means nothing is dropped.

    Returns:
        *dfs* itself when there are no errors; an empty DataFrame with the
        same columns when any error row index is ``-1``; otherwise the
        result of dropping the error rows from *dfs*.
    """
    if errors is None or len(errors) == 0:
        return dfs
    # Materialise once so the membership test below is a plain value search
    # regardless of which container type the helper returns.
    error_rows = list(get_unique_errors_index_rows(errors))
    # NOTE(review): -1 presumably marks an error that is not tied to a single
    # row, so no row can be trusted -- confirm against the helper.  The
    # sentinel is searched for anywhere in the list, not only at position 0,
    # so it can never be handed to drop() as if it were a real index label.
    if -1 in error_rows:
        return pd.DataFrame(columns=dfs.columns)
    return dfs.drop(index=error_rows)
# Source: https://mlog.club/article/5981725

from pandas_schema import Column, Schema
def check_string(sr):
    """Return True when *sr* can be converted to ``str``, else False.

    Args:
        sr: The value to probe.

    Returns:
        ``True`` if ``str(sr)`` succeeds, ``False`` if it raises.
    """
    try:
        str(sr)
    except Exception:
        # str() delegates to the object's own __str__, which may raise any
        # exception type, so a narrower clause would let failures escape.
        return False
    return True
def check_datetime(self, dec):
    """Return True when *dec* parses with ``self.date_format``.

    Args:
        self: Any object exposing a ``date_format`` attribute holding a
            ``strptime`` format string.
        dec: The value to parse.

    Returns:
        ``True`` if ``datetime.datetime.strptime`` accepts *dec*, ``False``
        if *dec* does not match the format or is not a string.
    """
    try:
        datetime.datetime.strptime(dec, self.date_format)
    except (ValueError, TypeError):
        # ValueError: text does not match the format.
        # TypeError: dec is not a string (e.g. None or a float NaN).
        # Anything else, such as a missing date_format, is a programming
        # error and is allowed to propagate instead of reading as "invalid".
        return False
    return True
def check_int(num):
    """Return True when ``int(num)`` succeeds, else False.

    Args:
        num: The value to probe.

    Returns:
        ``True`` if *num* converts with ``int()``, ``False`` otherwise.
    """
    try:
        int(num)
    except (ValueError, TypeError, OverflowError):
        # ValueError: non-numeric text or NaN.
        # TypeError: unsupported types such as None or a list.
        # OverflowError: infinite floats.
        return False
    return True
class _InitialDateFormat:
    """Holder for the ``date_format`` attribute that check_datetime reads."""

    # NOTE(review): per the strptime documentation %p only influences the
    # parsed hour when combined with %I; with %H the AM/PM marker is matched
    # but ignored.  Confirm whether %I was intended.
    date_format = '%m/%d/%Y %H:%M %p'


# Every CustomElementValidation pairs a predicate that returns True for a
# VALID cell with the message reported when the predicate returns False.

# check_string returns a bool, so the length test is applied to the value
# itself rather than to the helper's result.
string_validation = [
    CustomElementValidation(
        lambda x: check_string(x) and len(str(x)) > 5,
        'must be longer than 5 characters'),
]
int_validation = [CustomElementValidation(check_int, 'is not integer')]
# NOTE(review): the predicate keeps the direction written in the original
# ("not in"), i.e. the three listed types are rejected.  If they are meant
# to be the only accepted values, flip it to "in" and reword the message.
contain_validation = [
    CustomElementValidation(
        lambda y: check_string(y) and y not in ('type1', 'type2', 'type3'),
        'must not be type1, type2 or type3'),
]
date_time_validation = [
    CustomElementValidation(
        lambda dt: check_datetime(_InitialDateFormat, dt),
        'is not a date time'),
]
# pd.notna compares by value, so it also rejects NaN objects that are not
# the np.nan singleton, as well as None.
null_validation = [
    CustomElementValidation(
        lambda d: pd.notna(d), 'this field cannot be null'),
]

schema = Schema([
    Column('CompanyID', string_validation + null_validation),
    Column('initialdate', date_time_validation),
    Column('customertype', contain_validation),
    Column('ip', string_validation),
    Column('customersatisfied', string_validation),
])

errors = schema.validate(combined_df)
# Row index of every failed cell, one entry per error.
errors_index_rows = [e.row for e in errors]
# Persist the raw error objects for inspection.
pd.DataFrame({'col': errors}).to_csv('errors.csv')

# Site footer: Zhejiang public-security network filing no. 33010602011771