网站更新内容:请访问: https://bigdata.ministep.cn/

存活时间换算成小时数

日期格式化处理

import re
import datetime
from dateutil.relativedelta import relativedelta
# Chinese numeral characters mapped to their digit values.
# Both '二' and '两' map to 2.
UTIL_CN_NUM = {
    '零': 0, '一': 1, '二': 2, '两': 2, '三': 3,
    '四': 4, '五': 5, '六': 6, '七': 7, '八': 8, '九': 9,
}

# Chinese magnitude characters mapped to their multipliers.
UTIL_CN_UNIT = {'十': 10, '百': 100, '千': 1000, '万': 10000}

def cn2dig(src):
    """Convert a number written in ASCII digits or Chinese numerals to a decimal string.

    If ``src`` starts with digits, that leading digit run is returned
    unchanged and anything after it is ignored. Otherwise every character
    must be a key of ``UTIL_CN_NUM`` or ``UTIL_CN_UNIT``.

    Args:
        src: Text such as "12", "十五" or "一百零五".

    Returns:
        The value as a decimal string, or None when ``src`` is None, empty,
        or contains a character that is neither a digit nor a known numeral.

    Note:
        Magnitude characters do not compound: a magnitude directly followed
        by another magnitude (e.g. "十万") is not read as their product.
    """
    if not src:
        return None
    m = re.match(r"\d+", src)
    if m:
        return m.group(0)
    rsl = 0
    unit = 1
    # Walk right-to-left so each magnitude character sets the multiplier
    # for the numeral characters that appear to its left.
    for item in reversed(src):
        if item in UTIL_CN_UNIT:
            unit = UTIL_CN_UNIT[item]
        elif item in UTIL_CN_NUM:
            rsl += UTIL_CN_NUM[item] * unit
        else:
            return None
    # A leading bare magnitude implies "one" of it ("十五" is 15, "百" is 100).
    # Checking the first character, rather than comparing rsl with unit,
    # keeps "零" from being turned into 1.
    if src[0] in UTIL_CN_UNIT:
        rsl += unit
    return str(rsl)


def parse_datetime_timedelta(msg):
    """Parse a duration such as "1天1时" and return the current time shifted by it.

    The text is matched from its start against optional year, month, day,
    period-of-day, hour, minute and second parts, written with ASCII digits
    or Chinese numerals. Each part that is present is converted with
    ``cn2dig`` and passed to ``relativedelta`` as a relative offset. Text
    that matches none of the parts (including the empty string) yields the
    current time unchanged.

    Args:
        msg: Duration text, or None.

    Returns:
        ``datetime.datetime.today()`` plus the parsed offsets. If the
        period-of-day part is "下午" or "晚上" and the resulting hour is
        below 12, 12 hours are added. Returns None when ``msg`` is None or a
        matched number cannot be converted by ``cn2dig``.
    """
    if msg is None:
        return None
    pattern = (
        r"([0-9零一二两三四五六七八九十]+年)?"
        r"([0-9一二两三四五六七八九十]+月)?"
        r"([0-9一二两三四五六七八九十]+[号天日])?"
        r"([上下午晚早]+)?"
        r"([0-9零一二两三四五六七八九十百]+[点:\.时])?"
        # A minutes value may omit "分" (as in "1时30"), but a bare number
        # must not be followed by another numeral or by "秒"; otherwise it
        # would swallow the digits of a seconds value such as "30秒".
        r"([0-9零一二三四五六七八九十百]+(?:分|(?![0-9零一二三四五六七八九十百秒])))?"
        r"([0-9零一二三四五六七八九十百]+秒)?"
    )
    # Every group is optional, so this always matches (possibly as "").
    m = re.match(pattern, msg)
    group_index = {
        "years": 1,
        "months": 2,
        "days": 3,
        "hours": 5,
        "minutes": 6,
        "seconds": 7,
    }
    params = {}
    for name, index in group_index.items():
        text = m.group(index)
        if not text:
            continue
        # Drop the trailing unit character only when one is present; a bare
        # minutes value such as "30" must keep its last digit.
        value = cn2dig(text.rstrip("年月号天日点:.时分秒"))
        if value is None:
            return None
        params[name] = int(value)
    target_date = datetime.datetime.today() + relativedelta(**params)
    if m.group(4) in ("下午", "晚上"):
        hour = target_date.hour
        if hour < 12:
            target_date = target_date.replace(hour=hour + 12)
    return target_date
if __name__ == "__main__":
    # Demo: print the current time, then the time one day and one hour ahead.
    current = datetime.datetime.today()
    print(current)
    shifted = parse_datetime_timedelta("1天1时")
    print(shifted)

清洗数据-存活时间换算成小时数

# For each row, parse the '存活时间' text into a shifted timestamp and store
# its distance from the current time, in hours, in 'total_hours'.
df['total_hours'] = df['存活时间'].apply(
    lambda text: (
        parse_datetime_timedelta(text) - datetime.datetime.today()
    ).total_seconds() / 3600
)

posted @ 2021-04-04 19:45  ministep88  阅读(120)  评论(0)  编辑  收藏  举报
网站更新内容:请访问:https://bigdata.ministep.cn/