import os, re, time, json
import email
from imaplib import IMAP4_SSL
# Only needed if mail is to be sent automatically
from smtplib import SMTP_SSL
from bs4 import BeautifulSoup
import openpyxl
from openpyxl import load_workbook
import requests
import execjs # pip install PyExecJS # 需要注意, 包的名称:PyExecJS
import time
class AutoEmail(object):
    """Read every message in one IMAP folder, archive it and empty the folder.

    Messages are read from ``oldmaildir``, copied into ``maildir`` and then
    deleted from ``oldmaildir``.
    """

    def __init__(self, account, host, password, maildir, oldmaildir):
        """Store the mailbox account settings.

        :param account: login name of the mailbox
        :param host: IMAP server host name (connected to over SSL)
        :param password: login password
        :param maildir: folder that processed messages are copied into
        :param oldmaildir: folder that is read and afterwards emptied
        """
        self.account = account
        self.host = host
        self.password = password
        # Not read anywhere in this class; kept so existing users still see it.
        self.num = -1
        self.maildir = maildir
        self.oldmaildir = oldmaildir

    def getemail(self):
        """Open an authenticated IMAP-over-SSL connection.

        :return: the logged-in ``IMAP4_SSL`` object, or ``None`` when the
            connection or the login failed (the failure is printed)
        """
        try:
            email_conn = IMAP4_SSL(self.host)
            email_conn.login(user=self.account, password=self.password)
            return email_conn
        except (IMAP4_SSL.error, OSError) as e:
            # Narrower than the former BaseException so Ctrl-C is not swallowed.
            print("Connect to {0} failed".format(self.host), e)
            return None

    def savefile(self, filename, data, path):
        """Placeholder for saving an attachment; currently does nothing.

        :param filename: name of the file to save
        :param data: data to write
        :param path: directory to save into
        :return: None
        """
        pass

    def emailfolder(self):
        """Return the raw folder listing of the account.

        :return: list of the untagged LIST response lines, or an empty list
            when no connection could be opened
        """
        email_conn = self.getemail()
        if email_conn is None:
            return []
        try:
            return list(email_conn.list()[1])
        finally:
            self._logout(email_conn)

    def get_body(self, msg):
        """Return the decoded payload of the first non-multipart leaf of *msg*.

        :param msg: an ``email.message.Message``
        :return: the leaf payload as returned by ``get_payload(decode=True)``
        """
        if msg.is_multipart():
            return self.get_body(msg.get_payload(0))
        return msg.get_payload(None, decode=True)

    def receiveremail(self):
        """Parse all messages in ``oldmaildir``, copy them to ``maildir`` and
        then empty ``oldmaildir``.

        :return: dict keyed by the 0-based position of each message.  Each
            value is a dict with the keys ``subject``, ``sender``,
            ``send_time``, ``content``, ``encontent`` and ``annex``; it stays
            empty for messages without a nested-multipart or HTML part.
            An empty dict is returned when no connection could be opened.
        """
        email_conn = self.getemail()
        if email_conn is None:
            # getemail() has already reported the failure.
            return {}

        mail_data_list = {}
        try:
            email_conn.select(self.oldmaildir, readonly=False)
            status, email_data = email_conn.search(None, 'ALL')
            newlist = email_data[0].split() if email_data and email_data[0] else []

            for num, seq in enumerate(newlist):
                mail_data_list[num] = {}
                typ, data = email_conn.fetch(seq, '(RFC822)')
                # Parse from bytes: decoding the raw message as UTF-8 first
                # fails for any mail that uses another charset.
                msg = email.message_from_bytes(data[0][1])
                if msg.is_multipart():
                    for part in msg.get_payload():
                        ctype = part.get_content_type()
                        if "multipart" in ctype:
                            mail_data_list[num].update(self._parse_nested_part(part))
                        if "html" in ctype:
                            mail_data_list[num].update(self._parse_html_part(part))

                # Archive the message before the source folder is emptied.
                try:
                    typ, detail = email_conn.copy(seq.decode(), self.maildir)
                    if typ == 'OK':
                        print('copy successful:')
                    else:
                        # A "NO" reply does not raise, so check it explicitly.
                        print("拷贝邮件失败:", detail)
                except (IMAP4_SSL.error, OSError) as e:
                    # TODO: this should go to a log file.
                    print("拷贝邮件失败:", e)
        finally:
            self._logout(email_conn)

        # NOTE(review): this removes every message in oldmaildir, including
        # ones whose copy failed above - confirm that this is intended.
        self.removemail()
        return mail_data_list

    def removemail(self):
        """Delete every message in ``oldmaildir``.

        All messages are flagged first and expunged once.  Expunging after
        each single message renumbers the remaining ones, which is why one
        pass used to leave messages behind.
        """
        email_conn = self.getemail()
        if email_conn is None:
            return
        try:
            email_conn.select(self.oldmaildir, readonly=False)
            status, email_data = email_conn.search(None, 'ALL')
            newlist = email_data[0].split() if email_data and email_data[0] else []
            if not newlist:
                return
            for seq in newlist:
                try:
                    email_conn.store(seq.decode(), '+FLAGS', '(\\Deleted)')
                except (IMAP4_SSL.error, OSError) as e:
                    # TODO: this should go to a log file.
                    print("删除邮件失败:", e)
            email_conn.expunge()
        finally:
            self._logout(email_conn)

    def _parse_nested_part(self, part):
        """Build the record for a nested multipart part (``annex`` = "YES")."""
        raw = self.get_body(part) or b""
        body = str(raw, encoding='ISO-8859-1')
        return {
            "subject": self._first_match(r"Subject.*", body),
            "sender": self._first_match(r"From.*", body),
            "send_time": self._first_match(r"Date.*", body),
            "content": body,
            "encontent": self._translate(body),
            "annex": "YES",
        }

    def _parse_html_part(self, part):
        """Build the record for an HTML part (``annex`` = "NO").

        NOTE(review): the tag positions used below look tailored to one
        specific forwarded-mail layout; other layouts raise IndexError or
        AttributeError exactly as before - confirm against real mail.
        """
        raw = part.get_payload(decode=True)
        charset = part.get_content_charset() or 'utf-8'
        try:
            html = raw.decode(charset)
        except (LookupError, UnicodeDecodeError):
            # Unknown or wrong charset label: fall back without crashing.
            html = raw.decode('utf-8', errors='replace')

        soup = BeautifulSoup(html, "lxml")
        bold_tags = soup.find_all("b")
        sender = soup.find_all("a")[0].get("href").split(":")[1]
        send_time = self._first_match(
            r"(\d{4}-\d{1,2}-\d{1,2})",
            bold_tags[1].parent.get_text(strip=True),
        )
        subject_line = bold_tags[-1].parent
        subject = subject_line.get_text(strip=True).replace("Subject:", "")
        content = subject_line.parent.parent.find_next_siblings("div")[0].text
        return {
            "subject": subject,
            "sender": sender,
            "send_time": send_time,
            "content": content,
            "encontent": self._translate(content),
            "annex": "NO",
        }

    @staticmethod
    def _translate(content):
        """Translate *content*; return "" when the translator returns nothing."""
        result = GoogleTranslate().translate(content, en_to_zn=True)
        return result[0] if result else ""

    @staticmethod
    def _first_match(pattern, text):
        """Return the first match of *pattern* in *text*, or "" if absent."""
        found = re.search(pattern, text)
        return found.group() if found else ""

    @staticmethod
    def _logout(email_conn):
        """Log out of *email_conn*, ignoring errors of a broken session."""
        try:
            email_conn.logout()
        except (IMAP4_SSL.error, OSError):
            # Best-effort cleanup; the session is being discarded anyway.
            pass
class HandleExcel(object):
    """Append parsed mail records to a worksheet of an existing workbook."""

    def __init__(self, path="email.xlsx", sheet_name="emailcontent"):
        """Remember which workbook and worksheet to write to.

        :param path: path of the workbook file (must already exist)
        :param sheet_name: name of the worksheet inside the workbook
        """
        self.path = path
        self.sheet_name = sheet_name

    def getexcel(self):
        """Open the workbook.

        :return: tuple ``(workbook, worksheet)``
        """
        wb = load_workbook(self.path)
        sh = wb[self.sheet_name]
        return wb, sh

    @staticmethod
    def _next_count(last_value, max_row):
        """Return the running number for the next row.

        Continues from *last_value* when it is numeric; otherwise (empty cell
        or text) falls back to ``max_row + 1``.
        """
        is_number = isinstance(last_value, (int, float)) and not isinstance(last_value, bool)
        if is_number:
            return last_value + 1
        return max_row + 1

    def writedict(self, info_dict):
        """Append one row per non-empty record of *info_dict* and save.

        :param info_dict: mapping whose values are dicts with the keys
            ``sender``, ``annex``, ``send_time``, ``content`` and
            ``encontent``; empty records are skipped and missing keys leave
            the cell empty
        :return: None
        """
        if not info_dict:
            # Nothing to append, so the workbook is not opened at all.
            return

        wb, sh = self.getexcel()
        row = sh.max_row
        count_num = self._next_count(sh.cell(row=row, column=1).value, row)

        for info in info_dict.values():
            if not info:
                # Messages that could not be parsed are stored as {}.
                continue
            row += 1
            # Running number - column 1
            sh.cell(row=row, column=1, value=count_num)
            # Attachment marker - column 2
            sh.cell(row=row, column=2, value=info.get("annex"))
            # Time the mail was sent - column 3
            sh.cell(row=row, column=3, value=info.get("send_time"))
            # Sender address - column 6
            sh.cell(row=row, column=6, value=info.get("sender"))
            # Mail content - column 13
            sh.cell(row=row, column=13, value=info.get("content"))
            # Translated mail content - column 14
            sh.cell(row=row, column=14, value=info.get("encontent"))
            count_num += 1

        wb.save(self.path)
class GoogleTranslate(object):
    """Small client for the web endpoint of Google Translate."""

    # Longest text that translate() accepts.
    MAX_LENGTH = 4891
    # Seconds to wait for the HTTP response before giving up.
    REQUEST_TIMEOUT = 10
    # English -> Chinese.  The stray spaces that used to sit inside this
    # literal (after "sl=en" and "dt=ss") corrupted the query string.
    URL_EN_TO_ZH = (
        "https://translate.google.cn/translate_a/single?client=t&sl=en"
        "&tl=zh-CN&hl=zh-CN&dt=at&dt=bd&dt=ex&dt=ld&dt=md&dt=qca&dt=rw&dt=rm&dt=ss"
        "&dt=t&ie=UTF-8&oe=UTF-8&clearbtn=1&otf=1&pc=1&srcrom=0&ssel=0&tsel=0&kc=2"
    )
    # Chinese -> English.
    URL_ZH_TO_EN = (
        "https://translate.google.cn/translate_a/single?client=t&sl=zh-CN"
        "&tl=en&hl=zh-CN&dt=at&dt=bd&dt=ex&dt=ld&dt=md&dt=qca&dt=rw&dt=rm&dt=ss"
        "&dt=t&ie=UTF-8&oe=UTF-8&otf=1&ssel=6&tsel=3&kc=1"
    )

    def __init__(self):
        """Compile the JavaScript that computes the ``tk`` request token."""
        self.ctx = execjs.compile(  # JavaScript taken from analysing the web page
            """ function TL(a) { var k = ""; var b = 406644; var b1 = 3293161072; var jd = "."; var $b = "+-a^+6"; var Zb = "+-3^+b+-f"; for (var e = [], f = 0, g = 0; g < a.length; g++) { var m = a.charCodeAt(g); 128 > m ? e[f++] = m : (2048 > m ? e[f++] = m >> 6 | 192 : (55296 == (m & 64512) && g + 1 < a.length && 56320 == (a.charCodeAt(g + 1) & 64512) ? (m = 65536 + ((m & 1023) << 10) + (a.charCodeAt(++g) & 1023), e[f++] = m >> 18 | 240, e[f++] = m >> 12 & 63 | 128) : e[f++] = m >> 12 | 224, e[f++] = m >> 6 & 63 | 128), e[f++] = m & 63 | 128) } a = b; for (f = 0; f < e.length; f++) a += e[f], a = RL(a, $b); a = RL(a, Zb); a ^= b1 || 0; 0 > a && (a = (a & 2147483647) + 2147483648); a %= 1E6; return a.toString() + jd + (a ^ b) }; function RL(a, b) { var t = "a"; var Yb = "+"; for (var c = 0; c < b.length - 2; c += 3) { var d = b.charAt(c + 2), d = d >= t ? d.charCodeAt(0) - 87 : Number(d), d = b.charAt(c + 1) == Yb ? a >>> d: a << d; a = b.charAt(c) == Yb ? a + d & 4294967295 : a ^ d } return a } """)

    def getTk(self, text):
        """Return the ``tk`` token the endpoint expects for *text*."""
        return self.ctx.call("TL", text)

    def translate(self, content, en_to_zn=True):
        """Translate *content*.

        :param content: text to translate
        :param en_to_zn: translate English to Chinese when True (default),
            Chinese to English when False
        :return: tuple ``(joined_text, segments)`` where *segments* is the
            list of translated pieces and *joined_text* joins them with
            newlines; ``None`` when *content* is longer than ``MAX_LENGTH``
        :raises requests.RequestException: on network errors, timeouts or a
            non-success HTTP status
        """
        if not content:
            # Nothing to translate, so skip the network round-trip.
            return '', []
        if len(content) > self.MAX_LENGTH:
            print("翻译的长度超过限制!!!")
            return None

        url = self.URL_EN_TO_ZH if en_to_zn else self.URL_ZH_TO_EN
        param = {'tk': self.getTk(content), 'q': content}
        result = requests.get(url, params=param, timeout=self.REQUEST_TIMEOUT)
        result.raise_for_status()
        # The response is JSON: a nested list whose first item holds the
        # translated segments (it can be null).
        results = result.json()[0] or []
        data = [segment[0] for segment in results if segment[0]]
        return '\n'.join(data), data
# Mailbox settings.  The values below are placeholders; do not commit real
# credentials.
account = "xxx@xxx.com"
host = "imap.exmail.qq.com"
password = "xxxxx"
# Folder that processed messages are copied into (AutoEmail.maildir).
maildir = "&UXZO1mWHTvZZOQ-/oldfs"
# Folder that is searched for messages and emptied afterwards
# (AutoEmail.oldmaildir).
oldmaildir = "&UXZO1mWHTvZZOQ-/fs"


def main():
    """Fetch the mail, archive it and append the parsed records to Excel."""
    autoemail = AutoEmail(account, host, password, maildir, oldmaildir)
    info_dict = autoemail.receiveremail()
    excel = HandleExcel()
    excel.writedict(info_dict)


# Run the job only when executed as a script, so that importing this module
# no longer logs in to the mailbox and deletes mail as a side effect.
if __name__ == "__main__":
    main()