基于python的邮件地址提取小程序

import sys

import os

import re



def analysis_file(path):
    """Extract the distinct e-mail addresses found in one text file.

    Reads the file at ``path``, collects every distinct address matched by
    the pattern below and, when at least one was found, writes them to
    ``path + ".mail.txt"`` with a comma after each address.  No output file
    is created when nothing matches.

    Args:
        path: Path of the text file to scan.

    Raises:
        OSError: If the input cannot be read or the output cannot be written.
    """
    print("analysis file: %s." % path)

    # Undecodable bytes are replaced instead of raising, so one binary or
    # oddly-encoded file does not abort the scan of a whole directory.
    with open(path, "r", errors="replace") as fi:
        all_text = fi.read()

    # The first segment of the local part must accept digits and underscores:
    # a letters-only class skips "john2@example.com" entirely and truncates
    # "a1b@x.com" to "b@x.com".  It stays ASCII-only so that non-ASCII text
    # directly in front of an address is not swallowed into the match.
    re_mail = re.compile(
        r"[a-zA-Z0-9_-]+(?:\.[\w-]+)*@[\w-]+(?:\.[a-zA-Z-]+)+"
    )
    mails = set(re_mail.findall(all_text))

    print("results: %d" % len(mails))

    if not mails:
        return

    # Sorted so the output is reproducible between runs.  Every address,
    # including the last one, is followed by a comma.
    with open(path + ".mail.txt", "wt") as fo:
        fo.write("".join(mail + "," for mail in sorted(mails)))



def analysis_dir(path):
    """Run ``analysis_file`` on every regular file directly inside ``path``.

    Sub-directories are not descended into.  Files whose name ends in
    ".mail.txt" are skipped, since that is the suffix ``analysis_file``
    gives to its own result files.

    Args:
        path: Directory whose files are scanned.
    """
    for name in os.listdir(path):
        full_path = os.path.join(path, name)
        # Test the joined path: a bare name would be resolved against the
        # current working directory rather than against ``path``.
        if not os.path.isfile(full_path) or name.endswith(".mail.txt"):
            continue
        analysis_file(full_path)



def main():
    """Command-line entry point.

    Takes the target from the first command-line argument.  A regular file
    is handed to ``analysis_file``; any other existing path is handed to
    ``analysis_dir``.  A missing argument or a non-existent path only
    prints a message and returns.
    """
    print("analysis is working... ...")
    print("current direcotry: %s." % os.getcwd())

    cli_args = sys.argv[1:]
    if not cli_args:
        print("set the directory to serach")
        return

    target = cli_args[0]

    # Single-file mode.
    if os.path.isfile(target):
        print("searching file: %s." % target)
        analysis_file(target)
        return

    # Anything else must at least exist before it is treated as a directory.
    if not os.path.exists(target):
        print("there isn't exist direcoty: %s" % target)
        return

    print("searching alll files in directory: %s." % target)
    analysis_dir(target)



# Run the extractor only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()
这是我在实际工作中常碰到的问题:大量的邮件地址分散在若干文件中。本程序可以处理单个文件或目录,提取其中所有的邮件地址,并拼接成邮件地址列表,可以直接用于批量邮件的发送。
posted @ 2011-03-03 09:40  王忠惠  阅读(974)  评论(0)    收藏  举报