# CSDN copy script
import re
# Matches one "<div ...> ... /div>" element within a single line.  Because the
# pattern ends in '/div>' rather than '</div>', the captured group keeps the
# '<' of the closing tag, so text that sits right before '</div>' is still
# terminated by a '<' and can be picked up by the inner pattern below.
obj1 = re.compile(r'<div.*?>(?P<dic_content>.*?)/div>')

# Matches the text located between a '>' and the next '<'.
obj = re.compile(r'>(?P<content>.*?)<')


def extract_div_texts(line):
    """Return the visible text of every <div> element matched in *line*.

    For each match of ``obj1`` the text fragments found by ``obj`` inside the
    div are concatenated and stripped; divs whose text is empty after
    stripping are skipped.

    NOTE(review): a fragment is only recognised when it is preceded by a '>'
    inside the div, so text placed directly after the opening ``<div ...>``
    tag (with no inner tag before it) is not extracted.  This mirrors the
    original behaviour -- confirm whether it is intended.

    Args:
        line: One line of HTML text.

    Returns:
        A list of non-empty strings, one per matched div, in document order.
    """
    texts = []
    for div_match in obj1.finditer(line):
        inner = div_match.group('dic_content')
        # Joining empty fragments is a no-op, so no per-fragment filter needed.
        text = ''.join(m.group('content') for m in obj.finditer(inner)).strip()
        if text:
            texts.append(text)
    return texts


def main(path='aa.txt'):
    """Print the text of each <div> found in the file at *path*, one per line.

    Args:
        path: File to read; defaults to ``aa.txt`` in the working directory.
    """
    # The context manager guarantees the file handle is closed.
    with open(path) as source:
        for raw_line in source:
            for text in extract_div_texts(raw_line.strip()):
                # Parenthesised single-argument print works on Python 2 and 3.
                print(text)


if __name__ == '__main__':
    main()