import html
import os
from io import open
def file_name(file_dir, ext='.txt'):
    """Collect the names of files under ``file_dir`` that have extension ``ext``.

    The directory tree is walked recursively with ``os.walk``. Only the bare
    file name is recorded (not the directory it was found in), so a file that
    lives in a subdirectory appears in the result without its relative path.

    Args:
        file_dir: Root directory to search.
        ext: Extension to match, including the leading dot. It is compared
            exactly (case-sensitively) with ``os.path.splitext(name)[1]``.
            Defaults to ``'.txt'``.

    Returns:
        A list of matching file names, in the order ``os.walk`` yields them.
        The list is empty when nothing matches.
    """
    matches = []
    for _root, _dirs, files in os.walk(file_dir):
        # os.path.splitext splits "stem.ext" into ("stem", ".ext").
        matches.extend(
            name for name in files if os.path.splitext(name)[1] == ext
        )
    return matches
# Directory holding the extracted .txt files. The HTML index is appended to a
# file inside that same directory.
filedir = 'E:/pdfTotxt/txt/'
htmlFile = 'E:/pdfTotxt/txt/txt.html'
# Upper bound on how many text files are rendered in one run.
MAX_FILES = 500

L = file_name(file_dir=filedir)
index = 0  # number of files written so far; stays 0 when L is empty

# Open the output once for the whole batch instead of once per entry. Append
# mode keeps whatever an earlier run already wrote to the file.
with open(htmlFile, mode='a', encoding='UTF-8') as out:
    # Slicing stops the loop at the cap rather than iterating the rest of the
    # list without doing anything.
    for index, fineName in enumerate(L[:MAX_FILES], start=1):
        txtFile = os.path.join(filedir, fineName)
        # The context manager closes each input file as soon as it is read.
        with open(txtFile, 'r', encoding='UTF-8') as src:
            content = src.read()
        print(content)

        # fineName[3:6] is spliced into the image URL after the URL-encoded
        # prefix; presumably it is the page number embedded in the file
        # name -- TODO confirm against the actual naming scheme.
        # The link label and the file text are HTML-escaped so that '<', '>'
        # and '&' in the extracted text cannot break the generated markup.
        out.write(
            '<li>'
            '<a target=_blank href=http://xxx/jimage/pdf/%E9%A1%B5%E9%9D%A2_'
            '{page}.jpg >{label}</a><br>'
            '<pre>{body}</pre>'
            '</li>'
            '\n'.format(
                page=fineName[3:6],
                label=html.escape(fineName),
                body=html.escape(content),
            )
        )