数据存储 csv

#

# 保存csv格式的数据
import
csv csvFile = open('test.csv','w+',newline='') #文本方式可读写 try: writer = csv.writer(csvFile) writer.writerow(('num','num+2','num*2')) for i in range(10): writer.writerow((i,i+2,i*2)) finally: csvFile.close()

#

# mysql python操作
import pymysql  #导包
conn = pymysql.connect(host='127.0.0.1', port=3306, user='root', password=None, db='mysql',chaset='utf8') #创建连接
cur = conn.cursor() #创建游标
cur.execute("USE scraping") #执行 使用库
cur.execute("SELECT * FROM pages WHERE id=1") #执行语句
print(cur.fetchone())  #获取单条数据
cur.close()  #游标关闭
conn.close() #连接关闭

#

str = bytes(value=b'', encoding=None)  #指定编码
from urllib.request import urlopen
from io import StringIO  #字符串的缓存
import csv

data = urlopen("http://pythonscraping.com/files/MontyPythonAlbums.csv").read().decode('ascii', 'ignore')
dataFile = StringIO(data)
csvReader = csv.reader(dataFile)

for row in csvReader:
    print("The album \""+row[0]+"\" was released in "+str(row[1]))
#pdfminer3k
from pdfminer.pdfinterp import PDFResourceManager, process_pdf
from pdfminer.converter import TextConverter
from pdfminer.layout import LAParams
from io import StringIO
from io import open
from urllib.request import urlopen

def readPDF(pdfFile):
    rsrcmgr = PDFResourceManager()
    retstr = StringIO()
    laparams = LAParams()
    device = TextConverter(rsrcmgr, retstr, laparams=laparams)

    process_pdf(rsrcmgr, device, pdfFile)
    device.close()

    content = retstr.getvalue()
    retstr.close()
    return content

pdfFile = urlopen("http://pythonscraping.com/pages/warandpeace/chapter1.pdf")
outputString = readPDF(pdfFile)
print(outputString)
pdfFile.close()

 

from zipfile import ZipFile  #docx
from urllib.request import urlopen
from io import BytesIO
from bs4 import BeautifulSoup

wordFile = urlopen("http://pythonscraping.com/pages/AWordDocument.docx").read()
wordFile = BytesIO(wordFile)
document = ZipFile(wordFile)
xml_content = document.read('word/document.xml')

wordObj = BeautifulSoup(xml_content.decode('utf-8'), "lxml-xml")
textStrings = wordObj.findAll("w:t")
for textElem in textStrings:
    closeTag = ""
    try:
        style = textElem.parent.previousSibling.find("w:pStyle")
        if style is not None and style["w:val"] == "Title":
            print("<h1>")
            closeTag = "</h1>"
    except AttributeError: #不打印标签
        pass 
    print(textElem.text)
    print(closeTag)

 

posted @ 2019-07-09 22:21  追风zz  阅读(369)  评论(0编辑  收藏  举报