Python 爬取 Final Fantasy XIV Wiki 中的工匠配方数据

一、选题背景

平时自己使用,提前准备需要的道具

二、设计方案

1. 工匠配方爬虫

2. 对主题式网络爬虫爬取到的内容进行分析,并生成 Excel 表格

三、主题页面结构分析特征

1.数据来源:

http://ff14.huijiwiki.com/wiki/%E5%88%BB%E6%9C%A8%E5%8C%A0/%E9%85%8D%E6%96%B9/

 

 2.页面解析

 

 四、程序主体

from bs4 import BeautifulSoup
import urllib.request
import xlwt
from xlwt import *

# Column headers written to the first row of every sheet.
key = ['物品名称', '材料', '数量']

# Single workbook shared by all sheets; each scraped page adds one sheet to it.
file = xlwt.Workbook(encoding='utf-8')

# Cell style used for every cell: Microsoft YaHei font, centred both ways.
fnt = xlwt.Font()
fnt.name = u'微软雅黑'

style = xlwt.XFStyle()
style.font = fnt
style.alignment.vert = xlwt.Alignment.VERT_CENTER
style.alignment.horz = xlwt.Alignment.HORZ_CENTER

# Level brackets used as URL suffixes and sheet-name suffixes:
# fourteen consecutive 5-level spans, '1-5' through '66-70'.
lvRange = ['%d-%d' % (first, first + 4) for first in range(1, 14 * 5 + 1, 5)]

print(lvRange)

def downlaoding(url, fn, lv):
    """Scrape one recipe page into a new sheet of the shared workbook and save it.

    Each ``<tr class="filter-div--item">`` on the page is treated as one
    recipe.  The item name is read from the link inside a ``div`` whose class
    attribute is ``item-name rarity-common`` or ``item-name rarity-uncommon``;
    materials and quantities are read from ``span.item-name`` /
    ``span.item-number`` inside the row's ``td.table--dark-m`` cells.

    Data is kept as one record per recipe row, so an item is always written
    next to its own materials.  Rows without a matching item name are skipped
    together with their materials, a row with several material cells
    contributes all of them to the same item, and a material without a
    quantity gets a blank quantity cell.

    Args:
        url: Address of the recipe page to fetch (decoded as UTF-8).
        fn: Sheet-name prefix.
        lv: Level-range string appended to ``fn`` to form the sheet name.

    Raises:
        urllib.error.URLError: If the page cannot be fetched.
        Exception: Whatever xlwt raises for an invalid or duplicate sheet name.
    """
    table = file.add_sheet(fn + lv)

    # Close the HTTP response deterministically instead of leaking it.
    with urllib.request.urlopen(url) as response:
        html_doc = response.read().decode('utf-8')
    soup = BeautifulSoup(html_doc, 'html.parser')

    # Collect the data: one (item name, [(material, quantity), ...]) per row.
    recipes = []
    for recipe_row in soup.find_all('tr', class_='filter-div--item'):
        # Item name (common rarity first, then uncommon).
        names = []
        for rarity_class in ('item-name rarity-common',
                             'item-name rarity-uncommon'):
            for name_div in recipe_row.find_all('div', class_=rarity_class):
                link = name_div.find('a')
                if link is not None:
                    names.append(link.string)
        if not names:
            # No recognised item name: drop the row so its materials cannot
            # end up under a different item.
            continue

        # Materials, paired with their quantities cell by cell.
        ingredients = []
        for cell in recipe_row.find_all('td', class_='table--dark-m'):
            quantities = [span.text
                          for span in cell.find_all('span', class_='item-number')]
            material_spans = cell.find_all('span', class_='item-name')
            for position, span in enumerate(material_spans):
                link = span.find('a')
                material = link.string if link is not None else None
                quantity = (quantities[position]
                            if position < len(quantities) else None)
                ingredients.append((material, quantity))

        # One output entry per recipe row; the first name found labels it.
        recipes.append((names[0], ingredients))

    # Write the data.
    # Header row.
    for header_col, title in enumerate(key):
        table.write(0, header_col, title, style)

    # Each item occupies one spreadsheet row per material; the name cell in
    # column 0 is merged vertically across those rows.
    row = 1
    for item_name, ingredients in recipes:
        if not ingredients:
            # A merge over zero rows is invalid; give the item a single row.
            table.write(row, 0, item_name, style)
            row += 1
            continue

        last_row = row + len(ingredients) - 1
        table.write_merge(row, last_row, 0, 0, item_name, style)
        for offset, (material, quantity) in enumerate(ingredients):
            table.write(row + offset, 1, material, style)
            table.write(row + offset, 2, quantity, style)
        row = last_row + 1

    table.col(0).width = 5000
    table.col(1).width = 5000

    # Save the workbook (re-saved after every sheet, so progress is kept if a
    # later page fails).
    # NOTE(review): xlwt produces the legacy .xls format; the '.xlsx' extension
    # is kept only to preserve the existing output path - confirm and rename.
    file.save('配方统计.xlsx')


# One entry per crafting class, in fetch order:
# (percent-encoded URL prefix that is requested,
#  readable URL prefix that is printed afterwards,
#  sheet-name prefix).
_CRAFT_PAGES = (
    ("http://ff14.huijiwiki.com/wiki/%E5%88%BB%E6%9C%A8%E5%8C%A0/%E9%85%8D%E6%96%B9/",
     "http://ff14.huijiwiki.com/wiki/刻木匠/配方/", '刻木'),
    ("http://ff14.huijiwiki.com/wiki/%E9%94%BB%E9%93%81%E5%8C%A0/%E9%85%8D%E6%96%B9/",
     "http://ff14.huijiwiki.com/wiki/锻铁匠/配方/", '锻铁'),
    ("http://ff14.huijiwiki.com/wiki/%E9%93%B8%E7%94%B2%E5%8C%A0/%E9%85%8D%E6%96%B9/",
     "http://ff14.huijiwiki.com/wiki/铸甲匠/配方/", '铸甲'),
    ("http://ff14.huijiwiki.com/wiki/%E9%9B%95%E9%87%91%E5%8C%A0/%E9%85%8D%E6%96%B9/",
     "http://ff14.huijiwiki.com/wiki/雕金匠/配方/", '雕金'),
    ("http://ff14.huijiwiki.com/wiki/%E5%88%B6%E9%9D%A9%E5%8C%A0/%E9%85%8D%E6%96%B9/",
     "http://ff14.huijiwiki.com/wiki/制革匠/配方/", '制革'),
    ("http://ff14.huijiwiki.com/wiki/%E8%A3%81%E8%A1%A3%E5%8C%A0/%E9%85%8D%E6%96%B9/",
     "http://ff14.huijiwiki.com/wiki/裁衣匠/配方/", '裁衣'),
    ("http://ff14.huijiwiki.com/wiki/%E7%82%BC%E9%87%91%E6%9C%AF%E5%A3%AB/%E9%85%8D%E6%96%B9/",
     "http://ff14.huijiwiki.com/wiki/炼金术士/配方/", '炼金'),
    ("http://ff14.huijiwiki.com/wiki/%E7%83%B9%E8%B0%83%E5%B8%88/%E9%85%8D%E6%96%B9/",
     "http://ff14.huijiwiki.com/wiki/烹调师/配方/", '烹调'),
)

# For every level range, fetch each crafting class in turn and log the
# readable URL once that page has been written to the workbook.
for level_range in lvRange:
    for request_prefix, readable_prefix, sheet_prefix in _CRAFT_PAGES:
        downlaoding(request_prefix + str(level_range), sheet_prefix, level_range)
        print(readable_prefix + (level_range))

五、生成效果

 

posted @ 2021-06-26 20:11  ageis  阅读(154)  评论(0)  编辑  收藏  举报