# 爬22域名成交 (scrape completed domain sale records from 22.cn)

import requests
import re
import pandas as pd

# Site root; prepended to the relative links extracted from table cells.
host = r'https://am.22.cn'
# Prefix of the history detail pages; a numeric page id is appended to it.
url = 'https://am.22.cn/wsp/History/Detail/'

# From the first 'tbody' up to the nearest '</tbody>' (non-greedy).
mainpatt = re.compile(r'tbody[\s\S]+?</tbody>')
# One table row: a bare '<tr>' (no attributes) up to the nearest '</tr>'.
rowpatt = re.compile(r'<tr>[\S\s]+?</tr>')
# One table cell; captures the cell content. Requires at least one character
# between '<td' and the closing '>' of the opening tag.
cellpatt = re.compile(r'<td[\s\S]+?>([\s\S]+?)</td>')
# Captures the value of an href="..." attribute.
domainurlpatt = re.compile(r'href="([\s\S]+?)"')
# Captures the link text that follows '_blank">' up to the nearest '</a>'.
domainpatt = re.compile(r'_blank">([\s\S]+?)</a>')
# Captures the content of the page's <title> element.
titlepatt = re.compile(r'<title>([\s\S]+?)</title>')

def getinner(url, timeout=30):
    """Return the content of the ``<title>`` element of the page at *url*.

    Args:
        url: Absolute URL of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block forever on a stalled connection.

    Returns:
        The raw text between ``<title>`` and ``</title>`` (not stripped).

    Raises:
        requests.RequestException: On network failure or timeout.
        UnicodeDecodeError: If the response body is not valid UTF-8.
        IndexError: If the page contains no ``<title>`` element (same
            exception type the previous ``findall(...)[0]`` produced).
    """
    response = requests.get(url, timeout=timeout)
    page = response.content.decode('utf8')
    found = titlepatt.search(page)
    if found is None:
        raise IndexError('no <title> element found at ' + url)
    return found.group(1)

def getsingle(url, timeout=30):
    """Download one history detail page and extract its table rows.

    Args:
        url: Absolute URL of the detail page.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block forever on a stalled connection.

    Returns:
        A list of dicts with the keys ``'domain'``, ``'price'`` and
        ``'status'``, one per parsable table row. The list is empty when the
        page contains no ``tbody`` section.

    Raises:
        requests.RequestException: On network failure or timeout.
        UnicodeDecodeError: If a response body is not valid UTF-8.
        IndexError: If a followed link page has no ``<title>`` element.
    """
    html = requests.get(url=url, timeout=timeout).content.decode('utf8')
    table = mainpatt.search(html)
    if table is None:
        # No table on this page: nothing to extract.
        return []

    records = []
    for row in rowpatt.findall(table.group(0)):
        cells = cellpatt.findall(row)
        if len(cells) < 5:
            # Cells 1, 3 and 4 are read below; shorter rows cannot be parsed.
            continue
        domainstr = cells[1]
        link = domainurlpatt.search(domainstr)
        name = domainpatt.search(domainstr)
        if link is None or name is None:
            # The cell holds no link, so there is no domain to record.
            continue

        # The cell pattern can leave tag fragments in the captured text;
        # remove them together with the currency sign.
        price = cells[3].strip().replace('<td>', '').replace('¥', '')
        status = cells[4].strip().replace('</font>', '')
        domainurl = link.group(1).strip()
        domain = name.group(1).strip()
        if "打包" in domain:
            # For these entries the linked page's title is used as the
            # domain value instead of the link text.
            domain = getinner(host + domainurl)
        records.append({'domain': domain, 'price': price, 'status': status})
    return records

if __name__ == '__main__':
    # Crawl detail pages 1000..2231 and write every extracted row to 1.xlsx.
    r = []
    for i in range(1000, 2232):
        turl = url + str(i)
        try:
            a = getsingle(turl)
        except (requests.RequestException, IndexError, UnicodeDecodeError) as exc:
            # One unreachable or malformed page must not discard the rows
            # already collected from the other pages; report it and move on.
            print(i, 'failed:', exc)
            continue
        # extend() appends in place; `r = r + a` copied the whole list on
        # every page.
        r.extend(a)
        print(i, 'done')
    df = pd.DataFrame(r)
    df.to_excel('1.xlsx', index=False)

  

# posted @ 2026-05-21 22:55  CrossPython  阅读(3)  评论(0)    收藏  举报