# 获取XXXX个股前十大股东数据-01

# !/usr/bin/env python
# -*- coding:utf-8 -*-

"""
# File       : XXXX.py
# Time       :2023/8/18 11:38
# Author     :lrtao2010
# version    :python 3.10.1
# Description:获取XXXX个股前十大股东
"""

#导入模块
import requests #下载网页
import re
import time
import random

# Report date used as the stop marker: spider() stops collecting rows as soon
# as it meets a row carrying this date. Update it before each run.
change_time = "2023/6/30"

# Build the list of stock codes, one code per line of the input file.
# - "utf-8-sig" decodes plain UTF-8 unchanged but also drops a leading BOM,
#   which str.strip() would otherwise leave glued to the first code.
# - Blank lines are skipped so they do not turn into requests for an empty code.
with open("./shuju/XXXX.txt", encoding='utf-8-sig') as f:
    my_code_list = [code for code in (line.strip() for line in f) if code]

# Request headers sent with every page download.
# NOTE(review): 'br' is advertised in Accept-Encoding; confirm a brotli decoder
# is installed, otherwise a brotli-compressed reply cannot be decoded.
_REQUEST_HEADERS = {
    'Accept': 'image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Cache-Control': 'no-cache',
    'Connection': 'keep-alive',
    'Host': 'XXXX.XXXX.com',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36'
}

# Compiled once at import time instead of on every call.
# Each match starts at a "<td>十大流通股东</td>" cell and captures:
#   time   - the text of the next <td>
#   name   - the following cell's text, with or without a wrapping <a> link
#            ((<a.*?>){0,1} matches the opening tag zero or one time)
#   change - the text of the cell after that
# re.S lets "." match newlines so one row may span several lines.
_HOLDER_ROW_RE = re.compile(
    r'<td>十大流通股东</td>.*?<td>'
    r'(?P<time>.*?)</td>'
    r'.*?class=.*?>(<a.*?>){0,1}(?P<name>.*?)(</a>){0,1}</td>'
    r'.*?class=.*?>(?P<change>.*?)</td>',
    re.S,
)

# Upper bound on the number of rows written per page.
_MAX_ROWS = 10


def spider(url, code=None):
    """Download one page and append its shareholder rows to the output file.

    Rows are taken in page order and written to ``./shuju/shareholder.txt``
    as ``code|time|name|change``. Collection stops at the first row whose
    ``time`` equals the module-level ``change_time``, or after ``_MAX_ROWS``
    rows, whichever comes first.

    Args:
        url: Address of the page to download.
        code: Stock code written as the first column of every row. When
            omitted, the text after the last ``=`` in ``url`` is used (the
            whole URL if it contains no ``=``), so the function no longer
            depends on a caller-side global.

    Returns:
        None. The only result is the appended lines.

    Raises:
        requests.RequestException: If the download fails (timeout,
            connection error, ...).
    """
    if code is None:
        code = url.rpartition("=")[2]

    resp = requests.get(url, headers=_REQUEST_HEADERS, timeout=(30, 30))
    if not resp.ok:
        # An error page contains no usable rows; say so instead of silently
        # treating it as "no data".
        print(f"{code}: HTTP {resp.status_code}, skipped")
        return
    # Without a charset in the response headers requests falls back to
    # ISO-8859-1 for text, which would garble the Chinese text the pattern
    # looks for; use the encoding detected from the body in that case.
    if (resp.encoding or "").lower() == "iso-8859-1":
        resp.encoding = resp.apparent_encoding

    lines = []
    for match in _HOLDER_ROW_RE.finditer(resp.text):
        row = match.groupdict()
        if row["time"] == change_time:
            break
        lines.append(f"{code}|" + "|".join(row.values()) + "\n")
        if len(lines) >= _MAX_ROWS:
            break

    # Open the output file once per page, and only when there is something
    # to write, so pages without rows never create or touch the file.
    if lines:
        with open("./shuju/shareholder.txt", 'a', encoding='utf-8') as s_f:
            s_f.writelines(lines)



# Main program. Remember to update the change_time variable before running.

if __name__ == '__main__':
    # The loop variable keeps the module-level name `my_list` on purpose:
    # it is a global, and other code in this file may read it by that name.
    for my_list in my_code_list:
        print(my_list)
        url = f"http://XXXX.XXXX.com//?XXXX?XXXX={my_list}"
        try:
            spider(url)
        except requests.RequestException as exc:
            # One timeout or connection error must not abort the whole batch;
            # report the code so it can be retried later and move on.
            print(f"{my_list}: request failed ({exc}), skipped")
        # Random 1-2 second pause between requests.
        time.sleep(random.randint(1, 2))

 

# posted @ 2024-02-28 18:27  蒙古草原狼  阅读(12)  评论(0)  编辑  收藏  举报