药->excel

1无日志

# @author: zhc
# @Time: 2023/5/18
# @FileName: demo
import re
import pandas as pd
import requests
# One HTTP session object shared by every scraper function below.
session = requests.Session()
# Turn off requests' "trust environment" switch (environment-provided
# proxy / default-auth settings are then not applied to this session).
session.trust_env = False

#  获取第一页名字写入excel中  略
def infosaa(pages):
    """Save the names of DrugBank bio-entity search hits to ``aa.xlsx``.

    Result pages ``1`` to ``int(pages)`` of the ``bio_entities`` search are
    fetched through the module-level ``session``; scraping stops early at
    the first page that contains no hit link.  The workbook is rewritten
    after every page, so the rows gathered so far are on disk even if a
    later request fails.

    Args:
        pages: Number of result pages to request (anything ``int()``
            accepts, e.g. the string returned by ``input()``).
    """
    # Cookies/headers captured from a browser session.  They do not depend on
    # the page number, so they are built once instead of once per request.
    # NOTE(review): these look like live session credentials committed to
    # source -- they will expire and probably should not be shared.
    cookies = {
        'cf_clearance': 'z8jXLY4NjL4.KVOUbgZNPWj6NPBlT_u.x4xmS19uZZE-1682037487-0-250',
        '_gcl_au': '1.1.154779136.1682037508',
        'hubspotutk': '15b5c265b1847afab42a7def948ef734',
        '_gid': 'GA1.2.695314151.1684390995',
        '_clck': 'qmfr9z|2|fbp|0|1233',
        'ln_or': 'eyIyNDI4NDg0IjoiZCJ9',
        'cf_chl_2': '1ea719f0d331036',
        'cf_chl_rc_i': '1',
        '_omx_drug_bank_session': 'kk16uujsTygoHwxfGEhrQEyjxT2CzYy2PCRfatX70%2BtKAQzuRCwO6fY%2FjHPVOsS2SLetNSvbEMvJD868pOaTuZ8EKhJqcNoxDZG68MLXcygqnx5g6cerWxPObUdqnBPQPWgcAJM7f%2FCOtvA%2BaHVLah3%2Fwcfl%2FfbkpfjV%2BsNNubQF1D9LB4e4xsUZpcntjSlLNAh6JykRisrdIlvIqN6%2B56vBklZtnRELonZY9yvyY%2B01bKCoflxsvNu8NS6ouAHWgBChb7%2BuoEBd4c6X4MttHMtlcKKhryxzE9mGZ6nfckvpGWqaGMptW7n2TsWUzFyBIaEwCgMTgHASS7W432%2FzrIgArwjxDV6hkQccMFE1EbT2%2BVMjfBz3NFHyrDkyQhZxLmnzE0jQzSpMa1lzob6Hw30cQKtnBBVbTbsbBNk6SX3fM4FMhHcaCvAZZE2mPQ%3D%3D--LWxvxvusC6OofJHn--tZPo%2BodDD6mfp%2FsRb%2BDc4w%3D%3D',
        '_gat': '1',
        '_ga': 'GA1.1.1772772602.1682037506',
        '_ga_DDLJ7EEV9M': 'GS1.1.1684397556.12.0.1684397556.0.0.0',
        '_clsk': '15jlqzx|1684397557641|1|1|z.clarity.ms/collect',
        '__hstc': '49600953.15b5c265b1847afab42a7def948ef734.1682037569169.1684390996375.1684397557784.11',
        '__hssrc': '1',
        '__hssc': '49600953.1.1684397557784',
    }

    headers = {
        'authority': 'go.drugbank.com',
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
        'cache-control': 'no-cache',
        'pragma': 'no-cache',
        'referer': 'https://go.drugbank.com/unearth/q?query=*&button=&searcher=bio_entities',
        'sec-ch-ua': '"Microsoft Edge";v="113", "Chromium";v="113", "Not-A.Brand";v="24"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
        'sec-fetch-dest': 'document',
        'sec-fetch-mode': 'navigate',
        'sec-fetch-site': 'same-origin',
        'sec-fetch-user': '?1',
        'upgrade-insecure-requests': '1',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36 Edg/113.0.1774.42',
    }

    Ls = []
    for page in range(1, int(pages) + 1):
        print(f"第{page}页")

        parms = {
            'button': '',
            'page': str(page),
            'query': '*',
            'searcher': 'bio_entities'
        }

        response = session.get(
            'https://go.drugbank.com/unearth/q?',
            cookies=cookies,
            headers=headers,
            params=parms
        )
        infos = response.text
        try:
            nameLs = re.findall('class="hit-link"><a href=".*?">(.*?)</a>', infos)
            if not nameLs:
                # No hit links on this page: treated as "past the last page".
                print("最大页数了", page)
                break
            for name in nameLs:
                dic = {
                    "名字": name
                }
                # Store the row before echoing it, so a console error cannot
                # lose a name that was already scraped.
                Ls.append(dic)
                print(dic)
        except Exception:
            print("最大限度")
            break
        finally:
            # Checkpoint after every page (also runs on the ``break`` paths).
            # DataFrame.to_excel opens, writes and closes the workbook by
            # itself; the former ``ExcelWriter.save()`` call and the
            # ``encoding`` argument were both removed in pandas 2.0.
            pd.DataFrame(Ls).to_excel('aa.xlsx', index=False)



def infos1(pages):
    """Generate ``(entity_id, entity_name)`` pairs from the DrugBank search listing.

    Walks result pages ``1`` through ``int(pages)`` of the ``bio_entities``
    search using the module-level ``session``.  Every hit link of the form
    ``/bio_entities/<id>`` contributes one pair.  Iteration ends at the first
    page that has no such link.

    Args:
        pages: Highest page number to request (converted with ``int()``).

    Yields:
        2-tuples: the id taken from the link target and the link text.
    """
    listing_url = 'https://go.drugbank.com/unearth/q?'
    hit_regex = 'class="hit-link"><a href="/bio_entities/(.*?)">(.*?)</a>'

    # Browser-captured cookies and headers, identical for every page.
    browser_cookies = {
        'cf_clearance': 'z8jXLY4NjL4.KVOUbgZNPWj6NPBlT_u.x4xmS19uZZE-1682037487-0-250',
        '_gcl_au': '1.1.154779136.1682037508',
        'hubspotutk': '15b5c265b1847afab42a7def948ef734',
        '_gid': 'GA1.2.695314151.1684390995',
        '_clck': 'qmfr9z|2|fbp|0|1233',
        'ln_or': 'eyIyNDI4NDg0IjoiZCJ9',
        'cf_chl_2': '1ea719f0d331036',
        'cf_chl_rc_i': '1',
        '_omx_drug_bank_session': 'kk16uujsTygoHwxfGEhrQEyjxT2CzYy2PCRfatX70%2BtKAQzuRCwO6fY%2FjHPVOsS2SLetNSvbEMvJD868pOaTuZ8EKhJqcNoxDZG68MLXcygqnx5g6cerWxPObUdqnBPQPWgcAJM7f%2FCOtvA%2BaHVLah3%2Fwcfl%2FfbkpfjV%2BsNNubQF1D9LB4e4xsUZpcntjSlLNAh6JykRisrdIlvIqN6%2B56vBklZtnRELonZY9yvyY%2B01bKCoflxsvNu8NS6ouAHWgBChb7%2BuoEBd4c6X4MttHMtlcKKhryxzE9mGZ6nfckvpGWqaGMptW7n2TsWUzFyBIaEwCgMTgHASS7W432%2FzrIgArwjxDV6hkQccMFE1EbT2%2BVMjfBz3NFHyrDkyQhZxLmnzE0jQzSpMa1lzob6Hw30cQKtnBBVbTbsbBNk6SX3fM4FMhHcaCvAZZE2mPQ%3D%3D--LWxvxvusC6OofJHn--tZPo%2BodDD6mfp%2FsRb%2BDc4w%3D%3D',
        '_gat': '1',
        '_ga': 'GA1.1.1772772602.1682037506',
        '_ga_DDLJ7EEV9M': 'GS1.1.1684397556.12.0.1684397556.0.0.0',
        '_clsk': '15jlqzx|1684397557641|1|1|z.clarity.ms/collect',
        '__hstc': '49600953.15b5c265b1847afab42a7def948ef734.1682037569169.1684390996375.1684397557784.11',
        '__hssrc': '1',
        '__hssc': '49600953.1.1684397557784',
    }
    browser_headers = {
        'authority': 'go.drugbank.com',
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
        'cache-control': 'no-cache',
        'pragma': 'no-cache',
        'referer': 'https://go.drugbank.com/unearth/q?query=*&button=&searcher=bio_entities',
        'sec-ch-ua': '"Microsoft Edge";v="113", "Chromium";v="113", "Not-A.Brand";v="24"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
        'sec-fetch-dest': 'document',
        'sec-fetch-mode': 'navigate',
        'sec-fetch-site': 'same-origin',
        'sec-fetch-user': '?1',
        'upgrade-insecure-requests': '1',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36 Edg/113.0.1774.42',
    }

    for page_no in range(1, int(pages) + 1):
        print(f"第{page_no}页")
        listing_html = session.get(
            listing_url,
            cookies=browser_cookies,
            headers=browser_headers,
            params=dict(button='', page=str(page_no), query='*', searcher='bio_entities'),
        ).text
        try:
            hits = re.findall(hit_regex, listing_html)
            if hits:
                # With two capture groups findall already returns (id, name) tuples.
                yield from hits
            else:
                print("最大页数了", page_no)
                break
        except Exception:
            print("最大限度")
            break

def infos2(pages):
    """Generate ``(entity_id, uniprot_id, entity_name)`` triples.

    For every pair produced by ``infos1(pages)`` the matching
    ``/bio_entities/<entity_id>`` page is downloaded through the module-level
    ``session``, and each ``uniprot/<id>">`` occurrence in that page yields
    one triple (for example ``BE0000001`` leads to ``P45059``).

    Args:
        pages: Forwarded unchanged to ``infos1``.

    Yields:
        3-tuples: entity id, id captured after ``uniprot/``, entity name.
    """
    # Browser-captured cookies and headers for the entity detail pages.
    entity_cookies = {
        '_gcl_au': '1.1.154779136.1682037508',
        'hubspotutk': '15b5c265b1847afab42a7def948ef734',
        '_gid': 'GA1.2.695314151.1684390995',
        '_clck': 'qmfr9z|2|fbp|0|1233',
        'ln_or': 'eyIyNDI4NDg0IjoiZCJ9',
        '__hssrc': '1',
        'cf_clearance': 'P9Q0Ev.37r_S0U4yEDK1BRLlzotAwwmQcvWTwIecE2k-1684400898-0-250',
        '_ga': 'GA1.1.1772772602.1682037506',
        '__hstc': '49600953.15b5c265b1847afab42a7def948ef734.1682037569169.1684397557784.1684400914210.12',
        '_omx_drug_bank_session': 'co5hwBY2ElsyR%2B9IpqGzP4A8QGVq%2BO3GeKR0U4zOn5RSgEklXYO2Osneon2e%2B0LzUJS7ZO6ts%2BTpcNj2c9z3Fi%2BldeNXWPYu0VheauFSoK7eLCwPzgdxP6YrpTVYgwi0aawcjgb00AbRgeiw78%2FfroSEmiQpiSWia%2BiQOOq6CGNnXw%2Fx1MqLf%2BzFxMrONecI6FPPYi8Be9rTgSx%2BNYuLZhE4HkAHRshHRyKGHqjOFkTKqmr4p83xoMxC8AYJ5e6M9utzp3OM8GV%2B5im%2FEfjSm3OaxkzXvLyep3QYVmixhYTy5DlzxnCoW0BezJbTlwjp3QPeNzwLk7oblnXRlg47CzlIhmb551RvSN1f6W10KYOduwbbD%2F5KXSazWIT5ekfSQY8%3D--ixR1euY%2F8niWD2GG--pdAc5Q10cDVjU8h3CSP33Q%3D%3D',
        '_ga_DDLJ7EEV9M': 'GS1.1.1684411279.14.1.1684412424.0.0.0',
    }
    entity_headers = {
        'authority': 'go.drugbank.com',
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
        'cache-control': 'no-cache',
        'pragma': 'no-cache',
        'referer': 'https://go.drugbank.com/unearth/q?button=&page=1&query=%2A&searcher=bio_entities&__cf_chl_tk=OY4j_WDIGBrKuUCRiJCzMsHBDfKvpEbJksY5y4_sxO0-1684400898-0-gaNycGzNDdA',
        'sec-ch-ua': '"Microsoft Edge";v="113", "Chromium";v="113", "Not-A.Brand";v="24"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
        'sec-fetch-dest': 'document',
        'sec-fetch-mode': 'navigate',
        'sec-fetch-site': 'same-origin',
        'sec-fetch-user': '?1',
        'upgrade-insecure-requests': '1',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36 Edg/113.0.1774.42',
    }

    for entity_id, entity_name in infos1(pages):
        entity_html = session.get(
            f'https://go.drugbank.com/bio_entities/{entity_id}',
            cookies=entity_cookies,
            headers=entity_headers,
        ).text
        # One triple per uniprot link found on the entity page.
        yield from (
            (entity_id, uniprot_id, entity_name)
            for uniprot_id in re.findall(r'uniprot/(.*?)">', entity_html)
        )

def infos3(pages):
    """Scrape "Gene Name" values for every polypeptide into ``result.xlsx``.

    For each ``(entity id, uniprot id, entity name)`` triple produced by
    ``infos2(pages)`` the ``/polypeptides/<uniprot id>`` page is fetched
    through the module-level ``session``.  Every "Gene Name" field found in
    it becomes one row with the columns ``url_name``, ``url_m``, ``num`` and
    ``resu``.  The workbook is rewritten after every polypeptide, so partial
    results are on disk if a later request fails.

    Args:
        pages: Number of search-result pages to walk; forwarded to ``infos2``.
    """
    # Browser-captured cookies/headers; identical for every request, so they
    # are built once instead of once per polypeptide.
    # NOTE(review): these look like live session credentials committed to
    # source -- they will expire and probably should not be shared.
    cookies = {
        '_gcl_au': '1.1.154779136.1682037508',
        'hubspotutk': '15b5c265b1847afab42a7def948ef734',
        '_gid': 'GA1.2.695314151.1684390995',
        '_clck': 'qmfr9z|2|fbp|0|1233',
        'ln_or': 'eyIyNDI4NDg0IjoiZCJ9',
        '__hssrc': '1',
        'cf_clearance': 'P9Q0Ev.37r_S0U4yEDK1BRLlzotAwwmQcvWTwIecE2k-1684400898-0-250',
        '__hstc': '49600953.15b5c265b1847afab42a7def948ef734.1682037569169.1684400914210.1684412518322.13',
        '_gat': '1',
        '_omx_drug_bank_session': 'Uq6izZN1HKl9qcTZGaXuYnOWtmEGQ276oYZznAVEqQkRMOC71A6R6VsEC4GzJZyw24Yr%2BWw8JBnw1yPLcOx0vuUZ%2Fwa1qOIXvZMlHr8%2Bg5o8dZ9U7jB0%2F6ZeSbBcFJfbDjnCPn0yyzFttMghTXxu0rZdeace5Bwkt5lRAaeAg4aDZRTiYpCZOEe29rGF95l38iU6rRrd85j7RFv%2FuV6ZMCP2ZP7DVTVcQLtIvU9iAItyl86nJiF4pYqaBNXyDffBHkVPQH5WD56EsbabWWPufe0oH4%2Fx7Ku4n%2Fy8pEWYITTaSvZuA8yW3R2UiKG9PgjpeNpEa6%2Bkgs46ewYq%2Fseaaye3R7bfpvvGb0Qu7XAFyHQmOvarbUJqHqaDwnOGfQ%3D%3D--LMKa9eGaiid6tqeE--tfFpItICck9LRYsmjYpR1Q%3D%3D',
        '_ga_DDLJ7EEV9M': 'GS1.1.1684411279.14.1.1684412956.0.0.0',
        '_ga': 'GA1.1.1772772602.1682037506',
        '_clsk': '13a49nl|1684412957117|3|1|z.clarity.ms/collect',
        '__hssc': '49600953.2.1684412518322',
    }

    headers = {
        'authority': 'go.drugbank.com',
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
        'cache-control': 'no-cache',
        'pragma': 'no-cache',
        'referer': 'https://go.drugbank.com/unearth/q?button=&page=1&query=%2A&searcher=bio_entities&__cf_chl_tk=OY4j_WDIGBrKuUCRiJCzMsHBDfKvpEbJksY5y4_sxO0-1684400898-0-gaNycGzNDdA',
        'sec-ch-ua': '"Microsoft Edge";v="113", "Chromium";v="113", "Not-A.Brand";v="24"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
        'sec-fetch-dest': 'document',
        'sec-fetch-mode': 'navigate',
        'sec-fetch-site': 'same-origin',
        'sec-fetch-user': '?1',
        'upgrade-insecure-requests': '1',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36 Edg/113.0.1774.42',
    }

    Ls = []
    for url_m, num, url_name in infos2(pages):
        response = session.get(f'https://go.drugbank.com/polypeptides/{num}', cookies=cookies, headers=headers).text
        res_infosLs = re.findall('Gene Name</dt><dd class="col-xl-10 col-md-9 col-sm-8">(.*?)</dd>', response)
        for resu in res_infosLs:
            # Store the row first: previously the print ran before the append
            # inside a bare ``except``, so a console error silently dropped
            # this row and every remaining gene name of the polypeptide.
            Ls.append({
                "url_name": url_name,
                "url_m": url_m,
                "num": num,
                "resu": resu
            })
            try:
                print(f"最终结果:{url_name}----->{url_m}------>{num}----->{resu}")
            except Exception as exc:
                # Progress output stays best-effort, but it is no longer
                # silent and no longer swallows KeyboardInterrupt/SystemExit.
                # ``!a`` keeps this fallback message ASCII-only.
                print(f"could not print the result line for {num!a}: {exc!a}")

        # Checkpoint after every polypeptide.  DataFrame.to_excel opens,
        # writes and closes the workbook by itself; the former
        # ``ExcelWriter.save()`` call and the ``encoding`` argument were both
        # removed in pandas 2.0.
        pd.DataFrame(Ls).to_excel('result.xlsx', index=False)


if __name__ == '__main__':
    # Ask how many search-result pages to walk.  The raw string is passed on
    # as-is; infos1 converts it with int().
    page = input("页数:")
    # infosaa(page)
    # infos1(page)  # yields entity ids, e.g. 'BE0000001'
    # infos2()  # yields the uniprot parameter, e.g. P45059
    # Run the full pipeline; the rows end up in result.xlsx.
    infos3(page)

2日志版本

# -*- coding: utf-8 -*-
# @Author  : zhc
# @File    : juck_yao.pyo
import os.path
import re
import time

import pandas as pd
import requests
from loguru import logger
# Add a log sink: records at DEBUG level and above go to ces.log, UTF-8 encoded.
logger.add(sink='ces.log', encoding='utf-8', level='DEBUG')
# One HTTP session object shared by every scraper function below.
session = requests.Session()
# Turn off requests' "trust environment" switch (environment-provided
# proxy / default-auth settings are then not applied to this session).
session.trust_env = False


#  获取第一页名字写入excel中  略
def infosaa(pages):
    """Save the names of DrugBank bio-entity search hits to ``aa.xlsx``.

    Result pages ``1`` to ``int(pages)`` of the ``bio_entities`` search are
    fetched through the module-level ``session``; scraping stops early at
    the first page that contains no hit link.  The workbook is rewritten
    after every page, so the rows gathered so far are on disk even if a
    later request fails.

    Args:
        pages: Number of result pages to request (anything ``int()``
            accepts, e.g. the string returned by ``input()``).
    """
    # Cookies/headers captured from a browser session.  They do not depend on
    # the page number, so they are built once instead of once per request.
    # NOTE(review): these look like live session credentials committed to
    # source -- they will expire and probably should not be shared.
    cookies = {
        'cf_clearance': 'z8jXLY4NjL4.KVOUbgZNPWj6NPBlT_u.x4xmS19uZZE-1682037487-0-250',
        '_gcl_au': '1.1.154779136.1682037508',
        'hubspotutk': '15b5c265b1847afab42a7def948ef734',
        '_gid': 'GA1.2.695314151.1684390995',
        '_clck': 'qmfr9z|2|fbp|0|1233',
        'ln_or': 'eyIyNDI4NDg0IjoiZCJ9',
        'cf_chl_2': '1ea719f0d331036',
        'cf_chl_rc_i': '1',
        '_omx_drug_bank_session': 'kk16uujsTygoHwxfGEhrQEyjxT2CzYy2PCRfatX70%2BtKAQzuRCwO6fY%2FjHPVOsS2SLetNSvbEMvJD868pOaTuZ8EKhJqcNoxDZG68MLXcygqnx5g6cerWxPObUdqnBPQPWgcAJM7f%2FCOtvA%2BaHVLah3%2Fwcfl%2FfbkpfjV%2BsNNubQF1D9LB4e4xsUZpcntjSlLNAh6JykRisrdIlvIqN6%2B56vBklZtnRELonZY9yvyY%2B01bKCoflxsvNu8NS6ouAHWgBChb7%2BuoEBd4c6X4MttHMtlcKKhryxzE9mGZ6nfckvpGWqaGMptW7n2TsWUzFyBIaEwCgMTgHASS7W432%2FzrIgArwjxDV6hkQccMFE1EbT2%2BVMjfBz3NFHyrDkyQhZxLmnzE0jQzSpMa1lzob6Hw30cQKtnBBVbTbsbBNk6SX3fM4FMhHcaCvAZZE2mPQ%3D%3D--LWxvxvusC6OofJHn--tZPo%2BodDD6mfp%2FsRb%2BDc4w%3D%3D',
        '_gat': '1',
        '_ga': 'GA1.1.1772772602.1682037506',
        '_ga_DDLJ7EEV9M': 'GS1.1.1684397556.12.0.1684397556.0.0.0',
        '_clsk': '15jlqzx|1684397557641|1|1|z.clarity.ms/collect',
        '__hstc': '49600953.15b5c265b1847afab42a7def948ef734.1682037569169.1684390996375.1684397557784.11',
        '__hssrc': '1',
        '__hssc': '49600953.1.1684397557784',
    }

    headers = {
        'authority': 'go.drugbank.com',
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
        'cache-control': 'no-cache',
        'pragma': 'no-cache',
        'referer': 'https://go.drugbank.com/unearth/q?query=*&button=&searcher=bio_entities',
        'sec-ch-ua': '"Microsoft Edge";v="113", "Chromium";v="113", "Not-A.Brand";v="24"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
        'sec-fetch-dest': 'document',
        'sec-fetch-mode': 'navigate',
        'sec-fetch-site': 'same-origin',
        'sec-fetch-user': '?1',
        'upgrade-insecure-requests': '1',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36 Edg/113.0.1774.42',
    }

    Ls = []
    for page in range(1, int(pages) + 1):
        print(f"第{page}页")

        parms = {
            'button': '',
            'page': str(page),
            'query': '*',
            'searcher': 'bio_entities'
        }

        response = session.get(
            'https://go.drugbank.com/unearth/q?',
            cookies=cookies,
            headers=headers,
            params=parms
        )
        infos = response.text
        try:
            nameLs = re.findall('class="hit-link"><a href=".*?">(.*?)</a>', infos)
            if not nameLs:
                # No hit links on this page: treated as "past the last page".
                print("最大页数了", page)
                break
            for name in nameLs:
                dic = {
                    "名字": name
                }
                # Store the row before echoing it, so a console error cannot
                # lose a name that was already scraped.
                Ls.append(dic)
                print(dic)
        except Exception:
            print("最大限度")
            break
        finally:
            # Checkpoint after every page (also runs on the ``break`` paths).
            # DataFrame.to_excel opens, writes and closes the workbook by
            # itself; the former ``ExcelWriter.save()`` call and the
            # ``encoding`` argument were both removed in pandas 2.0.
            pd.DataFrame(Ls).to_excel('aa.xlsx', index=False)


def infos1(pages):
    """Yield ``(entity_id, entity_name)`` for each hit of the DrugBank search listing.

    Result pages ``1`` to ``int(pages)`` of the ``bio_entities`` search are
    fetched through the module-level ``session``.  Every hit link of the form
    ``/bio_entities/<id>`` yields one pair.  Iteration stops at the first
    page without such a link; that stop is logged together with the HTTP
    status so the log shows whether the listing really ended.

    Args:
        pages: Highest page number to request (converted with ``int()``).

    Yields:
        2-tuples: the id taken from the link target and the link text.
    """
    # Cookies/headers captured from a browser session.  They do not depend on
    # the page number, so they are built once instead of once per request.
    # NOTE(review): these look like live session credentials committed to
    # source -- they will expire and probably should not be shared.
    cookies = {
        'cf_clearance': 'z8jXLY4NjL4.KVOUbgZNPWj6NPBlT_u.x4xmS19uZZE-1682037487-0-250',
        '_gcl_au': '1.1.154779136.1682037508',
        'hubspotutk': '15b5c265b1847afab42a7def948ef734',
        '_gid': 'GA1.2.695314151.1684390995',
        '_clck': 'qmfr9z|2|fbp|0|1233',
        'ln_or': 'eyIyNDI4NDg0IjoiZCJ9',
        'cf_chl_2': '1ea719f0d331036',
        'cf_chl_rc_i': '1',
        '_omx_drug_bank_session': 'kk16uujsTygoHwxfGEhrQEyjxT2CzYy2PCRfatX70%2BtKAQzuRCwO6fY%2FjHPVOsS2SLetNSvbEMvJD868pOaTuZ8EKhJqcNoxDZG68MLXcygqnx5g6cerWxPObUdqnBPQPWgcAJM7f%2FCOtvA%2BaHVLah3%2Fwcfl%2FfbkpfjV%2BsNNubQF1D9LB4e4xsUZpcntjSlLNAh6JykRisrdIlvIqN6%2B56vBklZtnRELonZY9yvyY%2B01bKCoflxsvNu8NS6ouAHWgBChb7%2BuoEBd4c6X4MttHMtlcKKhryxzE9mGZ6nfckvpGWqaGMptW7n2TsWUzFyBIaEwCgMTgHASS7W432%2FzrIgArwjxDV6hkQccMFE1EbT2%2BVMjfBz3NFHyrDkyQhZxLmnzE0jQzSpMa1lzob6Hw30cQKtnBBVbTbsbBNk6SX3fM4FMhHcaCvAZZE2mPQ%3D%3D--LWxvxvusC6OofJHn--tZPo%2BodDD6mfp%2FsRb%2BDc4w%3D%3D',
        '_gat': '1',
        '_ga': 'GA1.1.1772772602.1682037506',
        '_ga_DDLJ7EEV9M': 'GS1.1.1684397556.12.0.1684397556.0.0.0',
        '_clsk': '15jlqzx|1684397557641|1|1|z.clarity.ms/collect',
        '__hstc': '49600953.15b5c265b1847afab42a7def948ef734.1682037569169.1684390996375.1684397557784.11',
        '__hssrc': '1',
        '__hssc': '49600953.1.1684397557784',
    }

    headers = {
        'authority': 'go.drugbank.com',
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
        'cache-control': 'no-cache',
        'pragma': 'no-cache',
        'referer': 'https://go.drugbank.com/unearth/q?query=*&button=&searcher=bio_entities',
        'sec-ch-ua': '"Microsoft Edge";v="113", "Chromium";v="113", "Not-A.Brand";v="24"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
        'sec-fetch-dest': 'document',
        'sec-fetch-mode': 'navigate',
        'sec-fetch-site': 'same-origin',
        'sec-fetch-user': '?1',
        'upgrade-insecure-requests': '1',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36 Edg/113.0.1774.42',
    }

    for page in range(1, int(pages) + 1):
        logger.debug(f"第{page}页")

        parms = {
            'button': '',
            'page': str(page),
            'query': '*',
            'searcher': 'bio_entities'
        }

        response = session.get(
            'https://go.drugbank.com/unearth/q?',
            cookies=cookies,
            headers=headers,
            params=parms
        )
        infos = response.text
        try:
            urlLs = re.findall('class="hit-link"><a href="/bio_entities/(.*?)">(.*?)</a>', infos)
            if not urlLs:
                # Goes through the logger (it used to be a bare print) so
                # ces.log records why the run stopped.  The status code is
                # included because an error or challenge page would also
                # contain no hits and is otherwise indistinguishable from
                # running past the last page.
                logger.info(f"最大页数了 {page} (HTTP {response.status_code})")
                break
            for url1, url1_name in urlLs:
                yield url1, url1_name
        except Exception:
            # logger.exception keeps the traceback that logger.error dropped.
            logger.exception("最大限度")
            break


def infos2(pages):
    """Yield ``(entity_id, uniprot_id, entity_name)`` for every uniprot link.

    Consumes the pairs produced by ``infos1(pages)``, downloads the
    corresponding ``/bio_entities/<entity_id>`` page through the module-level
    ``session`` and emits one triple per ``uniprot/<id>">`` occurrence in that
    page (for example ``BE0000001`` leads to ``P45059``).

    Args:
        pages: Forwarded unchanged to ``infos1``.

    Yields:
        3-tuples: entity id, id captured after ``uniprot/``, entity name.
    """
    uniprot_link = re.compile(r'uniprot/(.*?)">')

    # Browser-captured cookies and headers for the entity detail pages.
    detail_cookies = {
        '_gcl_au': '1.1.154779136.1682037508',
        'hubspotutk': '15b5c265b1847afab42a7def948ef734',
        '_gid': 'GA1.2.695314151.1684390995',
        '_clck': 'qmfr9z|2|fbp|0|1233',
        'ln_or': 'eyIyNDI4NDg0IjoiZCJ9',
        '__hssrc': '1',
        'cf_clearance': 'P9Q0Ev.37r_S0U4yEDK1BRLlzotAwwmQcvWTwIecE2k-1684400898-0-250',
        '_ga': 'GA1.1.1772772602.1682037506',
        '__hstc': '49600953.15b5c265b1847afab42a7def948ef734.1682037569169.1684397557784.1684400914210.12',
        '_omx_drug_bank_session': 'co5hwBY2ElsyR%2B9IpqGzP4A8QGVq%2BO3GeKR0U4zOn5RSgEklXYO2Osneon2e%2B0LzUJS7ZO6ts%2BTpcNj2c9z3Fi%2BldeNXWPYu0VheauFSoK7eLCwPzgdxP6YrpTVYgwi0aawcjgb00AbRgeiw78%2FfroSEmiQpiSWia%2BiQOOq6CGNnXw%2Fx1MqLf%2BzFxMrONecI6FPPYi8Be9rTgSx%2BNYuLZhE4HkAHRshHRyKGHqjOFkTKqmr4p83xoMxC8AYJ5e6M9utzp3OM8GV%2B5im%2FEfjSm3OaxkzXvLyep3QYVmixhYTy5DlzxnCoW0BezJbTlwjp3QPeNzwLk7oblnXRlg47CzlIhmb551RvSN1f6W10KYOduwbbD%2F5KXSazWIT5ekfSQY8%3D--ixR1euY%2F8niWD2GG--pdAc5Q10cDVjU8h3CSP33Q%3D%3D',
        '_ga_DDLJ7EEV9M': 'GS1.1.1684411279.14.1.1684412424.0.0.0',
    }
    detail_headers = {
        'authority': 'go.drugbank.com',
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
        'cache-control': 'no-cache',
        'pragma': 'no-cache',
        'referer': 'https://go.drugbank.com/unearth/q?button=&page=1&query=%2A&searcher=bio_entities&__cf_chl_tk=OY4j_WDIGBrKuUCRiJCzMsHBDfKvpEbJksY5y4_sxO0-1684400898-0-gaNycGzNDdA',
        'sec-ch-ua': '"Microsoft Edge";v="113", "Chromium";v="113", "Not-A.Brand";v="24"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
        'sec-fetch-dest': 'document',
        'sec-fetch-mode': 'navigate',
        'sec-fetch-site': 'same-origin',
        'sec-fetch-user': '?1',
        'upgrade-insecure-requests': '1',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36 Edg/113.0.1774.42',
    }

    for entity_id, entity_name in infos1(pages):
        detail_page = session.get(
            f'https://go.drugbank.com/bio_entities/{entity_id}',
            cookies=detail_cookies,
            headers=detail_headers,
        )
        for uniprot_id in uniprot_link.findall(detail_page.text):
            yield entity_id, uniprot_id, entity_name


def infos3(pages):
    """Collect the gene names of every polypeptide and save them to Excel.

    For each ``(url_m, num, url_name)`` tuple produced by ``infos2(pages)``
    the page ``https://go.drugbank.com/polypeptides/{num}`` is downloaded and
    every "Gene Name" value found in its HTML becomes one output row with the
    keys ``url_name``, ``url_m``, ``num`` and ``resu``. Each row is also
    logged through ``logger.info``.

    The collected rows are written to ``result.xlsx`` (relative to the
    current working directory) exactly once, in a ``finally`` clause, so the
    rows gathered so far are still saved when a request fails or the run is
    interrupted. The file is written even when no row was collected.

    :param pages: number of search-result pages to crawl; forwarded
        unchanged to ``infos2``.
    :return: None
    """
    # NOTE(review): hard-coded session cookies; presumably they have to be
    # refreshed once the site stops accepting them -- confirm before running.
    cookies = {
        '_gcl_au': '1.1.154779136.1682037508',
        'hubspotutk': '15b5c265b1847afab42a7def948ef734',
        '_gid': 'GA1.2.695314151.1684390995',
        '_clck': 'qmfr9z|2|fbp|0|1233',
        'ln_or': 'eyIyNDI4NDg0IjoiZCJ9',
        '__hssrc': '1',
        'cf_clearance': 'P9Q0Ev.37r_S0U4yEDK1BRLlzotAwwmQcvWTwIecE2k-1684400898-0-250',
        '__hstc': '49600953.15b5c265b1847afab42a7def948ef734.1682037569169.1684400914210.1684412518322.13',
        '_gat': '1',
        '_omx_drug_bank_session': 'Uq6izZN1HKl9qcTZGaXuYnOWtmEGQ276oYZznAVEqQkRMOC71A6R6VsEC4GzJZyw24Yr%2BWw8JBnw1yPLcOx0vuUZ%2Fwa1qOIXvZMlHr8%2Bg5o8dZ9U7jB0%2F6ZeSbBcFJfbDjnCPn0yyzFttMghTXxu0rZdeace5Bwkt5lRAaeAg4aDZRTiYpCZOEe29rGF95l38iU6rRrd85j7RFv%2FuV6ZMCP2ZP7DVTVcQLtIvU9iAItyl86nJiF4pYqaBNXyDffBHkVPQH5WD56EsbabWWPufe0oH4%2Fx7Ku4n%2Fy8pEWYITTaSvZuA8yW3R2UiKG9PgjpeNpEa6%2Bkgs46ewYq%2Fseaaye3R7bfpvvGb0Qu7XAFyHQmOvarbUJqHqaDwnOGfQ%3D%3D--LMKa9eGaiid6tqeE--tfFpItICck9LRYsmjYpR1Q%3D%3D',
        '_ga_DDLJ7EEV9M': 'GS1.1.1684411279.14.1.1684412956.0.0.0',
        '_ga': 'GA1.1.1772772602.1682037506',
        '_clsk': '13a49nl|1684412957117|3|1|z.clarity.ms/collect',
        '__hssc': '49600953.2.1684412518322',
    }

    # The cookies are sent through the ``cookies=`` argument, so no
    # 'cookie' header is set here.
    headers = {
        'authority': 'go.drugbank.com',
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
        'cache-control': 'no-cache',
        'pragma': 'no-cache',
        'referer': 'https://go.drugbank.com/unearth/q?button=&page=1&query=%2A&searcher=bio_entities&__cf_chl_tk=OY4j_WDIGBrKuUCRiJCzMsHBDfKvpEbJksY5y4_sxO0-1684400898-0-gaNycGzNDdA',
        'sec-ch-ua': '"Microsoft Edge";v="113", "Chromium";v="113", "Not-A.Brand";v="24"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
        'sec-fetch-dest': 'document',
        'sec-fetch-mode': 'navigate',
        'sec-fetch-site': 'same-origin',
        'sec-fetch-user': '?1',
        'upgrade-insecure-requests': '1',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36 Edg/113.0.1774.42',
    }

    # Compiled once: the pattern is the same for every page.
    gene_name_re = re.compile(
        r'Gene Name</dt><dd class="col-xl-10 col-md-9 col-sm-8">(.*?)</dd>'
    )

    rows = []
    try:
        for url_m, num, url_name in infos2(pages):
            html = session.get(
                f'https://go.drugbank.com/polypeptides/{num}',
                cookies=cookies,
                headers=headers,
            ).text
            for resu in gene_name_re.findall(html):
                dic = {
                    "url_name": url_name,
                    "url_m": url_m,
                    "num": num,
                    "resu": resu
                }
                logger.info(dic)
                rows.append(dic)
    finally:
        # Save once instead of rewriting the whole workbook after every page.
        # ``ExcelWriter.save()`` and the ``encoding`` argument of ``to_excel``
        # were removed in pandas 2.0; the context manager saves and closes
        # the file on old and new pandas versions alike.
        with pd.ExcelWriter('result.xlsx') as writer:
            pd.DataFrame(rows).to_excel(writer, index=False)

def run():
    print("""
    _____ _  Author: 十架bgm         __
    _________   ___ ___    _____________________________________________
    \_   ___ \ /   |   \  /  _  \__    ___/  _____/\______   \__    ___/
    /    \  \//    ~    \/  /_\  \|    | /   \  ___ |     ___/ |    |
    \     \___\    Y    /    |    \    | \    \_\  \|    |     |    |
     \______  /\___|_  /\____|__  /____|  \______  /|____|     |____|
            \/       \/         \/               \/        version=1.1

    """)


if __name__ == '__main__':
    # Show the banner, then ask how many result pages should be crawled.
    run()
    page = input("页数:")
    # Earlier pipeline stages, kept for reference:
    # infosaa(page)
    # infos1(page)  # fetch ids such as 'BE0000001'
    # infos2()  # fetch parameters such as P45059
    logger.debug("采集开始行动,开始计时间")
    started_at = time.time()
    module_name = os.path.basename(os.path.abspath(__file__))
    logger.debug(f'启动模块{module_name}')
    # Full crawl: ids -> polypeptide parameters -> gene names -> Excel file.
    infos3(page)
    finished_at = time.time()
    logger.warning(f"计时结束,运行{finished_at - started_at}秒")
    logger.warning("采集结束,已保存")

部分结果

posted @ 2023-05-18 22:26  __username  阅读(116)  评论(0)    收藏  举报