Python 实现网站登录会话脚本

# -*- coding: utf-8 -*-
import os
import requests
from bs4 import BeautifulSoup

def _extract_hidden_fields(html):
    """Collect every named hidden ``<input>`` in *html* as a ``{name: value}`` dict."""
    soup = BeautifulSoup(html, 'html.parser')
    return {
        tag.get('name'): tag.get('value', '')
        for tag in soup.find_all('input', type='hidden')
        if tag.get('name')
    }


def _authenticate(session, username, password, timeout):
    """Run the two-step form login on *session* and return True on success."""
    # No explicit 'Host' header: the HTTP stack derives it from the URL of
    # each request, so it also stays correct on every hop of a redirect chain.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
        'Connection': 'keep-alive',
        'Sec-Fetch-Dest': 'document',
        'Sec-Fetch-Mode': 'navigate',
        'Sec-Fetch-Site': 'same-origin',
        'Upgrade-Insecure-Requests': '1',
    }
    login_url = "https://www.example.com/cas/login"

    # Step 1: fetch the login page so the hidden form fields it carries can
    # be echoed back with the credentials.
    try:
        response = session.get(login_url, headers=headers, timeout=timeout)
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"获取登录页面失败: {e}")
        return False

    # Step 2: submit the form. The credentials are merged last so that a
    # hidden input which happens to be named 'username' or 'password' can
    # never overwrite what the caller supplied.
    login_data = {
        **_extract_hidden_fields(response.text),
        'username': username,
        'password': password,
    }
    post_headers = {
        **headers,
        'Content-Type': 'application/x-www-form-urlencoded',
        'Origin': 'https://www.example.com',
        'Referer': 'https://www.example.com/cas/login',
        'X-Requested-With': 'XMLHttpRequest',
    }

    print(f"正在尝试登录,用户名: {username}")

    try:
        # Redirects are followed so that cookies set along the chain end up
        # in the session's cookie jar.
        response = session.post(
            login_url,
            data=login_data,
            headers=post_headers,
            allow_redirects=True,
            timeout=timeout,
        )
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"登录请求失败: {e}")
        return False

    # Success requires both signals: the POST itself was answered with a 302,
    # and the session now holds a cookie named TGC.
    if not (response.history and response.history[0].status_code == 302):
        print("登录失败: 未发生重定向或重定向状态码不正确")
        return False
    if 'TGC' not in session.cookies:
        print("登录失败: 未获取到TGC cookie")
        return False

    print("登录成功!")
    return True


def login(username, password, *, timeout=10):
    """Log in through the ``/cas/login`` form and return the authenticated session.

    The login page is fetched first to harvest its hidden form fields, which
    are then posted back together with the credentials. Progress and failure
    messages are printed to stdout.

    Args:
        username: Account name to submit. Must be non-empty.
        password: Password to submit. Must be non-empty.
        timeout: Timeout in seconds passed to each HTTP request, so an
            unresponsive server cannot block the caller indefinitely.

    Returns:
        The ``requests.Session`` holding the login cookies on success,
        otherwise ``None``. On failure the session is closed before
        returning, so no connections are leaked.
    """
    # Missing credentials can never succeed; skip the network round-trips.
    if not username or not password:
        print("登录失败: 用户名或密码为空")
        return None

    session = requests.Session()
    authenticated = False
    try:
        authenticated = _authenticate(session, username, password, timeout)
    finally:
        # Also covers unexpected exceptions: only a successfully
        # authenticated session is handed to the caller still open.
        if not authenticated:
            session.close()
    return session if authenticated else None

# Usage example
if __name__ == "__main__":
    # Credentials are read from the process environment.
    # NOTE(review): on Windows the OS predefines USERNAME as the logged-in
    # account name, so it must be overridden explicitly there — confirm the
    # intended variable names.
    USERNAME = os.environ.get('USERNAME')
    PASSWORD = os.environ.get('PASSWORD')

    # os.environ.get returns None for an unset variable; stop early with a
    # clear message instead of attempting a login that cannot succeed.
    if not USERNAME or not PASSWORD:
        raise SystemExit("登录失败: 未设置环境变量 USERNAME 或 PASSWORD")

    session = login(USERNAME, PASSWORD)

    if session is not None:
        # The session can now be used for follow-up requests.
        print("成功获取会话对象,可以用于后续请求")
        # The with-block closes the session's connections once we are done.
        with session:
            # Example: request a page that requires the login cookies.
            try:
                response = session.get("https://www.example.com/#/landingpage", timeout=10)
                print(f"访问登录后页面状态码: {response.status_code}")
            except requests.RequestException as e:
                print(f"访问登录后页面失败: {e}")
    else:
        print("登录失败")

posted @ 2025-09-22 14:06  wanghongwei-dev  阅读(13)  评论(0)    收藏  举报