使用python脚本提取SVN近10年提交记录

1. 下载TSvnPwd.exe工具,访问地址:TortoiseSVN Password Decrypter

image

 

2. 双击运行TSvnPwd.exe,程序会自动显示本机已保存的SVN账号和密码

image

3. 使用如下Python脚本:

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
SVN提交记录提取工具(带认证支持)
功能:提取SVN近10年的提交记录并保存到CSV文件
支持用户名密码认证
"""

import os
import sys
import csv
import subprocess
import re
from datetime import datetime, timedelta
from collections import defaultdict
import argparse
import logging
from typing import List, Dict, Any, Optional
import xml.etree.ElementTree as ET
import getpass
import keyring
import configparser

# Configure logging once at import time: every record is written both to a
# log file in the current working directory and to the console.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        # Explicit UTF-8: the log messages are Chinese and the platform default
        # encoding is not guaranteed to be able to encode them.
        logging.FileHandler('svn_log_extractor.log', encoding='utf-8'),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger(__name__)

class SVNAuthenticator:
    """Static helpers for loading, saving and prompting for SVN credentials.

    Three sources are supported: the system keyring (via the ``keyring``
    package), the plain-text file ``~/.svn_extractor.conf`` and an
    interactive prompt.
    """

    @staticmethod
    def get_credentials_from_keyring(svn_url: str) -> Optional[tuple]:
        """Look up the credentials stored in the keyring for *svn_url*.

        Returns:
            ``(username, password)`` when both entries exist and are
            non-empty, otherwise ``None``. Failures are logged as warnings
            and never raised.
        """
        try:
            import keyring
            service = f"svn_{svn_url}"
            username = keyring.get_password(service, "username")
            password = keyring.get_password(service, "password")
            if username and password:
                return username, password
        except ImportError:
            logger.warning("未安装keyring库,无法使用密钥环功能")
        except Exception as e:
            # Keyring backends raise their own exception types; credential
            # lookup is best-effort, so log and fall through to ``None``.
            logger.warning(f"从密钥环获取凭据失败: {e}")
        return None

    @staticmethod
    def save_credentials_to_keyring(svn_url: str, username: str, password: str):
        """Store *username* and *password* in the keyring under ``svn_<url>``.

        Failures are logged as warnings and never raised.
        """
        try:
            import keyring
            service = f"svn_{svn_url}"
            keyring.set_password(service, "username", username)
            keyring.set_password(service, "password", password)
            logger.info("凭据已保存到密钥环")
        except ImportError:
            logger.warning("未安装keyring库,无法保存到密钥环")
        except Exception as e:
            logger.warning(f"保存凭据到密钥环失败: {e}")

    @staticmethod
    def get_credentials_from_config() -> Optional[tuple]:
        """Read the credentials from ``~/.svn_extractor.conf``.

        Returns:
            ``(username, password)`` when the ``[credentials]`` section holds
            two non-empty values, otherwise ``None``.
        """
        config_file = os.path.expanduser("~/.svn_extractor.conf")
        if os.path.exists(config_file):
            try:
                # interpolation=None: a literal '%' in a password must be read
                # back verbatim instead of being parsed as a substitution.
                config = configparser.ConfigParser(interpolation=None)
                config.read(config_file)
                if 'credentials' in config:
                    username = config['credentials'].get('username', '')
                    password = config['credentials'].get('password', '')
                    if username and password:
                        return username, password
            except Exception as e:
                logger.warning(f"读取配置文件失败: {e}")
        return None

    @staticmethod
    def save_credentials_to_config(username: str, password: str):
        """Write the credentials to ``~/.svn_extractor.conf`` in plain text.

        The file is created with owner-only permissions. Failures are logged
        as warnings and never raised.
        """
        config_file = os.path.expanduser("~/.svn_extractor.conf")
        try:
            # interpolation=None: otherwise any password containing '%' is
            # rejected with "invalid interpolation syntax".
            config = configparser.ConfigParser(interpolation=None)
            config['credentials'] = {
                'username': username,
                'password': password
            }
            # Create the file with mode 0600 from the start, so the password is
            # never on disk with wider permissions, not even briefly.
            fd = os.open(config_file, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
            with open(fd, 'w') as f:
                config.write(f)
            # The mode passed to os.open() only applies to newly created files;
            # tighten a pre-existing file as well.
            os.chmod(config_file, 0o600)
            logger.info("凭据已保存到配置文件")
        except Exception as e:
            logger.warning(f"保存凭据到配置文件失败: {e}")

    @staticmethod
    def prompt_for_credentials() -> tuple:
        """Ask for the credentials on the terminal.

        Returns:
            ``(username, password)``, both stripped of surrounding whitespace.
            The password is read without echo.
        """
        print("\n=== SVN认证信息 ===")
        username = input("SVN用户名: ").strip()
        password = getpass.getpass("SVN密码: ").strip()
        return username, password

class SVNLogExtractor:
    def __init__(self, svn_url: str, output_file: str = 'svn_commits.csv',
                 username: str = None, password: str = None,
                 save_credentials: bool = False):
        """
        初始化SVN日志提取器
        
        Args:
            svn_url: SVN仓库URL或本地路径
            output_file: 输出文件路径
            username: SVN用户名
            password: SVN密码
            save_credentials: 是否保存凭据
        """
        self.svn_url = svn_url
        self.output_file = output_file
        self.username = username
        self.password = password
        self.save_credentials = save_credentials
        self.commits = []
        
        # 如果没有提供凭据,尝试从各种来源获取
        if not (self.username and self.password):
            self._load_credentials()
    
    def _load_credentials(self):
        """Fill in ``self.username`` / ``self.password`` from the first source that has them.

        Sources are tried in order: keyring, configuration file, interactive
        prompt. Only credentials typed at the prompt can be offered for saving.
        """
        # Stored sources, consulted lazily and in priority order.
        stored_sources = (
            (lambda: SVNAuthenticator.get_credentials_from_keyring(self.svn_url),
             "从密钥环加载凭据"),
            (SVNAuthenticator.get_credentials_from_config,
             "从配置文件加载凭据"),
        )
        for fetch, note in stored_sources:
            found = fetch()
            if found:
                self.username, self.password = found
                logger.info(note)
                return

        # Nothing stored: ask the user, unless both values are already set.
        if self.username and self.password:
            return
        self.username, self.password = SVNAuthenticator.prompt_for_credentials()

        wants_offer = self.username and self.password and self.save_credentials
        if not wants_offer:
            return

        answer = input("是否保存凭据供以后使用?(y/n): ").strip().lower()
        if answer != 'y':
            return

        try:
            target = input("保存到: 1)密钥环 2)配置文件 (输入1或2): ").strip()
            if target == '1':
                SVNAuthenticator.save_credentials_to_keyring(
                    self.svn_url, self.username, self.password
                )
            elif target == '2':
                SVNAuthenticator.save_credentials_to_config(
                    self.username, self.password
                )
        except Exception as e:
            logger.warning(f"保存凭据失败: {e}")
    
    def check_svn_installed(self) -> bool:
        """检查SVN是否安装"""
        try:
            subprocess.run(['svn', '--version'], 
                         stdout=subprocess.PIPE, 
                         stderr=subprocess.PIPE, 
                         check=True)
            return True
        except (subprocess.CalledProcessError, FileNotFoundError):
            return False
    
    def calculate_date_range(self, years: int = 10) -> tuple:
        """计算日期范围"""
        end_date = datetime.now()
        start_date = end_date - timedelta(days=years * 365)
        return start_date.strftime('%Y-%m-%d'), end_date.strftime('%Y-%m-%d')
    
    def build_svn_command(self, subcommand: str, args: List[str] = None) -> List[str]:
        """构建SVN命令,包含认证信息"""
        cmd = ['svn', subcommand]
        
        # 添加认证信息
        if self.username:
            cmd.extend(['--username', self.username])
        if self.password:
            cmd.extend(['--password', self.password])
        if self.username or self.password:
            cmd.append('--no-auth-cache')  # 不缓存认证信息
        
        # 添加其他参数
        if args:
            cmd.extend(args)
        
        return cmd
    
    def get_svn_log_xml(self, start_date: str, end_date: str, limit: int = None) -> str:
        """获取SVN日志的XML格式"""
        cmd_args = [
            self.svn_url,
            '-r', f'{{{start_date}}}:{{{end_date}}}',
            '--verbose',
            '--xml'
        ]
        
        if limit:
            cmd_args.extend(['--limit', str(limit)])
        
        cmd = self.build_svn_command('log', cmd_args)
        
        # 屏蔽密码的日志输出
        safe_cmd = []
        for part in cmd:
            if part == '--password' and self.password:
                safe_cmd.append(part)
                safe_cmd.append('***')
            else:
                safe_cmd.append(part)
        
        logger.info(f"执行命令: {' '.join(safe_cmd)}")
        
        try:
            result = subprocess.run(
                cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
                encoding='utf-8',
                check=True
            )
            return result.stdout
        except subprocess.CalledProcessError as e:
            if 'Authentication failed' in e.stderr:
                logger.error("认证失败,请检查用户名和密码")
                # 清除缓存的凭据
                self.username = None
                self.password = None
            else:
                logger.error(f"SVN命令执行失败: {e.stderr[:500]}...")  # 只显示前500字符
            return None
    
    def test_connection(self) -> bool:
        """测试SVN连接"""
        logger.info("测试SVN连接...")
        cmd = self.build_svn_command('info', [self.svn_url])
        
        try:
            result = subprocess.run(
                cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
                encoding='utf-8',
                check=True,
                timeout=30  # 30秒超时
            )
            logger.info("SVN连接测试成功")
            return True
        except subprocess.TimeoutExpired:
            logger.error("连接超时")
            return False
        except subprocess.CalledProcessError as e:
            if 'Authentication failed' in e.stderr:
                logger.error("认证失败,请检查用户名和密码")
            elif 'Unable to connect' in e.stderr:
                logger.error("无法连接到SVN服务器")
            else:
                logger.error(f"连接测试失败: {e.stderr[:500]}...")
            return False
    
    def parse_svn_log_xml(self, xml_content: str) -> List[Dict[str, Any]]:
        """解析SVN日志XML"""
        commits = []
        
        if not xml_content:
            return commits
        
        try:
            root = ET.fromstring(xml_content)
            
            total_entries = len(root.findall('logentry'))
            logger.info(f"开始解析 {total_entries} 条提交记录")
            
            for idx, logentry in enumerate(root.findall('logentry'), 1):
                commit = {}
                
                # 基础信息
                commit['revision'] = logentry.get('revision')
                
                # 作者
                author_elem = logentry.find('author')
                commit['author'] = author_elem.text if author_elem is not None else ''
                
                # 日期
                date_elem = logentry.find('date')
                commit['date'] = date_elem.text if date_elem is not None else ''
                
                # 消息
                msg_elem = logentry.find('msg')
                commit['message'] = msg_elem.text if msg_elem is not None else ''
                
                # 文件变更
                paths = []
                paths_elem = logentry.find('paths')
                if paths_elem is not None:
                    for path_elem in paths_elem.findall('path'):
                        action = path_elem.get('action', '')
                        kind = path_elem.get('kind', '')
                        path_text = path_elem.text or ''
                        paths.append(f"{action}:{kind}:{path_text}")
                
                commit['changed_paths'] = ' | '.join(paths)
                
                # 统计信息
                commit['message_length'] = len(commit['message'])
                commit['changed_files_count'] = len(paths)
                
                # 提取分支信息(如果有)
                branch_match = re.search(r'/(branches|tags)/([^/]+)', commit['changed_paths'])
                if branch_match:
                    commit['branch_type'] = branch_match.group(1)
                    commit['branch_name'] = branch_match.group(2)
                else:
                    commit['branch_type'] = 'trunk'
                    commit['branch_name'] = 'trunk'
                
                commits.append(commit)
                
                # 显示进度
                if idx % 100 == 0 or idx == total_entries:
                    logger.info(f"解析进度: {idx}/{total_entries} ({idx/total_entries*100:.1f}%)")
                    
        except ET.ParseError as e:
            logger.error(f"XML解析失败: {e}")
        except Exception as e:
            logger.error(f"解析过程中发生错误: {e}")
        
        return commits
    
    def save_to_csv(self, commits: List[Dict[str, Any]]):
        """保存到CSV文件"""
        if not commits:
            logger.warning("没有提交记录可保存")
            return
        
        fieldnames = [
            'revision', 'author', 'date', 'message', 
            'changed_paths', 'message_length', 'changed_files_count',
            'branch_type', 'branch_name'
        ]
        
        try:
            # 确保输出目录存在
            output_dir = os.path.dirname(os.path.abspath(self.output_file))
            if output_dir and not os.path.exists(output_dir):
                os.makedirs(output_dir)
            
            with open(self.output_file, 'w', newline='', encoding='utf-8-sig') as f:
                writer = csv.DictWriter(f, fieldnames=fieldnames)
                writer.writeheader()
                writer.writerows(commits)
            
            logger.info(f"已保存 {len(commits)} 条提交记录到 {self.output_file}")
            
            # 生成统计信息
            self.generate_statistics(commits)
            
            # 生成摘要报告
            self.generate_summary(commits)
            
        except Exception as e:
            logger.error(f"保存CSV文件失败: {e}")
            import traceback
            traceback.print_exc()
    
    def generate_statistics(self, commits: List[Dict[str, Any]]):
        """生成统计信息"""
        if not commits:
            return
        
        stats = {
            'total_commits': len(commits),
            'unique_authors': set(),
            'earliest_date': None,
            'latest_date': None,
            'commits_by_year': defaultdict(int),
            'commits_by_month': defaultdict(int),
            'commits_by_author': defaultdict(int),
            'commits_by_branch': defaultdict(int),
            'files_changed_by_commit': [],
            'message_lengths': []
        }
        
        for commit in commits:
            # 作者统计
            stats['unique_authors'].add(commit['author'])
            stats['commits_by_author'][commit['author']] += 1
            
            # 分支统计
            branch_key = f"{commit['branch_type']}/{commit['branch_name']}"
            stats['commits_by_branch'][branch_key] += 1
            
            # 日期统计
            if commit['date']:
                date_str = commit['date']
                # 提取年份和月份
                date_match = re.search(r'(\d{4})-(\d{2})-\d{2}', date_str)
                if date_match:
                    year = date_match.group(1)
                    month = f"{year}-{date_match.group(2)}"
                    stats['commits_by_year'][year] += 1
                    stats['commits_by_month'][month] += 1
                
                # 最早和最晚日期
                try:
                    date_obj = datetime.fromisoformat(date_str.split('T')[0])
                    if not stats['earliest_date'] or date_obj < stats['earliest_date']:
                        stats['earliest_date'] = date_obj
                    if not stats['latest_date'] or date_obj > stats['latest_date']:
                        stats['latest_date'] = date_obj
                except:
                    pass
            
            # 文件变更统计
            stats['files_changed_by_commit'].append(commit['changed_files_count'])
            
            # 消息长度统计
            stats['message_lengths'].append(commit['message_length'])
        
        # 输出统计信息
        stats_file = self.output_file.replace('.csv', '_stats.txt')
        try:
            with open(stats_file, 'w', encoding='utf-8') as f:
                f.write("=" * 60 + "\n")
                f.write("SVN提交记录统计报告\n")
                f.write("=" * 60 + "\n\n")
                
                f.write(f"仓库URL: {self.svn_url}\n")
                f.write(f"统计时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
                f.write("-" * 60 + "\n\n")
                
                f.write("1. 基本统计\n")
                f.write("-" * 40 + "\n")
                f.write(f"   总提交数: {stats['total_commits']}\n")
                f.write(f"   唯一作者数: {len(stats['unique_authors'])}\n")
                if stats['earliest_date'] and stats['latest_date']:
                    days_diff = (stats['latest_date'] - stats['earliest_date']).days
                    f.write(f"   时间范围: {stats['earliest_date'].date()} 到 {stats['latest_date'].date()} ({days_diff}天)\n")
                f.write(f"   平均每次提交变更文件数: {sum(stats['files_changed_by_commit'])/len(stats['files_changed_by_commit']):.2f}\n")
                f.write(f"   平均提交消息长度: {sum(stats['message_lengths'])/len(stats['message_lengths']):.2f} 字符\n\n")
                
                f.write("2. 按年份统计\n")
                f.write("-" * 40 + "\n")
                for year in sorted(stats['commits_by_year'].keys(), reverse=True):
                    count = stats['commits_by_year'][year]
                    percentage = count / stats['total_commits'] * 100
                    f.write(f"   {year}年: {count:6d} 次提交 ({percentage:5.1f}%)\n")
                
                f.write("\n3. 按作者统计(前20名)\n")
                f.write("-" * 40 + "\n")
                sorted_authors = sorted(
                    stats['commits_by_author'].items(), 
                    key=lambda x: x[1], 
                    reverse=True
                )[:20]
                
                for author, count in sorted_authors:
                    percentage = count / stats['total_commits'] * 100
                    f.write(f"   {author:<30}: {count:6d} 次提交 ({percentage:5.1f}%)\n")
                
                f.write("\n4. 按分支统计\n")
                f.write("-" * 40 + "\n")
                sorted_branches = sorted(
                    stats['commits_by_branch'].items(), 
                    key=lambda x: x[1], 
                    reverse=True
                )
                
                for branch, count in sorted_branches:
                    percentage = count / stats['total_commits'] * 100
                    f.write(f"   {branch:<40}: {count:6d} 次提交 ({percentage:5.1f}%)\n")
                
                f.write("\n5. 活跃月份统计(前20个月)\n")
                f.write("-" * 40 + "\n")
                sorted_months = sorted(
                    stats['commits_by_month'].items(), 
                    key=lambda x: x[1], 
                    reverse=True
                )[:20]
                
                for month, count in sorted_months:
                    f.write(f"   {month}: {count:6d} 次提交\n")
                
                f.write("\n" + "=" * 60 + "\n")
                f.write("统计报告结束\n")
                f.write("=" * 60 + "\n")
            
            logger.info(f"统计信息已保存到 {stats_file}")
            
        except Exception as e:
            logger.error(f"生成统计信息失败: {e}")
    
    def generate_summary(self, commits: List[Dict[str, Any]]):
        """生成简要摘要"""
        summary_file = self.output_file.replace('.csv', '_summary.md')
        
        try:
            with open(summary_file, 'w', encoding='utf-8') as f:
                f.write("# SVN提交记录摘要\n\n")
                f.write(f"**仓库**: {self.svn_url}\n\n")
                f.write(f"**提取时间**: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n")
                f.write(f"**总提交数**: {len(commits)}\n\n")
                
                # 前10个提交
                f.write("## 最近10次提交\n\n")
                f.write("| 版本 | 作者 | 日期 | 消息 |\n")
                f.write("|------|------|------|------|\n")
                for commit in commits[:10]:
                    short_msg = commit['message'][:100] + "..." if len(commit['message']) > 100 else commit['message']
                    short_msg = short_msg.replace('\n', ' ').replace('\r', '')
                    f.write(f"| {commit['revision']} | {commit['author']} | {commit['date'][:10]} | {short_msg} |\n")
                
                f.write("\n## 主要作者\n\n")
                author_counts = {}
                for commit in commits:
                    author_counts[commit['author']] = author_counts.get(commit['author'], 0) + 1
                
                sorted_authors = sorted(author_counts.items(), key=lambda x: x[1], reverse=True)
                for author, count in sorted_authors[:10]:
                    f.write(f"- {author}: {count} 次提交\n")
            
            logger.info(f"摘要报告已保存到 {summary_file}")
        except Exception as e:
            logger.error(f"生成摘要报告失败: {e}")
    
    def run(self, years: int = 10, limit: int = None):
        """运行提取器"""
        logger.info(f"开始提取SVN提交记录")
        logger.info(f"仓库URL: {self.svn_url}")
        logger.info(f"输出文件: {self.output_file}")
        
        # 检查SVN是否安装
        if not self.check_svn_installed():
            logger.error("未检测到SVN客户端,请确保SVN已安装并添加到PATH")
            return False
        
        # 测试连接
        if not self.test_connection():
            logger.error("SVN连接测试失败,请检查网络和认证信息")
            return False
        
        # 计算日期范围
        start_date, end_date = self.calculate_date_range(years)
        logger.info(f"提取时间范围: {start_date} 到 {end_date} ({years}年)")
        
        # 获取日志
        logger.info("正在获取SVN日志,可能需要一些时间...")
        xml_content = self.get_svn_log_xml(start_date, end_date, limit)
        
        if not xml_content:
            logger.error("获取SVN日志失败")
            return False
        
        logger.info(f"获取到SVN日志,大小: {len(xml_content)} 字符")
        
        # 解析日志
        logger.info("正在解析SVN日志...")
        self.commits = self.parse_svn_log_xml(xml_content)
        
        # 保存结果
        if self.commits:
            logger.info(f"成功解析 {len(self.commits)} 条提交记录")
            self.save_to_csv(self.commits)
            return True
        else:
            logger.warning("未提取到任何提交记录")
            return False

def main():
    """Command-line entry point.

    Parses the arguments, builds an ``SVNLogExtractor`` and either only tests
    the connection (``--test-only``) or runs the full extraction. The process
    exits with status 0 on success and 1 on failure.
    """
    parser = argparse.ArgumentParser(
        description='SVN提交记录提取工具 - 提取近10年的提交记录',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
使用示例:
  # 基本使用,交互式输入凭据
  python svn_log_extractor_auth.py https://svn.example.com/repo -o output.csv
  
  # 提供用户名密码
  python svn_log_extractor_auth.py https://svn.example.com/repo -u username -p password -o output.csv
  
  # 提取5年记录
  python svn_log_extractor_auth.py https://svn.example.com/repo -y 5 -o output.csv
  
  # 保存凭据供以后使用
  python svn_log_extractor_auth.py https://svn.example.com/repo --save-credentials
  
  # 限制提取1000条记录
  python svn_log_extractor_auth.py https://svn.example.com/repo -l 1000 -o output.csv
        """
    )

    parser.add_argument('svn_url', help='SVN仓库URL或本地路径')
    parser.add_argument('-o', '--output', default='svn_commits.csv',
                       help='输出CSV文件路径 (默认: svn_commits.csv)')
    parser.add_argument('-y', '--years', type=int, default=10,
                       help='提取的年数 (默认: 10)')
    parser.add_argument('-l', '--limit', type=int,
                       help='限制提取的提交数量')
    parser.add_argument('-u', '--username',
                       help='SVN用户名')
    parser.add_argument('-p', '--password',
                       help='SVN密码(注意:命令行中输入密码可能不安全)')
    parser.add_argument('--save-credentials', action='store_true',
                       help='保存凭据供以后使用')
    # NOTE: this flag is accepted but not passed on to the extractor;
    # build_svn_command adds --no-auth-cache by itself whenever a username or
    # password is set.
    parser.add_argument('--no-auth-cache', action='store_true',
                       help='不缓存认证信息')
    parser.add_argument('--debug', action='store_true',
                       help='启用调试模式')
    parser.add_argument('--test-only', action='store_true',
                       help='只测试连接,不提取日志')

    args = parser.parse_args()

    # Raise the root logger to DEBUG when requested.
    if args.debug:
        logging.getLogger().setLevel(logging.DEBUG)
        logger.debug("调试模式已启用")

    extractor = SVNLogExtractor(
        svn_url=args.svn_url,
        output_file=args.output,
        username=args.username,
        password=args.password,
        save_credentials=args.save_credentials
    )

    if args.test_only:
        # Connection test only; no log is fetched.
        if extractor.test_connection():
            logger.info("连接测试成功")
            sys.exit(0)
        else:
            logger.error("连接测试失败")
            sys.exit(1)

    success = extractor.run(
        years=args.years,
        limit=args.limit
    )

    if not success:
        logger.error("SVN提交记录提取失败")
        sys.exit(1)

    def report_path(suffix):
        """Name of a companion report: a trailing '.csv' is replaced by
        *suffix*; without that extension *suffix* is appended, so the result
        never equals the CSV path itself."""
        base = args.output
        if base.lower().endswith('.csv'):
            base = base[:-len('.csv')]
        return base + suffix

    logger.info("SVN提交记录提取完成!")
    print("\n" + "=" * 60)
    print("提取完成!")
    print(f"CSV文件: {args.output}")
    print(f"统计报告: {report_path('_stats.txt')}")
    print(f"摘要报告: {report_path('_summary.md')}")
    print("日志文件: svn_log_extractor.log")
    print("=" * 60)
    sys.exit(0)

# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()

4. 升级pip:python -m pip install --upgrade pip

image

 

5. 安装py脚本需要的依赖: 

 

# 安装keyring库(用于凭据安全存储)
python -m pip install keyring

image

 

# 安装其他有用的库(可选,上面的脚本本身不依赖它们)
python -m pip install pandas matplotlib # 用于数据分析

image

6. 将svn.exe所在目录添加到环境变量PATH;如果本机没有svn.exe,需要重新完整安装svn客户端(安装时勾选命令行客户端工具)。

image

 

image

 

7. 运行py脚本,导出并保存SVN提交记录

image

脚本会生成以下文件:

  1. CSV文件(默认:svn_commits.csv):

    • 包含所有提交记录的详细数据

    • 包含分支信息

  2. 统计报告(svn_commits_stats.txt):

    • 基本统计(总提交数、作者数等)

    • 按年份、月份统计

    • 按作者统计(前20名)

    • 按分支统计

  3. 摘要报告(svn_commits_summary.md):

    • Markdown格式的简要报告

    • 最近10次提交

    • 主要作者列表

  4. 日志文件(svn_log_extractor.log):

    • 详细的执行日志

image

 

posted @ 2025-12-29 10:01  彪悍的代码不需要注释  阅读(2)  评论(0)    收藏  举报
39
0