Using AI to Generate a Script for Batch-Exporting Kernel Source Code
I recently used AI to build a script that exports an entire kernel source tree as .txt files, flattening all paths into a single level, with filtering support. Here is the conversation with the AI:
https://yuanbao.tencent.com/bot/app/share/chat/GjdXiqdQaoVn
Below is the final script it produced, which turned out quite well:
kernel_flatten.py
#!/usr/bin/env python3
"""
Linux内核源码导出工具(带分组功能)
支持按编号分组存储文件,解决大数量文件管理问题
"""
import os
import shutil
import argparse
import re
import sys
import math
from collections import OrderedDict
def sanitize_filename(path):
    """Sanitize a path: replace special characters with underscores"""
    return re.sub(r'[^\w\-_.]', '_', path)
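
# A quick illustration of the flattening (the path below is a hypothetical example):
#   sanitize_filename('arch/x86/boot/main.c')  ->  'arch_x86_boot_main.c'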
class FilterRule:
    """Filter rule handler"""
    def __init__(self, pattern, is_include, is_suffix=False):
        self.pattern = pattern
        self.is_include = is_include
        self.is_suffix = is_suffix

    def match(self, rel_path, is_dir=False):
        """Match a directory rule or a suffix rule"""
        if self.is_suffix:
            # Suffix match (case-insensitive)
            return rel_path.lower().endswith(self.pattern.lower())
        else:
            # Directory match (covers subdirectories recursively)
            pattern = self.pattern.replace('\\', '/')
            target_path = rel_path.replace('\\', '/')
            return (target_path == pattern or
                    target_path.startswith(pattern + '/')) if is_dir else pattern in target_path
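
# How the two match modes behave (paths here are hypothetical examples):
#   FilterRule('.c', True, is_suffix=True).match('main.c')          -> True   (case-insensitive suffix)
#   FilterRule('kernel', True).match('kernel/sched', is_dir=True)   -> True   (subdirectory of 'kernel')
#   FilterRule('kernel', True).match('drivers/net', is_dir=True)    -> False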
def parse_filter_args(filter_args, exclude_args):
    """Parse the command-line filter arguments into a prioritized rule chain"""
    rule_chain = OrderedDict()
    sequence = []
    # Include rules (-f)
    if filter_args:
        for item in filter_args.split(','):
            item = item.strip()
            if not item:
                continue
            is_suffix = item.startswith('.')
            rule = FilterRule(item, True, is_suffix)
            key = f"include_{item}"
            rule_chain[key] = rule
            sequence.append(key)
    # Exclude rules (-n)
    if exclude_args:
        for item in exclude_args.split(','):
            item = item.strip()
            if not item:
                continue
            is_suffix = item.startswith('.')
            rule = FilterRule(item, False, is_suffix)
            key = f"exclude_{item}"
            rule_chain[key] = rule  # a later rule overrides an earlier one with the same key
            sequence.append(key)
    return [rule_chain[key] for key in sequence]
def should_include_file(rel_path, rules):
    """Decide from the rule chain whether a file should be included"""
    dir_part = os.path.dirname(rel_path)
    file_name = os.path.basename(rel_path)
    if not rules:
        return True  # no rules: include everything by default
    # Apply the rules in reverse order (rules given later take precedence)
    for rule in reversed(rules):
        if not rule.is_suffix and rule.match(dir_part, is_dir=True):
            return rule.is_include
        if rule.is_suffix and rule.match(file_name):
            return rule.is_include
    # Default when nothing matches: include only if there are no include rules at all
    return not any(rule.is_include for rule in rules)
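
# Rule precedence in practice (arguments and paths here are hypothetical examples):
# with -f 'kernel,.c' -n 'kernel/trace', the exclude rule is checked first because
# later rules win, so:
#   should_include_file('kernel/trace/ftrace.c', rules) -> False  (directory excluded)
#   should_include_file('kernel/fork.c', rules)         -> True   (matches the '.c' suffix)
#   should_include_file('README', rules)                -> False  (no match, and include rules exist)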
def process_source_tree(source_dir, output_dir, prefix="", rules=None, digits=0, num_groups=0):
    """Process the files and store them in groups"""
    safe_prefix = sanitize_filename(prefix).rstrip('_') if prefix else ""
    files_to_export = []  # relative paths of the files to export

    # First pass: collect the files that need to be exported
    total_count = 0
    for root, _, files in os.walk(source_dir):
        for filename in files:
            total_count += 1
            src_path = os.path.join(root, filename)
            rel_path = os.path.relpath(src_path, source_dir)
            if should_include_file(rel_path, rules):
                files_to_export.append(rel_path)

    included_count = len(files_to_export)
    if included_count == 0:
        print("No files to export")
        return total_count, 0, 0

    # Work out the grouping parameters
    group_size = 0
    group_dirs = []
    if num_groups > 0:
        group_size = math.ceil(included_count / num_groups)  # files per group, rounded up
        # Create the group directories
        for i in range(1, num_groups + 1):
            group_dir = os.path.join(output_dir, f"group_{i}")
            os.makedirs(group_dir, exist_ok=True)
            group_dirs.append(group_dir)
        print(f"Created {num_groups} group directories, at most {group_size} files per group")

    # Second pass: process the files and distribute them across the groups
    current_index = 0
    for rel_path in files_to_export:
        current_index += 1
        src_path = os.path.join(source_dir, rel_path)
        # Pick the target directory
        if num_groups > 0:
            group_index = (current_index - 1) // group_size  # which group this file falls into
            target_dir = group_dirs[group_index]
        else:
            target_dir = output_dir
        # Build the numbered file name
        counter_str = f"{current_index:0{digits}d}_" if digits > 0 else ""
        base_name = sanitize_filename(rel_path)
        # Suffix handling: only append .txt if the file is not already a .txt
        _, ext = os.path.splitext(os.path.basename(rel_path))
        if ext.lower() == '.txt':
            encoded_name = base_name
        else:
            encoded_name = base_name + ".txt"
        # Assemble the final file name
        if safe_prefix:
            final_name = f"{counter_str}{safe_prefix}_{encoded_name}"
        else:
            final_name = f"{counter_str}{encoded_name}"
        dest_path = os.path.join(target_dir, final_name)
        # Copy the file
        shutil.copy2(src_path, dest_path)
        # Report progress
        if num_groups > 0:
            group_info = f" → group_{group_index+1}/{final_name}"
        else:
            group_info = f" → {final_name}"
        print(f"Processed [{current_index}/{included_count}]: {rel_path}{group_info}")

    return total_count, included_count, current_index
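
# Resulting layout, for illustration (file and position are hypothetical): with digits=5,
# no prefix and no grouping, if 'kernel/sched/core.c' is the 42nd exported file it is
# written to the output directory as '00042_kernel_sched_core.c.txt'.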
def main():
    parser = argparse.ArgumentParser(
        description="Linux kernel source export tool (grouped-storage version)",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument("-s", "--source", required=True, help="root directory of the kernel source tree")
    parser.add_argument("-o", "--output", required=True, help="output directory")
    parser.add_argument("-p", "--prefix", default="",
                        help="file-name prefix (separated by an underscore)")
    parser.add_argument("-f", "--filter", default="",
                        help="include rules (comma-separated, e.g. 'kernel,.c')")
    parser.add_argument("-n", "--exclude", default="",
                        help="exclude rules (comma-separated, e.g. 'docs,.txt')")
    parser.add_argument("-d", "--digits", type=int, default=5,
                        help="number of counter digits (0 = disabled, ≥4 recommended)")
    parser.add_argument("-t", "--groups", type=int, default=0,
                        help="number of groups (0 = no grouping)")
    args = parser.parse_args()

    # Validate the source path
    if not os.path.isdir(args.source):
        print(f"Error: source directory does not exist: {args.source}")
        sys.exit(1)

    # Create the output directory
    os.makedirs(args.output, exist_ok=True)

    # Parse the filter rules
    rules = parse_filter_args(args.filter, args.exclude)

    # Print a summary of the parameters
    print(f"Source directory: {args.source}")
    print(f"Output directory: {args.output}")
    print(f"File-name prefix: '{args.prefix}'" if args.prefix else "No prefix specified")
    print(f"Numbering: {args.digits} digits" if args.digits > 0 else "Numbering disabled")
    print(f"Grouping: {args.groups} groups" if args.groups > 0 else "Grouping disabled")
    if args.filter:
        print(f"Include rules: {args.filter}")
    if args.exclude:
        print(f"Exclude rules: {args.exclude}")
    if rules:
        print("\nActive rules (highest priority first):")
        for i, rule in enumerate(reversed(rules), 1):
            rule_type = "include" if rule.is_include else "exclude"
            rule_target = "suffix" if rule.is_suffix else "directory"
            print(f"  {i}. [{rule_type}] {rule_target} '{rule.pattern}'")
    print("\n" + "=" * 60)

    # Process the files
    total, included, seq_count = process_source_tree(
        args.source,
        args.output,
        args.prefix,
        rules,
        args.digits,
        args.groups
    )
    print("=" * 60)
    print(f"Done! Files scanned: {total}, files exported: {included}")
    if args.digits > 0:
        print(f"File numbering range: 1 - {seq_count} (digits: {args.digits})")
        if seq_count >= (10 ** args.digits):
            print("⚠️ Warning: the file count exceeds the numbering capacity; increase --digits")


if __name__ == "__main__":
    main()
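To give a concrete idea of how it is invoked, here is a minimal example run; the source path, kernel version, and output directory below are placeholders, so adjust them for your own tree:

python3 kernel_flatten.py -s ~/linux-6.6 -o ./kernel_txt -f 'kernel,.c,.h' -n 'Documentation' -d 5 -t 10

This would export all .c/.h files plus everything under kernel/, skip anything under Documentation/, number the exported files with five digits, and spread them across group_1 through group_10 inside ./kernel_txt.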
This post is from 博客园 (cnblogs); author: dolinux. Reproduction without permission is prohibited.
