用于文本格式处理或文件格式处理的一些Python脚本
任何关于代码的改进不胜感激。
脚本处理后的结果记得核对一下。
写一个Python脚本删除一个.py文件的所有注释
// 也可以尝试一下 https://python-minifier.com/
import re
def remove_comments(file_path):
    """Strip comments, docstrings and blank lines from a Python file in place.

    The source is tokenized instead of pattern-matched, so ``#`` characters
    inside string literals and triple-quoted strings used as values (assigned,
    returned, passed as arguments, ...) are left untouched.  Only real comment
    tokens and bare string-literal statements (docstrings and strings used as
    block comments) are removed.  A docstring that is the only statement of
    its block is replaced with ``pass`` so the result still compiles.

    Shebang and coding-cookie lines are comments too and are removed.

    Args:
        file_path: Path of the ``.py`` file to rewrite.  Its encoding is
            detected from a BOM or coding cookie (UTF-8 otherwise).  Because
            the cookie itself is removed, the file is written back as UTF-8.

    Raises:
        OSError: If the file cannot be read or written.
        SyntaxError, tokenize.TokenError: If the source cannot be tokenized.
            The file is left unmodified in that case.
    """
    import io
    import tokenize

    with tokenize.open(file_path) as file:
        content = file.read()

    tokens = list(tokenize.generate_tokens(io.StringIO(content).readline))
    removals = _plan_comment_removals(tokens)
    removed_starts = {start for start, _end, _replacement in removals}

    # tokenize numbers rows by '\n' only, so split the same way
    # (str.splitlines would also break on form feeds and similar characters).
    lines = content.split('\n')

    # Rows that end inside a surviving multi-line string must be kept verbatim:
    # blank lines and trailing spaces there are part of the string's value.
    verbatim_rows = set()
    for tok in tokens:
        if tok.start[0] < tok.end[0] and tok.start not in removed_starts:
            verbatim_rows.update(range(tok.start[0], tok.end[0]))

    # Apply edits bottom-up so earlier (row, col) positions stay valid.
    for (start_row, start_col), (end_row, end_col), replacement in sorted(removals, reverse=True):
        head = lines[start_row - 1][:start_col]
        tail = lines[end_row - 1][end_col:]
        lines[start_row - 1] = head + replacement + tail
        for row in range(start_row + 1, end_row + 1):
            lines[row - 1] = None  # swallowed by a removed multi-line string

    kept = []
    for row, line in enumerate(lines, start=1):
        if line is None:
            continue
        if row in verbatim_rows:
            kept.append(line)
        elif line.strip():
            kept.append(line.rstrip())

    final_content = '\n'.join(kept)
    with open(file_path, 'w', encoding='utf-8') as file:
        file.write(final_content)


def _plan_comment_removals(tokens):
    """Return ``(start, end, replacement)`` edits for comments and docstrings."""
    import tokenize

    trivia = (tokenize.COMMENT, tokenize.NL)
    statement_edges = (tokenize.NEWLINE, tokenize.INDENT, tokenize.DEDENT)

    edits = [(tok.start, tok.end, '') for tok in tokens if tok.type == tokenize.COMMENT]

    significant = [tok for tok in tokens if tok.type not in trivia]
    for idx, tok in enumerate(significant):
        if tok.type != tokenize.STRING:
            continue
        # Leave f-strings alone: evaluating them can have side effects.
        if tok.string.lstrip('rRbBuU')[:1] not in ('"', "'"):
            continue
        before = significant[idx - 1] if idx else None
        after = significant[idx + 1] if idx + 1 < len(significant) else None
        starts_statement = before is None or before.type in statement_edges
        ends_statement = after is None or after.type in (tokenize.NEWLINE, tokenize.ENDMARKER)
        if not (starts_statement and ends_statement):
            continue
        # INDENT <string> NEWLINE DEDENT means the string is the whole block body.
        beyond = significant[idx + 2] if idx + 2 < len(significant) else None
        sole_statement = (
            before is not None
            and before.type == tokenize.INDENT
            and (beyond is None or beyond.type in (tokenize.DEDENT, tokenize.ENDMARKER))
        )
        edits.append((tok.start, tok.end, 'pass' if sole_statement else ''))
    return edits
# Example usage: 'your_script.py' is a placeholder path. The file is
# overwritten in place, so run this on a copy or a version-controlled file.
remove_comments('your_script.py')
行内公式使用$而不是$$
使用cloud-document-converter下载的markdown文件中,行内公式显示不正常,你可能需要这个脚本。
import re
from datetime import datetime
def replace_formula_delimiters_in_md(input_file):
    """Rewrite inline ``$$...$$`` formulas in a Markdown file as ``$...$``.

    A line whose stripped content is exactly one ``$$...$$`` pair is treated
    as a display formula and copied unchanged.  Every other ``$$...$$`` pair
    is converted to single-dollar inline math.  The result is written to a new
    file next to the input, named ``<input stem>_<YYYYmmdd_HHMMSS>.md``; the
    input file is not modified.  Progress and errors are printed.

    Args:
        input_file: Path of the Markdown file to read (UTF-8).

    Returns:
        The path of the file that was written, or ``None`` if an error was
        reported.
    """
    import os

    # Display formula: one $$...$$ pair spanning the whole line, with no other
    # "$$" inside.  Without that restriction "$$a$$ and $$b$$" would be taken
    # for a single display formula and left unconverted.
    display_pattern = re.compile(r'^\${2}(?:(?!\${2}).)+\${2}$')
    inline_pattern = re.compile(r'\${2}(.+?)\${2}')  # $$content$$ anywhere

    try:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        # splitext only strips a real extension; rsplit('.') would cut at a dot
        # inside a directory name when the file name itself has none.
        output_file = f"{os.path.splitext(input_file)[0]}_{timestamp}.md"
        with open(input_file, 'r', encoding='utf-8') as file:
            lines = file.readlines()

        new_lines = []
        replacement_count = 0
        print("Changes to be made:")
        for line in lines:
            stripped_line = line.strip()
            if display_pattern.match(stripped_line):
                print(f"Keeping unchanged: {stripped_line}")
                new_lines.append(line)
                continue
            new_line, hits = inline_pattern.subn(r'$\1$', line)
            if hits:
                replacement_count += hits
                print(f"Changing: {stripped_line} → {new_line.strip()}")
            new_lines.append(new_line)

        with open(output_file, 'w', encoding='utf-8') as file:
            file.writelines(new_lines)
        print(f"\nCreated new file: {output_file}")
        print(f"Converted {replacement_count} formulas from $$ to $")
        return output_file
    except FileNotFoundError:
        print(f"Error: File '{input_file}' not found")
    except Exception as e:
        # Script-level boundary: report the problem instead of crashing.
        print(f"An error occurred: {str(e)}")
    return None
if __name__ == "__main__":
    # "your_file.md" is a placeholder; point it at the Markdown file to convert.
    replace_formula_delimiters_in_md("your_file.md")
给文字加上反爬水印
import random
import re
# Reminder-sentence templates (runtime text, in Chinese). Each one embeds
# the original post's address through the {url} placeholder, both as the
# link target and as the visible link text; it is filled in with str.format.
templates = [
    "【防盗链提醒:爬虫是吧?原贴在:<a href=\"{url}\" target=\"_blank\">{url}</a>】",
    "【防爬提示:你是爬虫吗?原文地址:<a href=\"{url}\" target=\"_blank\">{url}</a>】",
    "【反爬警告:爬虫别乱来!原帖链接:<a href=\"{url}\" target=\"_blank\">{url}</a>】",
    "【防盗链通知:爬虫注意了,原文在:<a href=\"{url}\" target=\"_blank\">{url}</a>】",
    "【反爬提醒:是爬虫的话,原文见:<a href=\"{url}\" target=\"_blank\">{url}</a>】",
    "【防爬虫提示:爬虫请止步,原文链接:<a href=\"{url}\" target=\"_blank\">{url}</a>】",
    "【盗链警告:爬虫你好,原文地址:<a href=\"{url}\" target=\"_blank\">{url}</a>】",
    "【防爬声明:爬虫请看,原文出处:<a href=\"{url}\" target=\"_blank\">{url}</a>】",
    "【反盗链提醒:爬虫注意,原链接:<a href=\"{url}\" target=\"_blank\">{url}</a>】",
    "【爬虫警告:想爬吗?原文在这:<a href=\"{url}\" target=\"_blank\">{url}</a>】",
]
# Address of the original post that every reminder links back to
url = "https://www.cnblogs.com/yhm138/p/18549168"
def split_into_sentences(content):
    """Split *content* into stripped, non-empty sentences.

    A sentence ends at one of the terminator characters in the pattern's
    character class (sentence punctuation or a newline), but only when that
    terminator is followed by whitespace.  The terminator stays attached to
    its sentence; the whitespace after it is discarded.
    """
    # The capture group makes re.split return text and terminators alternately.
    pieces = re.split(r'([.!?。!?\n])\s+', content)
    merged = [text + terminator for text, terminator in zip(pieces[0::2], pieces[1::2])]
    if len(pieces) % 2 != 0:
        # The trailing piece has no terminator of its own.
        merged.append(pieces[-1])
    stripped = (candidate.strip() for candidate in merged)
    return [sentence for sentence in stripped if sentence]
# Read the input markdown file; fall back to a built-in sample if it is missing.
input_file = "original_mdfile.md"
try:
    with open(input_file, "r", encoding="utf-8") as f:
        original_content = f.read()
except FileNotFoundError:
    print(f"Error: File '{input_file}' not found! Creating a sample input.")
    original_content = "This is sentence one. Here's sentence two! # Header here\nAnother sentence? Yes, indeed."

# Split content into sentences
sentences = split_into_sentences(original_content)

# Locate each sentence in the original text. Searching from a moving cursor
# keeps repeated sentences apart, and knowing the spans lets the output reuse
# the original whitespace between sentences instead of guessing it.
sentence_spans = []
cursor = 0
for sentence in sentences:
    start = original_content.index(sentence, cursor)
    cursor = start + len(sentence)
    sentence_spans.append((start, cursor))

# A reminder may follow sentence i unless the next sentence starts with '#'
# (so nothing is placed directly in front of a markdown heading).
valid_insertion_points = [
    i for i, following in enumerate(sentences[1:])
    if not following.strip().startswith('#')
]

# At most 10 insertions, and never more than there are templates or valid
# points. Defined before branching so the final report can always use it.
num_insertions = min(10, len(templates), len(valid_insertion_points))

if not valid_insertion_points:
    print("No valid insertion points found (all subsequent sentences start with '#'). Output will be unchanged.")
    content = original_content
else:
    # Randomly choose where to insert and which templates to use; sampling
    # leaves the module-level template list in its original order.
    insertion_indices = sorted(random.sample(valid_insertion_points, num_insertions))
    selected_templates = random.sample(templates, num_insertions)
    # Copy the original text through, adding a reminder right after each
    # chosen sentence. All original spacing and line breaks are preserved.
    pieces = []
    template_idx = 0
    previous_end = 0
    for i, (_start, end) in enumerate(sentence_spans):
        pieces.append(original_content[previous_end:end])
        previous_end = end
        if i in insertion_indices and template_idx < num_insertions:
            pieces.append(" " + selected_templates[template_idx].format(url=url))
            print("insert.")
            template_idx += 1
    pieces.append(original_content[previous_end:])
    content = "".join(pieces)

# Write to output markdown file
output_file = "original_mdfile_with_reminders.md"
with open(output_file, "w", encoding="utf-8") as f:
    f.write(content)
print(f"Markdown file '{output_file}' has been generated with up to {num_insertions} insertions!")
PyPDF2 顺时针旋转一个PDF的所有页面并保存
import PyPDF2
# Open the source PDF. The output is written while the input is still open,
# on the assumption that the reader may fetch page data lazily.
with open('input.pdf', 'rb') as file:
    reader = PyPDF2.PdfReader(file)
    writer = PyPDF2.PdfWriter()
    # Rotate every page 90 degrees clockwise and queue it for output
    for page in reader.pages:
        writer.add_page(page.rotate(90))
    # Save the rotated PDF
    with open('output.pdf', 'wb') as output_file:
        writer.write(output_file)

浙公网安备 33010602011771号