pinterest或者任意网页数据的处理方法

0.(抽出MJ)抽出midjourney的数据,放到独立文件夹中

1.(批量重命名)将不需要保留网页数据的内容直接批量重命名

2.(处理网页爬虫数据)针对网上爬下来的标题,运行以下代码,处理每个文件名,删特殊符号和数字,但保留逗号,句号,括号以及括号内数字。

 

import os
import re

def clean_filename(filename):
    # 保留括号内的数字
    filename = re.sub(r'(?<!\()\d+(?!\))', '', filename)
    # 将特殊符号(包括下划线)转换为空格,但保留括号、逗号和句点
    filename = re.sub(r'[^\w\s\(\),\.]|_', ' ', filename)
    # 去除文件名开头和结尾的空格
    filename = filename.strip()
    return filename

def is_image_file(filename):
    return filename.lower().endswith(('.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff'))

def unique_file_name(directory, filename):
    base, extension = os.path.splitext(filename)
    counter = 1
    new_filename = filename
    while os.path.exists(os.path.join(directory, new_filename)):
        new_filename = f"{base} ({counter}){extension}"
        counter += 1
    return new_filename

def rename_files_in_directory(directory):
    for root, dirs, files in os.walk(directory):
        for file in files:
            if is_image_file(file):
                new_name = clean_filename(file)
                new_name = unique_file_name(root, new_name)
                old_file_path = os.path.join(root, file)
                new_file_path = os.path.join(root, new_name)
                os.rename(old_file_path, new_file_path)
                print(f'已重命名: {file} -> {new_name}')

def main():
    directory = input("请输入要处理的文件夹路径: ")
    if not os.path.exists(directory):
        print("路径不存在,请输入有效的文件夹路径。")
        return
    rename_files_in_directory(directory)
    print("处理完成。")

if __name__ == "__main__":
    main()

 

3.(用txt加前缀)需要保留网页数据的部分,在文件夹内加文本文件,标题为需要加入的前缀,txt文件加入所有图片名前。(结尾无逗号自动补充逗号)

import os
import re

def is_image_file(filename):
    for ext in ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff']:
        if filename.lower().endswith(ext):
            return True
    return False

def get_txt_prefix(directory):
    for file in os.listdir(directory):
        if file.lower().endswith('.txt'):
            prefix = os.path.splitext(file)[0]
            if not prefix.endswith(','):
                prefix += ','
            return prefix
    return None

def rename_images_with_prefix(directory, prefix):
    for root, dirs, files in os.walk(directory):
        for file in files:
            if is_image_file(file):
                new_name = prefix + file
                old_file_path = os.path.join(root, file)
                new_file_path = os.path.join(root, new_name)
                os.rename(old_file_path, new_file_path)
                print(f'已重命名: {file} -> {new_name}')

def main():
    directory = input("请输入要处理的文件夹路径: ")
    for root, dirs, files in os.walk(directory):
        for dir in dirs:
            dir_path = os.path.join(root, dir)
            prefix = get_txt_prefix(dir_path)
            if prefix:
                rename_images_with_prefix(dir_path, prefix)

    print("处理完成。")

if __name__ == "__main__":
    main()

 

4.(SD自然语言反推)利用stable diffusion的BLIP自然语言反推,递归子文件夹得到文本文件。文本文件标题与图片名相同。

 5.(删括号及数字)遍历目录及其子目录下所有txt类文件,删除其中的括号与数字

import os
import re

def remove_numbers_and_brackets(text):
    # 使用正则表达式删除数字和括号
    return re.sub(r'[\d\(\)]', '', text)

def process_directory(directory):
    for root, dirs, files in os.walk(directory):
        for file in files:
            if file.endswith('.txt'):
                file_path = os.path.join(root, file)
                with open(file_path, 'r', encoding='utf-8') as f:
                    content = f.read()
                # 删除数字和括号
                content = remove_numbers_and_brackets(content)
                with open(file_path, 'w', encoding='utf-8') as f:
                    f.write(content)

# 主程序入口
if __name__ == "__main__":
    dir_path = input("请输入要处理的目录路径: ")
    process_directory(dir_path)
    print("处理完成。")

 

posted @ 2023-11-25 22:10  不上火星不改名  阅读(66)  评论(0)    收藏  举报