14 Frequently Used Python Automation Scripts

There are 14 scripts in total; the table of contents is as follows:

  • Data processing tools

  • Network detection tools

  • System task automation tools

  • Test automation tools

  • File management automation tools

  • Performance monitoring tools

  • Log analysis tools

  • Email automation tools

  • Database interaction tools

  • OCR recognition

  • PDF operation automation

  • Web scraping automation

  • Excel spreadsheet automation

  • Image editing automation

Data Processing Tools

1.1  Data Cleaning

import pandas as pd

def clean_data(input_file, output_file):
    df = pd.read_csv(input_file)
    df.dropna(inplace=True)  # drop rows with missing values
    df.drop_duplicates(inplace=True)  # drop duplicate rows
    df.to_csv(output_file, index=False)

# Usage example
clean_data("data.csv", "cleaned_data.csv")

1.2  Data Comparison

import pandas as pd

def compare_data(file1, file2):
    df1 = pd.read_csv(file1)
    df2 = pd.read_csv(file2)
    diff = df1.compare(df2)
    return diff

# Usage example
result = compare_data("file1.csv", "file2.csv")
print(result)
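Note that DataFrame.compare only works on two identically labeled DataFrames (same shape and the same row/column labels); if the two CSV files differ in shape, pandas raises an error, so align or merge them first.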

Network Detection Tools

2.1  Checking Whether a Port Is Open

import socket

def check_port(host, port):
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    sock.settimeout(3)  # avoid long hangs on unreachable hosts
    result = sock.connect_ex((host, port))
    sock.close()
    return result == 0

# Usage example
if check_port("example.com", 80):
    print("Port 80 is open")
else:
    print("Port 80 is closed")

2.2  Batch Ping Test

import os

def ping_hosts(hosts):
    for host in hosts:
        response = os.system(f"ping -c 1 {host}")  # on Windows, the count flag is -n instead of -c
        if response == 0:
            print(f"{host} is up")
        else:
            print(f"{host} is down")

# Usage example
hosts = ["google.com", "example.com", "localhost"]
ping_hosts(hosts)
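os.system shells out and echoes ping's full output. A quieter, cross-platform variant is sketched below, using only the standard library; the helper name ping_hosts_quiet is just for illustration.

import platform
import subprocess

def ping_hosts_quiet(hosts):
    # Windows ping counts packets with -n, Unix-like systems with -c
    flag = "-n" if platform.system() == "Windows" else "-c"
    for host in hosts:
        result = subprocess.run(["ping", flag, "1", host],
                                stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
        print(f"{host} is {'up' if result.returncode == 0 else 'down'}")

# Usage example
ping_hosts_quiet(["google.com", "localhost"])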

System Task Automation

3. Monitoring Disk Space

import shutil

def check_disk_space(path, threshold):
    total, used, free = shutil.disk_usage(path)
    free_gb = free // (2**30)
    if free_gb < threshold:
        print(f"Warning: Free disk space is below {threshold} GB.")
    else:
        print(f"Free disk space: {free_gb} GB.")

# Usage example
check_disk_space('/', 10)

Test Automation

4. Unit Testing with unittest

import unittest

def add(a, b):
    return a + b

class TestMyFunction(unittest.TestCase):
    def test_addition(self):
        result = add(1, 2)
        self.assertEqual(result, 3)

# Usage example
if __name__ == '__main__':
    unittest.main()
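Tip: the same tests can also be discovered and run from the command line with python -m unittest, without calling unittest.main() inside the script.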

File Management Automation

5.1  Sorting Files by Extension

import os
from shutil import move

def sort_files(directory_path):
    for filename in os.listdir(directory_path):
        if os.path.isfile(os.path.join(directory_path, filename)):
            file_extension = filename.split('.')[-1]
            destination_directory = os.path.join(directory_path, file_extension)
            if not os.path.exists(destination_directory):
                os.makedirs(destination_directory)
            move(os.path.join(directory_path, filename), os.path.join(destination_directory, filename))

# Usage example
sort_files('/path/to/directory')

5.2  Removing Empty Folders

import os

def remove_empty_folders(directory_path):
    for root, dirs, files in os.walk(directory_path, topdown=False):
        for folder in dirs:
            folder_path = os.path.join(root, folder)
            if not os.listdir(folder_path):
                os.rmdir(folder_path)

# Usage example
remove_empty_folders('/path/to/directory')

5.3  Batch Renaming Files

import os

def batch_rename(directory, prefix):
    for count, filename in enumerate(os.listdir(directory)):
        extension = os.path.splitext(filename)[1]  # keep each file's original extension
        new_name = f"{prefix}_{count}{extension}"
        os.rename(os.path.join(directory, filename), os.path.join(directory, new_name))

# Usage example
batch_rename("/path/to/files", "file")

5.4  Finding Large Files

import os

def find_large_files(directory, size_limit_mb):
    size_limit = size_limit_mb * 1024 * 1024  # convert to bytes
    large_files = []

    for root, dirs, files in os.walk(directory):
        for file in files:
            file_path = os.path.join(root, file)
            if os.path.getsize(file_path) > size_limit:
                large_files.append(file_path)

    return large_files

# Usage example
large_files = find_large_files("/path/to/directory", 100)  # find files larger than 100 MB
print(large_files)

Performance Monitoring Tools

6.1  Monitoring CPU and Memory Usage

import psutil

def monitor_system(interval=1):
    while True:
        # cpu_percent(interval=...) already blocks for `interval` seconds between samples
        cpu_usage = psutil.cpu_percent(interval=interval)
        memory_usage = psutil.virtual_memory().percent
        print(f"CPU Usage: {cpu_usage}% | Memory Usage: {memory_usage}%")

# Usage example
monitor_system(interval=2)

6.2  Monitoring GPU Usage

import pynvml

def monitor_gpu_usage():
    pynvml.nvmlInit()
    device_count = pynvml.nvmlDeviceGetCount()

    for i in range(device_count):
        handle = pynvml.nvmlDeviceGetHandleByIndex(i)
        util = pynvml.nvmlDeviceGetUtilizationRates(handle)
        memory_info = pynvml.nvmlDeviceGetMemoryInfo(handle)
        print(f"GPU {i}: Usage={util.gpu}%, Memory Used={memory_info.used / 1024 ** 2} MB")

    pynvml.nvmlShutdown()

# Usage example
monitor_gpu_usage()

6.3  Monitoring Network Bandwidth

import psutil
import time

def monitor_network_usage(interval=1):
    old_value = psutil.net_io_counters().bytes_sent + psutil.net_io_counters().bytes_recv

    while True:
        time.sleep(interval)
        new_value = psutil.net_io_counters().bytes_sent + psutil.net_io_counters().bytes_recv
        bandwidth = (new_value - old_value) / interval  # bandwidth in bytes per second
        print(f"Network Bandwidth: {bandwidth} B/s")
        old_value = new_value

# Usage example
monitor_network_usage(interval=2)

6.4  Monitoring Disk I/O

import psutil
import time

def monitor_disk_io(interval=1):
    old_read = psutil.disk_io_counters().read_bytes
    old_write = psutil.disk_io_counters().write_bytes

    while True:
        time.sleep(interval)
        new_read = psutil.disk_io_counters().read_bytes
        new_write = psutil.disk_io_counters().write_bytes

        read_speed = (new_read - old_read) / interval
        write_speed = (new_write - old_write) / interval

        print(f"Read Speed: {read_speed / 1024} KB/s | Write Speed: {write_speed / 1024} KB/s")

        old_read = new_read
        old_write = new_write

# Usage example
monitor_disk_io(interval=2)

6.5  Monitoring Per-Process Resource Usage

import psutil

def monitor_process(pid):
    process = psutil.Process(pid)

    while True:
        cpu_usage = process.cpu_percent(interval=1)  # blocks for 1 second per sample
        memory_usage = process.memory_info().rss / 1024 ** 2  # convert to MB
        print(f"PID {pid}: CPU={cpu_usage}%, Memory={memory_usage} MB")

# Usage example
monitor_process(1234)  # replace with the PID of the target process

Log Analysis Tools

7.1  Counting the Most Frequent Errors in a Log

from collections import Counter
import re

def top_n_errors(log_file, n=5):
    error_pattern = re.compile(r"ERROR: (.+)")
    errors = []

    with open(log_file, 'r') as f:
        for line in f:
            match = error_pattern.search(line)
            if match:
                errors.append(match.group(1))

    return Counter(errors).most_common(n)

# Usage example
top_errors = top_n_errors("app.log", n=3)
print(top_errors)

7.2  Filtering Logs by Time Range

from datetime import datetime

def filter_logs_by_time(log_file, start_time, end_time, output_file):
    start = datetime.strptime(start_time, "%Y-%m-%d %H:%M:%S")
    end = datetime.strptime(end_time, "%Y-%m-%d %H:%M:%S")

    with open(log_file, 'r') as f:
        logs = f.readlines()

    filtered_logs = []
    for log in logs:
        log_time_str = log.split()[0] + " " + log.split()[1]  # assumes the timestamp is the first two fields of each line
        log_time = datetime.strptime(log_time_str, "%Y-%m-%d %H:%M:%S")
        if start <= log_time <= end:
            filtered_logs.append(log)

    with open(output_file, 'w') as f:
        f.writelines(filtered_logs)

# Usage example
filter_logs_by_time("app.log", "2025-02-06 12:00:00", "2025-02-06 14:00:00", "filtered_logs.log")

7.3  Extracting Error Messages from a Log

def extract_errors(log_file, output_file):
    with open(log_file, 'r') as f:
        lines = f.readlines()

    errors = [line for line in lines if "ERROR" in line]

    with open(output_file, 'w') as f:
        f.writelines(errors)

# Usage example
extract_errors("app.log", "errors.log")

7.4  Merging Log Files

def merge_log_files(log_files, output_file):
    with open(output_file, 'w') as outfile:
        for log_file in log_files:
            with open(log_file, 'r') as infile:
                outfile.write(infile.read())

# Usage example
merge_log_files(["log1.log", "log2.log", "log3.log"], "merged_logs.log")

7.5  Real-Time Log File Monitoring

import time

def tail_log_file(log_file):
    with open(log_file, 'r') as f:
        f.seek(0, 2)  # move to the end of the file

        while True:
            line = f.readline()
            if line:
                print(line.strip())
            else:
                time.sleep(0.1)

# Usage example
tail_log_file("app.log")

Email Automation

8. Sending Personalized Emails

import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart

def send_personalized_email(sender_email, sender_password, recipients, subject, body):
    server = smtplib.SMTP('smtp.gmail.com', 587)
    server.starttls()
    server.login(sender_email, sender_password)
    for recipient_email in recipients:
        message = MIMEMultipart()
        message['From'] = sender_email
        message['To'] = recipient_email
        message['Subject'] = subject
        message.attach(MIMEText(body, 'plain'))
        server.send_message(message)
    server.quit()

# Usage example
sender_email = 'your_email@gmail.com'
sender_password = 'your_password'
recipients = ['recipient1@example.com', 'recipient2@example.com']
subject = 'Hello'
body = 'This is a test email.'
send_personalized_email(sender_email, sender_password, recipients, subject, body)
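Note: Gmail generally rejects plain account passwords for SMTP logins; you typically need to enable 2-step verification and generate an app password, or swap in your own provider's SMTP host and port.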

Database Interaction

9. Connecting to a Database

import sqlite3

def connect_to_database(db_path):
    conn = sqlite3.connect(db_path)
    cursor = conn.cursor()
    return conn, cursor

def execute_query(cursor, query):
    cursor.execute(query)
    results = cursor.fetchall()
    return results

# Usage example
conn, cursor = connect_to_database('/path/to/database.db')
query = 'SELECT * FROM table_name'
results = execute_query(cursor, query)
print(results)
conn.close()
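For queries that include user-supplied values, it is safer to let sqlite3 fill in placeholders instead of building the SQL string by hand. A minimal sketch follows; the users table and name column are hypothetical.

def execute_parameterized_query(cursor, query, params=()):
    # "?" placeholders are substituted by sqlite3 itself, which avoids SQL injection
    cursor.execute(query, params)
    return cursor.fetchall()

# Usage example (assumes a hypothetical "users" table with a "name" column)
# rows = execute_parameterized_query(cursor, "SELECT * FROM users WHERE name = ?", ("Alice",))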

OCR Recognition

10. Recognizing Text in an Image

import pytesseract
from PIL import Image

def recognize_text(image_path):
    image = Image.open(image_path)
    text = pytesseract.image_to_string(image, lang='chi_sim')  # use Simplified Chinese
    return text

# Usage example
text = recognize_text('/path/to/image.jpg')
print(text)
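pytesseract is only a wrapper: the Tesseract OCR engine itself must be installed on the system, and recognizing Simplified Chinese additionally requires the chi_sim language data to be present.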

PDF Operation Automation

11. Extracting Text from a PDF

import PyPDF2

def extract_text_from_pdf(pdf_path):
    with open(pdf_path, 'rb') as file:
        reader = PyPDF2.PdfReader(file)
        text = ''
        for page in reader.pages:
            text += page.extract_text()
    return text

# Usage example
text = extract_text_from_pdf('/path/to/document.pdf')
print(text)
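This uses the PdfReader API from PyPDF2 3.x; the older PdfFileReader / getPage / extractText calls were removed in that release. The project now continues under the name pypdf, which exposes the same interface.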

Web Scraping

12.1  Extracting Data from a Website

import requests
from bs4 import BeautifulSoup

def scrape_data(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')
    # code to extract the relevant data from the page goes here
    return soup

# Usage example
url = 'https://example.com'
soup = scrape_data(url)
print(soup.title.string)
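As one concrete example of the extraction step left as a comment above, the sketch below collects every link on the page; the function name scrape_links is just for illustration.

import requests
from bs4 import BeautifulSoup

def scrape_links(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')
    # gather the href attribute of every <a> tag on the page
    return [a.get('href') for a in soup.find_all('a') if a.get('href')]

# Usage example
print(scrape_links('https://example.com'))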

12.2  Batch Downloading Images

import requests

def download_images(url, save_directory):
    response = requests.get(url)
    if response.status_code == 200:
        images = response.json()  # assumes the API returns a JSON array of image URLs
        for index, image_url in enumerate(images):
            image_response = requests.get(image_url)
            if image_response.status_code == 200:
                with open(f"{save_directory}/image_{index}.jpg", "wb") as f:
                    f.write(image_response.content)

# Usage example
download_images('https://api.example.com/images', '/path/to/save')

Excel Spreadsheet Automation

13. Reading and Writing Excel Files

import pandas as pd

def read_excel(file_path):
    df = pd.read_excel(file_path)
    return df

def write_to_excel(data, file_path):
    df = pd.DataFrame(data)
    df.to_excel(file_path, index=False)

# Usage example
data = {'Column1': [1, 2, 3], 'Column2': [4, 5, 6]}
write_to_excel(data, '/path/to/output.xlsx')
df = read_excel('/path/to/output.xlsx')
print(df)
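Note that pandas delegates Excel I/O to an external engine; for .xlsx files you generally need openpyxl installed (pip install openpyxl) alongside pandas.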

Image Editing Automation

14. Resizing Images

from PIL import Image

def resize_image(input_path, output_path, width, height):
    image = Image.open(input_path)
    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the equivalent filter
    resized_image = image.resize((width, height), Image.LANCZOS)
    resized_image.save(output_path)

# Usage example
resize_image('/path/to/input.jpg', '/path/to/output.jpg', 800, 600)
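Forcing an exact width and height can distort the picture. If you only need the image to fit inside a bounding box while keeping its aspect ratio, a small sketch using Pillow's thumbnail method is shown below; the function name resize_keep_aspect is illustrative.

from PIL import Image

def resize_keep_aspect(input_path, output_path, max_size=(800, 600)):
    image = Image.open(input_path)
    image.thumbnail(max_size, Image.LANCZOS)  # shrinks in place, preserving the aspect ratio
    image.save(output_path)

# Usage example
resize_keep_aspect('/path/to/input.jpg', '/path/to/output_thumb.jpg')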

 
