## 打印"Hello Word"的程序
def helloword():
    """Print the fixed greeting ``Hello Word!`` to standard output."""
    message = "Hello Word!"
    print(message)
# Demo: print the greeting when the script runs.
helloword()
## 交换两个变量的值
def swap(x, y):
    """Return the two arguments in reversed order as the tuple ``(y, x)``."""
    swapped = (y, x)
    return swapped
# Demo: exchange two module-level values through swap() and print both.
a = 5
b = 10
a, b = swap(a, b)
print(f"a={a}")
print(f"b={b}")
## 求1到100之间所有数字的和
def total_value(min, max):
    """Print the sum of all integers from ``min`` to ``max``, both inclusive.

    The parameter names shadow the ``min``/``max`` builtins inside this
    function only; they are kept because callers may pass them by keyword.
    Nothing is returned; an empty range (``max < min``) prints a sum of 0.
    """
    res = 0
    # range() excludes its stop value, hence the +1 to include ``max``.
    for value in range(min, max + 1):
        res += value
    print(f"{min}到{max}的和是:{res}")
# Demo: print the sum of the integers 1..100 (both ends included).
c = 1
d = 100
total_value(c, d)
## 判断输入的数字是奇数还是偶数
def is_even():
    """Prompt for an integer on stdin and print whether it is even or odd.

    Despite the predicate-like name, nothing is returned; the verdict is
    printed. ``int()`` raises ``ValueError`` if the typed text is not an
    integer literal.
    """
    typed = input("请输入一个整数:")
    num = int(typed)
    print(f"{num}是{'偶数' if num % 2 == 0 else '奇数'}")
# Demo: is_even() calls input(), so this line waits for a number on stdin.
is_even()
## 计算给定数字的阶乘
import math
def factorial():
    """Prompt for an integer on stdin and print its factorial.

    Negative input prints a "not defined" message instead of raising.
    ``int()`` raises ``ValueError`` if the typed text is not an integer.
    """
    n = int(input("请输入一个整数:"))
    if n >= 0:
        print(f"{n}的阶乘是{math.factorial(n)}")
        return
    # math.factorial rejects negative numbers, so report that case by hand.
    print(f"{n}阶乘无定义(负数无阶乘)")
# Demo: factorial() calls input(), so this line waits for a number on stdin.
factorial()
## 生成斐波那契数列的前n项
def fibonacci(n):
    """Return a list with the first ``n`` Fibonacci numbers (0, 1, 1, 2, ...).

    A non-positive ``n`` yields an empty list.
    """
    if n <= 0:
        return []
    sequence = []
    current, following = 0, 1
    for _ in range(n):
        sequence.append(current)
        # Slide the two-number window one step forward.
        current, following = following, current + following
    return sequence
# Demo: print the first ten Fibonacci numbers.
print(fibonacci(10))
# Prints: [0, 1, 1, 2, 3, 5, 8, 13, 21, 34]
## 去除列表中的重复元素
def remove_duplicates(lst):
    """Return a new list with duplicates dropped, keeping first occurrences.

    Elements must be hashable. The input is not modified.
    """
    seen = set()
    unique = []
    for item in lst:
        if item in seen:
            continue
        seen.add(item)
        unique.append(item)
    return unique
# Demo: de-duplicate a sample list while keeping first-seen order.
nums = [3, 1, 3, 4, 2, 10, 9 ,8 ,17, 2, 200]
print(remove_duplicates(nums))
## 反转输入的字符串
def reverse_string(s):
    """Return ``s`` with its characters in reverse order."""
    chars = list(s)
    chars.reverse()
    return ''.join(chars)
# Demo: reverse a sample string character by character.
print(reverse_string("Hello Word"))
## 反转单词顺序
def reverse_words(s):
    """Return the whitespace-separated words of ``s`` in reverse order.

    Words are re-joined with single spaces, so runs of whitespace and any
    leading/trailing whitespace in the input are collapsed.
    """
    words = s.split()
    words.reverse()
    return " ".join(words)
# Demo: reverse the word order of a sample string.
print(reverse_words("Hello Word"))
## 判断一个数是否为质数
import math
def is_prime(n):
    """Return True if ``n`` is a prime number, otherwise False.

    Uses trial division by 2 and then by odd candidates up to the square
    root of ``n``. The bound is checked with exact integer arithmetic
    (``i * i <= n``) rather than a floating-point square root, so very
    large integers neither overflow nor suffer from rounding.

    Args:
        n: The number to test. Values less than or equal to 1 are not prime.

    Returns:
        bool: Whether ``n`` is prime.
    """
    if n <= 1:
        return False
    if n < 4:
        # 2 and 3 are prime and have no candidate divisors to try.
        return True
    if n % 2 == 0:
        return False
    candidate = 3
    while candidate * candidate <= n:
        if n % candidate == 0:
            return False
        candidate += 2  # even candidates were ruled out above
    return True
# Demo: 29 is prime, 27 (3 * 9) is not.
print(is_prime(29))
print(is_prime(27))
## 读取一个文本文件并统计其中单词的出现频率
from collections import Counter
import re
import string
def word_frequency_enhanced(file_path, min_len=3, stopwords=None):
    """Count word occurrences in a UTF-8 text file.

    Each line has ASCII punctuation (``string.punctuation``) removed and is
    lower-cased before words are extracted with the ``\\w+`` pattern.

    Args:
        file_path: Path of the text file to read.
        min_len: Words shorter than this many characters are ignored.
        stopwords: Collection of words to ignore. They are compared against
            the lower-cased words. ``None`` means no stopwords.

    Returns:
        collections.Counter: Mapping of word to number of occurrences.
    """
    excluded = set() if stopwords is None else stopwords
    strip_punctuation = str.maketrans('', '', string.punctuation)
    word_count = Counter()
    with open(file_path, 'r', encoding='utf-8') as file:
        for raw_line in file:
            cleaned = raw_line.translate(strip_punctuation).lower()
            for token in re.findall(r'\b\w+\b', cleaned):
                if len(token) < min_len or token in excluded:
                    continue
                word_count[token] += 1
    return word_count
# Demo: count words of at least 4 characters in example.txt, skipping a few
# common English stopwords, then print the 20 most frequent ones.
# The file is opened for reading, so example.txt must already exist.
custom_stopwords = {'the', 'and', 'of', 'to', 'in'}
result = word_frequency_enhanced('example.txt', min_len=4, stopwords=custom_stopwords)
print(result.most_common(20))
import csv
def save_to_csv(word_freq, output_file):
    """Write word counts to a CSV file, highest count first.

    Args:
        word_freq: Mapping of word to count (anything with ``.items()``).
        output_file: Destination path; the file is created or overwritten
            as UTF-8 with a ``Word,Count`` header row.
    """
    with open(output_file, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['Word', 'Count'])
        # A stable descending sort keeps equal counts in their original order.
        ranked = sorted(word_freq.items(), key=lambda pair: pair[1], reverse=True)
        for word, count in ranked:
            writer.writerow((word, count))
# Demo: write the word counts computed above to word_freq.csv.
save_to_csv(result, 'word_freq.csv')
## 计时装饰器,测量函数执行时间
# 基础计时装饰器
import time
def timer(func):
    """Decorator that prints how long each call to ``func`` takes.

    The elapsed wall-clock time is measured with ``time.perf_counter`` and
    printed after the call returns. If ``func`` raises, the exception
    propagates and nothing is printed.

    Args:
        func: The callable to wrap.

    Returns:
        A wrapper with the same call signature and return value as ``func``.
    """
    import functools  # local import keeps this block self-contained

    # functools.wraps copies __name__, __doc__, etc. onto the wrapper so the
    # decorated function still identifies itself correctly.
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        start_time = time.perf_counter()
        result = func(*args, **kwargs)
        end_time = time.perf_counter()
        print(f'函数{func.__name__}执行耗时:{end_time - start_time:.6f}秒')
        return result
    return wrapper
@timer
def example_function(n, m):
    """Return the sum of the integers in the half-open range ``[n, m)``."""
    total = sum(range(n, m))
    return total
# Demo: the @timer wrapper prints the elapsed time of this call.
example_function(1, 1000000)
# Sample output: 函数example_function执行耗时:0.033827秒
# 类实现的计时装饰器
import time
class Timer:
    """Class-based decorator that prints how long each call takes.

    An instance wraps one callable; calling the instance forwards all
    arguments to it, prints the elapsed wall-clock time and returns the
    callable's result. If the callable raises, the exception propagates and
    nothing is printed.
    """

    def __init__(self, func):
        """Wrap ``func`` and copy its metadata onto this instance."""
        import functools  # local import keeps this block self-contained

        # Copy __name__, __doc__, __wrapped__, ... first, so that the explicit
        # ``self.func`` assignment below cannot be clobbered by an attribute
        # of the same name copied from ``func.__dict__``.
        functools.update_wrapper(self, func)
        self.func = func

    def __call__(self, *args, **kwargs):
        """Call the wrapped function, print its duration, return its result."""
        start_time = time.perf_counter()
        result = self.func(*args, **kwargs)
        elapsed = time.perf_counter() - start_time
        # Six decimals, the same precision the function-based timer prints.
        print(f"{self.func.__name__}执行时间:{elapsed:.6f}秒")
        return result
@Timer
def example_function(n, m):
    """Return the sum of the integers in the half-open range ``[n, m)``."""
    total = sum(range(n, m))
    return total
# Demo: the Timer wrapper prints the elapsed time of this call.
example_function(1, 1000000)
# Sample output: example_function执行时间:0.028406秒
## 抓取指定网站的信息
import requests
import time
import os
from bs4 import BeautifulSoup
from urllib.parse import urlparse, urljoin
class AdvancedCrawler:
    """Small breadth-first, same-domain web crawler with an on-disk page cache.

    Pages are fetched with ``requests``, parsed with BeautifulSoup, and the
    raw HTML of every successfully fetched URL is stored in ``cache_dir`` so
    that later requests for the same URL are served from disk.
    """

    def __init__(self, delay=1.0, cache_dir='cache'):
        """Store settings and make sure the cache directory exists.

        Args:
            delay: Seconds to sleep before every network request.
            cache_dir: Directory holding the cached HTML files; created if
                missing.
        """
        self.delay = delay  # pause before each network request
        self.cache_dir = cache_dir
        os.makedirs(cache_dir, exist_ok=True)

    def _get_cache_path(self, url):
        """Return the cache file path for ``url``.

        The file name is ``<netloc>_<sha256 of url>.html``. A content hash is
        used instead of the builtin ``hash()`` because string hashes are
        salted per interpreter process, which would give the same URL a
        different file name on every run and defeat the cache.
        """
        import hashlib  # local import keeps this block self-contained

        # ':' (host:port) is not a valid file-name character on every platform.
        domain = urlparse(url).netloc.replace(':', '_')
        digest = hashlib.sha256(url.encode('utf-8')).hexdigest()
        filename = f"{domain}_{digest}.html"
        return os.path.join(self.cache_dir, filename)

    def fetch(self, url):
        """Return the HTML of ``url``, using the disk cache when possible.

        Raises:
            requests.HTTPError: If the server answers with an error status;
                such responses are not cached.
            requests.RequestException: On connection problems or timeout.
        """
        cache_path = self._get_cache_path(url)
        # Cache lookup: try to read, fall through to the network on a miss.
        try:
            with open(cache_path, 'r', encoding='utf-8') as f:
                return f.read()
        except FileNotFoundError:
            pass
        # Network request.
        time.sleep(self.delay)  # be polite to the server
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        }
        response = requests.get(url, headers=headers, timeout=10)
        # Do not cache (or parse) 4xx/5xx error pages as if they were content.
        response.raise_for_status()
        html = response.text
        # Store in the cache.
        with open(cache_path, 'w', encoding='utf-8') as f:
            f.write(html)
        return html

    def crawl(self, start_url, max_depth=2):
        """Crawl breadth-first from ``start_url``, staying on its domain.

        Args:
            start_url: URL to start from (depth 0).
            max_depth: Largest link distance from ``start_url`` to visit.

        Returns:
            list[dict]: One entry per successfully fetched page with keys
            ``url``, ``depth``, ``title`` and ``first_para`` (first 100
            characters of the first ``<p>``). Pages that fail to download or
            parse are reported on stdout and skipped.
        """
        from collections import deque
        start_domain = urlparse(start_url).netloc
        queue = deque([(start_url, 0)])
        visited = set()
        results = []
        while queue:
            url, depth = queue.popleft()
            if depth > max_depth or url in visited:
                continue
            # Mark before fetching so a failing URL is attempted only once,
            # however many pages link to it.
            visited.add(url)
            try:
                html = self.fetch(url)
                soup = BeautifulSoup(html, 'html.parser')
                # Extract data (example: title and first paragraph).
                # ``.string`` is None when <title> has nested markup.
                title = soup.title.string if soup.title and soup.title.string else "无标题"
                first_para = soup.find('p')
                first_para = first_para.get_text() if first_para else "无内容"
                results.append({
                    'url': url,
                    'depth': depth,
                    'title': title,
                    'first_para': first_para[:100]  # keep the first 100 characters
                })
                # Collect links.
                if depth < max_depth:
                    for link in soup.find_all('a', href=True):
                        # Drop "#fragment": it addresses the same document.
                        absolute_url = urljoin(url, link['href']).partition('#')[0]
                        if absolute_url in visited:
                            continue
                        if urlparse(absolute_url).netloc == start_domain:  # same domain only
                            queue.append((absolute_url, depth + 1))
            except Exception as e:
                # Best-effort crawl: report the failure and keep going.
                print(f"抓取失败 {url}: {e}")
        return results
# Usage example: crawl one level deep from a news page and print the title
# and leading paragraph recorded for every page that was fetched.
crawler = AdvancedCrawler(delay=1.5)
data = crawler.crawl("https://www.unionmem.com/news_detail-107-103.html", max_depth=1)
for item in data:
    print(f"\n深度 {item['depth']}: {item['title']}")
    print("简介:", item['first_para'])
## 用Python实现链表、栈、队列等基本数据结构
#链表(Linked List)实现
#单向链表
class ListNode:
    """Node of a singly linked list: a value plus a link to the next node."""

    def __init__(self, val=0, next=None):
        """Create a node.

        Args:
            val: Payload stored in the node.
            next: The following node, or ``None`` at the end of the list.
                (Shadows the builtin ``next`` inside this method only; the
                name is part of the keyword interface.)
        """
        self.val = val
        self.next = next
class LinkedList:
    """Singly linked list that keeps a reference to its head node only."""

    def __init__(self):
        self.head = None

    def append(self, val):
        """Add a node holding ``val`` at the tail (walks the whole list)."""
        node = ListNode(val)
        if self.head is None:
            self.head = node
            return
        tail = self.head
        while tail.next is not None:
            tail = tail.next
        tail.next = node

    def prepend(self, val):
        """Add a node holding ``val`` in front of the current head."""
        old_head = self.head
        self.head = ListNode(val, old_head)

    def delete(self, val):
        """Remove the first node whose value equals ``val``, if any."""
        previous = None
        node = self.head
        while node is not None:
            if node.val == val:
                if previous is None:
                    self.head = node.next
                else:
                    previous.next = node.next
                return
            previous, node = node, node.next

    def display(self):
        """Print the values from head to tail, ending with ``None``."""
        pieces = []
        node = self.head
        while node is not None:
            pieces.append(str(node.val) + " -> ")
            node = node.next
        print("".join(pieces) + "None")
# Usage example
ll = LinkedList()
ll.append(1)
ll.append(2)
ll.prepend(0)
ll.display() # prints: 0 -> 1 -> 2 -> None
ll.delete(1)
ll.display() # prints: 0 -> 2 -> None
#双向链表
class DoublyListNode:
    """Node of a doubly linked list: a value plus links in both directions."""

    def __init__(self, val=0, prev=None, next=None):
        """Create a node.

        Args:
            val: Payload stored in the node.
            prev: The preceding node, or ``None`` at the front of the list.
            next: The following node, or ``None`` at the end of the list.
        """
        self.val = val
        self.prev = prev
        self.next = next
class DoublyLinkedList:
    """Doubly linked list that tracks both its head and its tail node."""

    def __init__(self):
        self.head = None
        self.tail = None

    def append(self, val):
        """Add a node holding ``val`` after the current tail."""
        if self.head is None:
            only = DoublyListNode(val)
            self.head = only
            self.tail = only
            return
        node = DoublyListNode(val, self.tail)
        self.tail.next = node
        self.tail = node

    def prepend(self, val):
        """Add a node holding ``val`` before the current head."""
        if self.head is None:
            only = DoublyListNode(val)
            self.head = only
            self.tail = only
            return
        node = DoublyListNode(val, None, self.head)
        self.head.prev = node
        self.head = node

    def display_forward(self):
        """Print the values from head to tail, ending with ``None``."""
        pieces = []
        node = self.head
        while node is not None:
            pieces.append(str(node.val) + " <-> ")
            node = node.next
        print("".join(pieces) + "None")

    def display_backward(self):
        """Print the values from tail to head, ending with ``None``."""
        pieces = []
        node = self.tail
        while node is not None:
            pieces.append(str(node.val) + " <-> ")
            node = node.prev
        print("".join(pieces) + "None")
# Usage example
dll = DoublyLinkedList()
dll.append(1)
dll.append(2)
dll.prepend(0)
dll.display_forward() # prints: 0 <-> 1 <-> 2 <-> None
dll.display_backward() # prints: 2 <-> 1 <-> 0 <-> None
#栈(Stack)实现
#使用列表实现
class Stack:
    """Last-in, first-out stack backed by a Python list."""

    def __init__(self):
        self.items = []

    def push(self, item):
        """Put ``item`` on top of the stack."""
        self.items.append(item)

    def pop(self):
        """Remove and return the top item, or ``None`` if the stack is empty."""
        if self.is_empty():
            return None
        return self.items.pop()

    def peek(self):
        """Return the top item without removing it, or ``None`` if empty."""
        return None if self.is_empty() else self.items[-1]

    def is_empty(self):
        """Return True when the stack holds no items."""
        return not self.items

    def size(self):
        """Return the number of items on the stack."""
        return len(self.items)
# Usage example
s = Stack()
s.push(1)
s.push(2)
print(s.pop()) # prints: 2
print(s.peek()) # prints: 1
#使用链表实现
class LinkedStack:
    """Last-in, first-out stack stored as a chain of ``ListNode`` objects."""

    def __init__(self):
        self.top = None

    def push(self, val):
        """Put ``val`` on top by linking a new node in front of the old top."""
        below = self.top
        self.top = ListNode(val, below)

    def pop(self):
        """Remove and return the top value, or ``None`` if the stack is empty."""
        node = self.top
        if node is None:
            return None
        self.top = node.next
        return node.val

    def peek(self):
        """Return the top value without removing it, or ``None`` if empty."""
        if self.top is None:
            return None
        return self.top.val

    def is_empty(self):
        """Return True when the stack holds no nodes."""
        return self.top is None
# Usage example
ls = LinkedStack()
ls.push(10)
ls.push(20)
print(ls.pop()) # prints: 20
#队列(Queue)实现
#使用列表实现(简单但低效)
class ListQueue:
    """First-in, first-out queue backed by a Python list.

    New items are inserted at index 0 and the oldest item is taken from the
    end of the list, so every enqueue shifts the whole list.
    """

    def __init__(self):
        self.items = []

    def enqueue(self, item):
        """Add ``item`` at the back of the queue."""
        self.items.insert(0, item)

    def dequeue(self):
        """Remove and return the oldest item, or ``None`` if the queue is empty."""
        if self.is_empty():
            return None
        return self.items.pop()

    def is_empty(self):
        """Return True when the queue holds no items."""
        return not self.items

    def size(self):
        """Return the number of queued items."""
        return len(self.items)
# Usage example
q = ListQueue()
q.enqueue("A")
q.enqueue("B")
print(q.dequeue()) # prints: A
#高效队列(使用collections.deque)
from collections import deque
class EfficientQueue:
    """First-in, first-out queue backed by ``collections.deque``.

    Items enter on the left end of the deque and leave from the right end.
    """

    def __init__(self):
        self.queue = deque()

    def enqueue(self, item):
        """Add ``item`` at the back of the queue."""
        self.queue.appendleft(item)

    def dequeue(self):
        """Remove and return the oldest item, or ``None`` if the queue is empty."""
        if not self.queue:
            return None
        return self.queue.pop()

    def size(self):
        """Return the number of queued items."""
        return len(self.queue)
# Usage example
eq = EfficientQueue()
eq.enqueue(1)
eq.enqueue(2)
print(eq.dequeue()) # prints: 1
#链表实现队列
class LinkedQueue:
    """First-in, first-out queue stored as a chain of ``ListNode`` objects.

    ``front`` is the node dequeued next; ``rear`` is the most recently
    enqueued node. Both are ``None`` while the queue is empty.
    """

    def __init__(self):
        self.front = None
        self.rear = None

    def enqueue(self, val):
        """Add ``val`` at the rear of the queue."""
        if self.rear is None:
            only = ListNode(val)
            self.front = only
            self.rear = only
            return
        node = ListNode(val)
        self.rear.next = node
        self.rear = node

    def dequeue(self):
        """Remove and return the front value, or ``None`` if the queue is empty."""
        node = self.front
        if node is None:
            return None
        self.front = node.next
        if self.front is None:
            # The queue just became empty; drop the stale rear reference too.
            self.rear = None
        return node.val
# Usage example
lq = LinkedQueue()
lq.enqueue("X")
lq.enqueue("Y")
print(lq.dequeue()) # prints: X
#双端队列(Deque)实现
class Deque:
    """Double-ended queue backed by a Python list.

    The *front* is the end of the list and the *rear* is index 0.
    """

    def __init__(self):
        self.items = []

    def add_front(self, item):
        """Add ``item`` at the front (end of the underlying list)."""
        self.items.append(item)

    def add_rear(self, item):
        """Add ``item`` at the rear (index 0 of the underlying list)."""
        self.items.insert(0, item)

    def remove_front(self):
        """Remove and return the front item, or ``None`` if empty."""
        if not self.items:
            return None
        return self.items.pop()

    def remove_rear(self):
        """Remove and return the rear item, or ``None`` if empty."""
        if not self.items:
            return None
        return self.items.pop(0)

    def size(self):
        """Return the number of stored items."""
        return len(self.items)
# Usage example
d = Deque()
d.add_rear(1)
d.add_front(2)
print(d.remove_rear()) # prints: 1
#哈希表(Hash Table)实现
class HashTable:
    """Minimal hash table using separate chaining.

    ``table`` is a fixed-length list of buckets; each bucket is a list of
    ``(key, value)`` tuples. The number of buckets never changes.
    """

    def __init__(self, size=10):
        """Create ``size`` empty buckets."""
        self.size = size
        self.table = [[] for _ in range(size)]

    def _hash(self, key):
        """Map ``key`` to a bucket index in ``range(self.size)``."""
        return hash(key) % self.size

    def set(self, key, value):
        """Store ``value`` under ``key``, replacing any existing entry."""
        bucket = self.table[self._hash(key)]
        for position, entry in enumerate(bucket):
            if entry[0] == key:
                bucket[position] = (key, value)
                return
        bucket.append((key, value))

    def get(self, key):
        """Return the value stored under ``key``, or ``None`` if absent."""
        bucket = self.table[self._hash(key)]
        for stored_key, stored_value in bucket:
            if stored_key == key:
                return stored_value
        return None

    def delete(self, key):
        """Remove the entry for ``key`` if present; otherwise do nothing."""
        bucket = self.table[self._hash(key)]
        for position, entry in enumerate(bucket):
            if entry[0] == key:
                del bucket[position]
                return
# Usage example
ht = HashTable()
ht.set("name", "Alice")
ht.set("age", 25)
print(ht.get("name")) # prints: Alice
ht.delete("age")
## 实现常见的排序算法(快速排序、归并排序等)
#快速排序(Quick Sort)
def quick_sort(arr):
    """Return the elements of ``arr`` in ascending order (quicksort).

    The middle element is used as the pivot and the input is split into
    "smaller", "equal" and "larger" groups that are sorted recursively.
    Inputs of length 0 or 1 are returned as-is (the same object); longer
    inputs produce a new list and leave ``arr`` unmodified.
    """
    if len(arr) <= 1:
        return arr
    pivot = arr[len(arr) // 2]  # middle element as the pivot
    smaller, equal, larger = [], [], []
    for item in arr:
        if item < pivot:
            smaller.append(item)
        elif item == pivot:
            equal.append(item)
        elif item > pivot:
            larger.append(item)
    return quick_sort(smaller) + equal + quick_sort(larger)
# 原地排序版本(更高效)
def quick_sort_inplace(arr, low=0, high=None):
    """Sort ``arr[low..high]`` (both inclusive) in place with quicksort.

    Only the smaller side of each partition is handled recursively; the
    larger side is processed by the loop. This bounds the recursion depth
    by O(log n), so inputs that produce lopsided partitions (for example an
    already sorted list) no longer hit Python's recursion limit.

    Args:
        arr: Mutable sequence to sort.
        low: Index of the first element of the range to sort.
        high: Index of the last element of the range; ``None`` means the
            last index of ``arr``.

    Returns:
        None. ``arr`` is modified in place.
    """
    if high is None:
        high = len(arr) - 1
    while low < high:
        pivot_index = partition(arr, low, high)
        if pivot_index - low < high - pivot_index:
            # Left side is smaller: recurse into it, loop over the right.
            quick_sort_inplace(arr, low, pivot_index - 1)
            low = pivot_index + 1
        else:
            # Right side is smaller (or equal): recurse into it, loop left.
            quick_sort_inplace(arr, pivot_index + 1, high)
            high = pivot_index - 1


def partition(arr, low, high):
    """Partition ``arr[low..high]`` around its last element (Lomuto scheme).

    After the call every element left of the returned index is ``<=`` the
    pivot and every element right of it is ``>`` the pivot.

    Returns:
        int: The final index of the pivot.
    """
    pivot = arr[high]  # last element is the pivot
    boundary = low - 1  # last index of the "<= pivot" region
    for j in range(low, high):
        if arr[j] <= pivot:
            boundary += 1
            arr[boundary], arr[j] = arr[j], arr[boundary]
    # Place the pivot directly after the "<= pivot" region.
    arr[boundary + 1], arr[high] = arr[high], arr[boundary + 1]
    return boundary + 1
# Usage example
nums = [3, 6, 8, 10, 1, 2, 1]
print("快速排序:", quick_sort(nums)) # prints: [1, 1, 2, 3, 6, 8, 10]
nums = [3, 6, 8, 10, 1, 2, 1]
quick_sort_inplace(nums)
print("原地快排:", nums) # prints: [1, 1, 2, 3, 6, 8, 10]
#归并排序(Merge Sort)
def merge_sort(arr):
    """Return the elements of ``arr`` in ascending order (merge sort).

    Inputs of length 0 or 1 are returned as-is (the same object). Longer
    inputs are split in half, each half is sorted recursively, and the two
    sorted halves are combined with ``merge``.
    """
    size = len(arr)
    if size <= 1:
        return arr
    half = size // 2
    sorted_left = merge_sort(arr[:half])
    sorted_right = merge_sort(arr[half:])
    return merge(sorted_left, sorted_right)
def merge(left, right):
    """Merge two ascending sequences into one new ascending list.

    The merge is stable: when an element of ``left`` and an element of
    ``right`` compare equal, the one from ``left`` is emitted first. Only
    the ``<`` operator is used to compare elements.

    Args:
        left: Sorted sequence.
        right: Sorted sequence.

    Returns:
        list: All elements of both inputs in ascending order.
    """
    result = []
    i = j = 0
    while i < len(left) and j < len(right):
        # Take from ``right`` only when it is strictly smaller, so that ties
        # keep their original left-before-right order.
        if right[j] < left[i]:
            result.append(right[j])
            j += 1
        else:
            result.append(left[i])
            i += 1
    # At most one of these two tails is non-empty.
    result.extend(left[i:])
    result.extend(right[j:])
    return result
# Usage example
nums = [12, 11, 13, 5, 6, 7]
print("归并排序:", merge_sort(nums)) # prints: [5, 6, 7, 11, 12, 13]
#堆排序(Heap Sort)
def heap_sort(arr):
    """Sort ``arr`` in place in ascending order using a binary max-heap.

    Returns None; the input list itself is reordered.
    """
    def sift_down(root, size):
        # Push arr[root] down until neither child inside arr[:size] is larger.
        while True:
            largest = root
            left = 2 * root + 1
            right = left + 1
            if left < size and arr[left] > arr[largest]:
                largest = left
            if right < size and arr[right] > arr[largest]:
                largest = right
            if largest == root:
                return
            arr[root], arr[largest] = arr[largest], arr[root]
            root = largest

    n = len(arr)
    # Build the max-heap, starting from the last node that has a child.
    for start in reversed(range(n // 2)):
        sift_down(start, n)
    # Repeatedly move the current maximum behind the shrinking heap.
    for end in range(n - 1, 0, -1):
        arr[end], arr[0] = arr[0], arr[end]
        sift_down(0, end)
# Usage example
nums = [12, 11, 13, 5, 6, 7]
heap_sort(nums)
print("堆排序:", nums) # prints: [5, 6, 7, 11, 12, 13]
#冒泡排序(Bubble Sort)
def bubble_sort(arr):
    """Sort ``arr`` in place in ascending order using bubble sort.

    Each pass bubbles the largest remaining element to the end of the
    unsorted prefix. Sorting stops early after a pass without any swap.
    Returns None.
    """
    last = len(arr) - 1  # index of the last element still unsorted
    swapped = True
    while swapped and last > 0:
        swapped = False
        for j in range(last):
            if arr[j] > arr[j + 1]:
                arr[j], arr[j + 1] = arr[j + 1], arr[j]
                swapped = True
        last -= 1
# Usage example
nums = [64, 34, 25, 12, 22, 11, 90]
bubble_sort(nums)
print("冒泡排序:", nums) # prints: [11, 12, 22, 25, 34, 64, 90]
#选择排序(Selection Sort)
def selection_sort(arr):
    """Sort ``arr`` in place in ascending order using selection sort.

    For every position, the first smallest element of the remaining suffix
    is found and swapped into that position. Returns None.
    """
    size = len(arr)
    for slot in range(size):
        # min() returns the first index whose value is minimal.
        smallest = min(range(slot, size), key=lambda idx: arr[idx])
        arr[slot], arr[smallest] = arr[smallest], arr[slot]
# Usage example
nums = [29, 10, 14, 37, 13]
selection_sort(nums)
print("选择排序:", nums) # prints: [10, 13, 14, 29, 37]
#插入排序(Insertion Sort)
def insertion_sort(arr):
    """Sort ``arr`` in place in ascending order using insertion sort.

    Each element is moved left, one swap at a time, until the element
    before it is not larger. Returns None.
    """
    for start in range(1, len(arr)):
        pos = start
        while pos > 0 and arr[pos] < arr[pos - 1]:
            arr[pos - 1], arr[pos] = arr[pos], arr[pos - 1]
            pos -= 1
# Usage example
nums = [12, 11, 13, 5, 6]
insertion_sort(nums)
print("插入排序:", nums) # prints: [5, 6, 11, 12, 13]
#希尔排序(Shell Sort)
def shell_sort(arr):
    """Sort ``arr`` in place in ascending order using Shell sort.

    Runs gapped insertion sorts with the gap halving each round
    (n/2, n/4, ..., 1). Returns None.
    """
    size = len(arr)
    gap = size // 2
    while gap > 0:
        for start in range(gap, size):
            pos = start
            # Gapped insertion: move the element left in steps of ``gap``.
            while pos >= gap and arr[pos - gap] > arr[pos]:
                arr[pos - gap], arr[pos] = arr[pos], arr[pos - gap]
                pos -= gap
        gap //= 2
# Usage example
nums = [12, 34, 54, 2, 3]
shell_sort(nums)
print("希尔排序:", nums) # prints: [2, 3, 12, 34, 54]
#计数排序(Counting Sort)
def counting_sort(arr):
    """Return a new ascending list of the integers in ``arr`` (counting sort).

    A non-comparison sort suited to integers from a small range. Counts are
    indexed relative to the smallest value, so negative integers are handled
    and memory use depends on ``max - min`` rather than on ``max``.

    Args:
        arr: Sequence of integers. It is not modified.

    Returns:
        list: The sorted values; an empty list for empty input.
    """
    if len(arr) == 0:
        return []
    low = min(arr)
    high = max(arr)
    count = [0] * (high - low + 1)
    for num in arr:
        # Offset by ``low`` so the smallest value lands at index 0.
        count[num - low] += 1
    sorted_arr = []
    for offset, occurrences in enumerate(count):
        sorted_arr.extend([low + offset] * occurrences)
    return sorted_arr
# Usage example
nums = [4, 2, 2, 8, 3, 3, 1]
print("计数排序:", counting_sort(nums)) # prints: [1, 2, 2, 3, 3, 4, 8]
#基数排序(Radix Sort)
def radix_sort(arr):
    """Sort a list of integers in place in ascending order (LSD radix sort).

    Negative values are supported: their magnitudes are radix-sorted
    separately and written back, negated, in reverse order in front of the
    non-negative values. Empty input is left untouched.

    Args:
        arr: Mutable sequence of integers.

    Returns:
        None. ``arr`` is modified in place.
    """
    if len(arr) == 0:
        return
    magnitudes = [-value for value in arr if value < 0]
    non_negatives = [value for value in arr if value >= 0]
    for part in (magnitudes, non_negatives):
        if not part:
            continue
        max_num = max(part)
        exp = 1
        # One stable pass per decimal digit, least significant digit first.
        while max_num // exp > 0:
            counting_sort_by_digit(part, exp)
            exp *= 10
    # The largest magnitude is the smallest negative number.
    magnitudes.reverse()
    ordered = [-value for value in magnitudes] + non_negatives
    for index, value in enumerate(ordered):
        arr[index] = value


def counting_sort_by_digit(arr, exp):
    """Stably sort non-negative integers in place by one decimal digit.

    Args:
        arr: Mutable sequence of non-negative integers.
        exp: Place value of the digit to sort by (1, 10, 100, ...).
    """
    n = len(arr)
    output = [0] * n
    count = [0] * 10
    for value in arr:
        count[(value // exp) % 10] += 1
    # Prefix sums turn the counts into end positions for each digit.
    for digit in range(1, 10):
        count[digit] += count[digit - 1]
    # Walk backwards so equal digits keep their relative order (stability).
    for value in reversed(arr):
        digit = (value // exp) % 10
        count[digit] -= 1
        output[count[digit]] = value
    for index in range(n):
        arr[index] = output[index]
# Usage example
nums = [170, 45, 75, 90, 802, 24, 2, 66]
radix_sort(nums)
print("基数排序:", nums) # prints: [2, 24, 45, 66, 75, 90, 170, 802]
#桶排序(Bucket Sort)
def bucket_sort(arr, bucket_size=0.1):
    """Return the numbers in ``arr`` in ascending order using bucket sort.

    Values are distributed into buckets that each cover a range of width
    ``bucket_size`` starting at the minimum value; every bucket is then
    sorted with insertion sort and the buckets are concatenated.

    Args:
        arr: Sequence of numbers. It is not modified.
        bucket_size: Width of the value range covered by one bucket. Must
            be positive; the number of buckets is
            ``int((max - min) / bucket_size) + 1``.

    Returns:
        list: A new sorted list. Empty input is returned unchanged (the
        same object).
    """
    def insertion_sort(bucket):
        # Sort one bucket in place.
        for i in range(1, len(bucket)):
            key = bucket[i]
            j = i - 1
            while j >= 0 and bucket[j] > key:
                bucket[j + 1] = bucket[j]
                j -= 1
            bucket[j + 1] = key

    if len(arr) == 0:
        return arr
    # Determine the value range.
    min_val = min(arr)
    max_val = max(arr)
    # Create the buckets.
    bucket_count = int((max_val - min_val) / bucket_size) + 1
    buckets = [[] for _ in range(bucket_count)]
    # Distribute the elements.
    for num in arr:
        buckets[int((num - min_val) / bucket_size)].append(num)
    # Sort each bucket once and concatenate the results.
    sorted_arr = []
    for bucket in buckets:
        insertion_sort(bucket)
        sorted_arr.extend(bucket)
    return sorted_arr
# Usage example
nums = [0.42, 0.32, 0.33, 0.52, 0.37, 0.47, 0.51]
print("桶排序:", bucket_sort(nums)) # prints: [0.32, 0.33, 0.37, 0.42, 0.47, 0.51, 0.52]
## 多线程或多进程实现一个简单的任务并行处理程序
#法1
import threading
import time
from queue import Queue
def worker(task_queue, result_queue):
    """Thread body: process tasks from ``task_queue`` until ``None`` arrives.

    For every task a result string is put on ``result_queue`` and the task
    is marked done on ``task_queue``. A ``None`` item is the stop signal; it
    ends the loop and is not marked done.
    """
    # iter(callable, sentinel) keeps calling get() until it returns None.
    for task in iter(task_queue.get, None):
        # Simulate an I/O-bound job (such as a network request).
        time.sleep(0.5)
        result = f"Processed: {task}"
        result_queue.put(result)
        task_queue.task_done()
def parallel_thread_processing(tasks, num_workers=4):
    """Process ``tasks`` on a pool of worker threads and collect the results.

    Args:
        tasks: Iterable of task items handed to the ``worker`` threads.
        num_workers: Number of threads to start.

    Returns:
        list: The worker results, in the order they were produced (which
        may differ from the order of ``tasks``).
    """
    task_queue = Queue()
    result_queue = Queue()
    # Create and start the worker threads.
    threads = [
        threading.Thread(target=worker, args=(task_queue, result_queue))
        for _ in range(num_workers)
    ]
    for thread in threads:
        thread.start()
    # Queue the work and block until every task has been marked done.
    for task in tasks:
        task_queue.put(task)
    task_queue.join()
    # One stop signal per worker, then wait for the threads to exit.
    for _ in threads:
        task_queue.put(None)
    for thread in threads:
        thread.join()
    # Drain the results; no thread is producing any more at this point.
    results = []
    while not result_queue.empty():
        results.append(result_queue.get())
    return results
# Usage example: six tasks on the default four worker threads.
tasks = ["task1", "task2", "task3", "task4", "task5", "task6"]
print("多线程结果:", parallel_thread_processing(tasks))
#法2
from concurrent.futures import ThreadPoolExecutor, wait, ALL_COMPLETED, as_completed
def _submit_call(executor, target, arg):
    """Submit ``target`` to ``executor``, unpacking ``arg`` by its type."""
    if isinstance(arg, dict):
        return executor.submit(target, **arg)
    if isinstance(arg, (list, tuple)):
        return executor.submit(target, *arg)
    return executor.submit(target, arg)


def exec_many_workflow(func, args=None, workers=10, choice=1, timeout=30, **kwargs):
    """Run one or several callables concurrently on a thread pool.

    This is a plain module-level function: it is not wrapped in
    ``staticmethod``, which would make the name non-callable on Python
    versions before 3.10.

    Args:
        func: A callable, or a list/tuple of callables.
        args: Iterable with one entry per call. A dict entry is passed as
            keyword arguments, a list/tuple entry as positional arguments,
            anything else as a single positional argument. When ``func`` is
            a list/tuple, callables and entries are paired with ``zip``.
            When empty or ``None``, every callable is called without
            arguments.
        workers: Maximum number of worker threads.
        choice: ``1`` waits (up to ``timeout``) for all tasks and returns
            the futures. ``2`` returns the first truthy result as tasks
            complete; if no task yields a truthy result, the futures are
            returned.
        timeout: Seconds passed to ``concurrent.futures.wait`` when
            ``choice`` is 1.
        **kwargs: ``num`` - how many times to submit a single ``func`` when
            no ``args`` are given (default: once).

    Returns:
        Either the list of ``Future`` objects (use
        ``[f.result() for f in futures]`` to read the results) or, for
        ``choice == 2``, the first truthy result.

    Note:
        Leaving the executor's ``with`` block waits for all submitted tasks,
        so the function returns only after every task has finished.
    """
    num = kwargs.get('num', None)
    with ThreadPoolExecutor(max_workers=workers) as executor:
        if isinstance(func, (list, tuple)):
            if args:
                all_task = [_submit_call(executor, f, arg) for f, arg in zip(func, args)]
            else:
                all_task = [executor.submit(f) for f in func]
        elif args:
            all_task = [_submit_call(executor, func, arg) for arg in args]
        elif num:
            all_task = [executor.submit(func) for _ in range(num)]
        else:
            all_task = [executor.submit(func)]
        if choice == 1:
            wait(all_task, timeout=timeout, return_when=ALL_COMPLETED)
        if choice == 2:
            for future in as_completed(all_task):
                data = future.result()
                if data:
                    return data
    return all_task
# Demo: run is_prime(27) and is_prime(29) on two worker threads. With the
# default choice=1 the call returns Future objects, so this prints the list
# of futures rather than the boolean results.
print(exec_many_workflow(workers=2, func=[is_prime, is_prime], args=([27], [29]), timeout=60))