# 代码改变世界
#
# python基础
#
# 2025-08-04 14:40  Tanwheey  阅读(6)  评论(0)    收藏  举报
## 打印"Hello Word"的程序
def helloword():
    """Print the greeting ``Hello Word!`` to standard output."""
    message = "Hello Word!"
    print(message)

# Demo: print the greeting once when the script runs.
helloword()


## 交换两个变量的值
def swap(x, y):
    """Return the two arguments in reversed order as the tuple ``(y, x)``."""
    return y, x

a = 5
b = 10
a, b = swap(a, b)  # unpack the swapped pair: a becomes 10, b becomes 5
print(f"a={a}")
print(f"b={b}")


## 求1到100之间所有数字的和
def total_value(min, max):
    """Print the sum of every integer from ``min`` to ``max`` inclusive.

    The parameter names shadow the ``min``/``max`` builtins inside this
    function; they are kept so existing keyword callers keep working.

    Args:
        min: first integer of the range.
        max: last integer of the range (included in the sum).
    """
    res = sum(range(min, max + 1))
    print(f"{min}到{max}的和是:{res}")

c = 1
d = 100
total_value(c, d)  # sums 1..100 inclusive, i.e. 5050


## 判断输入的数字是奇数还是偶数
def is_even():
    """Read an integer from standard input and print whether it is even or odd.

    Raises:
        ValueError: if the entered text cannot be parsed as an integer.
    """
    num = int(input("请输入一个整数:"))
    parity = "偶数" if num % 2 == 0 else "奇数"
    print(f"{num}是{parity}")

# Demo: prompts for an integer on standard input when the script runs.
is_even()


## 计算给定数字的阶乘
import math

def factorial():
    """Read an integer from standard input and print its factorial.

    A negative number prints a message saying the factorial is undefined
    instead of calling ``math.factorial``.

    Raises:
        ValueError: if the entered text cannot be parsed as an integer.
    """
    n = int(input("请输入一个整数:"))
    if n < 0:
        print(f"{n}阶乘无定义(负数无阶乘)")
    else:
        print(f"{n}的阶乘是{math.factorial(n)}")

# Demo: prompts for an integer on standard input when the script runs.
factorial()


## 生成斐波那契数列的前n项
def fibonacci(n):
    """Return the first ``n`` Fibonacci numbers, starting from 0.

    Args:
        n: number of terms to generate; values <= 0 give an empty list.

    Returns:
        A new list ``[0, 1, 1, 2, ...]`` containing exactly ``n`` terms.
    """
    if n <= 0:
        return []
    if n == 1:
        return [0]

    fib_sequence = [0, 1]
    # Two terms are already seeded, so n - 2 more appends reach length n.
    for _ in range(2, n):
        fib_sequence.append(fib_sequence[-1] + fib_sequence[-2])
    return fib_sequence

print(fibonacci(10))
# Output: [0, 1, 1, 2, 3, 5, 8, 13, 21, 34]


## 去除列表中的重复元素
def remove_duplicates(lst):
    """Return a new list without duplicates, keeping first occurrences in order.

    Elements are used as dictionary keys, so they must be hashable.
    """
    # dict keys are unique and keep insertion order
    return list(dict.fromkeys(lst))

nums = [3, 1, 3, 4, 2, 10, 9 ,8 ,17, 2, 200]
print(remove_duplicates(nums))  # Output: [3, 1, 4, 2, 10, 9, 8, 17, 200]


## 反转输入的字符串
def reverse_string(s):
    """Return the characters of ``s`` joined in reverse order as a new string."""
    return ''.join(reversed(s))

print(reverse_string("Hello Word"))  # Output: droW olleH


## 反转单词顺序
def reverse_words(s):
    """Return ``s`` with its whitespace-separated words in reverse order.

    Words are joined with single spaces, so runs of whitespace and any
    leading/trailing whitespace in ``s`` do not survive.
    """
    return " ".join(reversed(s.split()))

print(reverse_words("Hello Word"))  # Output: Word Hello


## 判断一个数是否为质数
import math

def is_prime(n):
    """Return True when the integer ``n`` is a prime number.

    Args:
        n: integer to test; every value <= 1 is reported as not prime.

    Returns:
        True if ``n`` has no divisor other than 1 and itself, else False.
    """
    if n <= 1:
        return False
    if n < 4:
        return True  # 2 and 3
    if n % 2 == 0:
        return False
    # math.isqrt is exact for arbitrarily large ints, whereas
    # int(math.sqrt(n)) goes through a float and can round below the true
    # root, skipping the last candidate divisor.
    for i in range(3, math.isqrt(n) + 1, 2):
        if n % i == 0:
            return False
    return True

print(is_prime(29))  # Output: True
print(is_prime(27))  # Output: False (27 = 3 * 9)


## 读取一个文本文件并统计其中单词的出现频率
from collections import Counter
import re
import string

def word_frequency_enhanced(file_path, min_len=3, stopwords=None):
    """Count how often each word occurs in a UTF-8 text file.

    Every line has ASCII punctuation (``string.punctuation``) removed and is
    lower-cased before words are extracted, so ``Don't`` is counted as
    ``dont``.

    Args:
        file_path: path of the text file to read.
        min_len: minimum word length (in characters) for a word to be counted.
        stopwords: container of lower-case words to skip; ``None`` skips nothing.

    Returns:
        A ``collections.Counter`` mapping each word to its number of occurrences.

    Raises:
        OSError: if the file cannot be opened.
    """
    if stopwords is None:
        stopwords = set()

    word_count = Counter()
    translator = str.maketrans('', '', string.punctuation)
    word_pattern = re.compile(r'\b\w+\b')  # compiled once, reused for every line

    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            line = line.translate(translator).lower()
            word_count.update(
                w for w in word_pattern.findall(line)
                if len(w) >= min_len and w not in stopwords
            )

    return word_count

custom_stopwords = {'the', 'and', 'of', 'to', 'in'}
# Reads 'example.txt' relative to the current working directory.
result = word_frequency_enhanced('example.txt', min_len=4, stopwords=custom_stopwords)
print(result.most_common(20))  # the 20 most frequent remaining words

import csv

def save_to_csv(word_freq, output_file):
    """Write word counts to a CSV file, highest count first.

    The file starts with a ``Word,Count`` header row; words with equal
    counts keep the order in which ``word_freq`` yields them.

    Args:
        word_freq: mapping of word -> numeric count (e.g. a ``Counter``).
        output_file: destination path; an existing file is overwritten.
    """
    # newline='' lets the csv module control line endings itself.
    with open(output_file, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['Word', 'Count'])
        writer.writerows(sorted(word_freq.items(), key=lambda x: -x[1]))

# Demo: writes the counts computed above to 'word_freq.csv' in the working directory.
save_to_csv(result, 'word_freq.csv')


## 计时装饰器,测量函数执行时间
# 基础计时装饰器
import time

def timer(func):
    """Decorator that prints how long each call to ``func`` takes.

    The wrapped call's return value is passed through unchanged and its
    exceptions propagate (no timing line is printed in that case).

    Args:
        func: the callable to time.

    Returns:
        A wrapper with the same signature that carries ``func``'s metadata.
    """
    import functools  # local import keeps this block self-contained

    @functools.wraps(func)  # keep __name__/__doc__ of the decorated function
    def wrapper(*args, **kwargs):
        start_time = time.perf_counter()
        result = func(*args, **kwargs)
        end_time = time.perf_counter()
        print(f'函数{func.__name__}执行耗时:{end_time - start_time:.6f}秒')
        return result
    return wrapper

@timer
def example_function(n, m):
    """Return the sum of the integers in ``range(n, m)``."""
    return sum(range(n, m))


example_function(1, 1000000)
# Sample output: 函数example_function执行耗时:0.033827秒

# 类实现的计时装饰器
import time

class Timer:
    """Class-based decorator that prints how long each call takes.

    An instance wraps one callable; calling the instance forwards all
    arguments, prints the elapsed time and returns the callable's result.
    """

    def __init__(self, func):
        """Wrap ``func`` and copy its metadata onto this instance."""
        import functools  # local import keeps this block self-contained

        # Makes the instance report func's __name__/__doc__ and sets
        # __wrapped__, like functools.wraps does for function decorators.
        functools.update_wrapper(self, func)
        self.func = func

    def __call__(self, *args, **kwargs):
        """Call the wrapped function and print its execution time."""
        start_time = time.perf_counter()
        result = self.func(*args, **kwargs)
        elapsed = time.perf_counter() - start_time
        # ".6f" spells out the six decimals the old "4f" spec (a width of 4
        # with default precision) produced, so the output text is unchanged.
        print(f"{self.func.__name__}执行时间:{elapsed:.6f}秒")
        return result

@Timer
def example_function(n, m):
    """Return the sum of the integers in ``range(n, m)``."""
    return sum(range(n, m))


example_function(1, 1000000)
# Sample output: example_function执行时间:0.028406秒


## 抓取指定网站的信息
import requests
import time
import os
from bs4 import BeautifulSoup
from urllib.parse import urlparse, urljoin


class AdvancedCrawler:
    """Same-domain breadth-first crawler with an on-disk HTML cache."""

    def __init__(self, delay=1.0, cache_dir='cache'):
        """Create the crawler and make sure the cache directory exists.

        Args:
            delay: seconds to sleep before every network request.
            cache_dir: directory in which fetched pages are stored.
        """
        self.delay = delay  # pause before each network request
        self.cache_dir = cache_dir
        os.makedirs(cache_dir, exist_ok=True)

    def _get_cache_path(self, url):
        """Return the cache file path used for ``url``.

        The file name is built from a SHA-256 digest of the URL. The builtin
        ``hash()`` is salted per process for strings, so it would give a
        different name on every run and the cache would never be hit again.
        """
        import hashlib  # local import keeps this block self-contained

        # netloc can carry a port ("host:8080"); ':' is not allowed in
        # Windows file names.
        domain = urlparse(url).netloc.replace(':', '_')
        digest = hashlib.sha256(url.encode('utf-8')).hexdigest()[:16]
        filename = f"{domain}_{digest}.html"
        return os.path.join(self.cache_dir, filename)

    def fetch(self, url):
        """Return the HTML of ``url``, served from the cache when available.

        A cache hit involves no delay and no network access. On a miss the
        page is downloaded after ``self.delay`` seconds and stored on disk.

        Raises:
            requests.RequestException: on a network failure or an HTTP error
                status; error responses are never written to the cache.
        """
        cache_path = self._get_cache_path(url)

        # Cache lookup
        if os.path.exists(cache_path):
            with open(cache_path, 'r', encoding='utf-8') as f:
                return f.read()

        # Network request
        time.sleep(self.delay)  # crawl politely
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        }
        response = requests.get(url, headers=headers, timeout=10)
        # Without this a 404/500 page would be cached and served forever.
        response.raise_for_status()
        html = response.text

        # Store in the cache
        with open(cache_path, 'w', encoding='utf-8') as f:
            f.write(html)

        return html

    def crawl(self, start_url, max_depth=2):
        """Crawl breadth-first from ``start_url`` without leaving its domain.

        Args:
            start_url: page at which the crawl starts (depth 0).
            max_depth: deepest link level that is still fetched.

        Returns:
            A list of dicts with the keys ``url``, ``depth``, ``title`` and
            ``first_para`` (first paragraph, cut to 100 characters), one per
            page that was fetched and parsed successfully. Failures are
            printed and skipped.
        """
        from collections import deque

        start_domain = urlparse(start_url).netloc
        queue = deque([(start_url, 0)])
        visited = set()
        results = []

        while queue:
            url, depth = queue.popleft()
            if depth > max_depth or url in visited:
                continue
            # Mark before fetching so a failing URL is attempted only once
            # instead of once per link that points at it.
            visited.add(url)

            try:
                html = self.fetch(url)
                soup = BeautifulSoup(html, 'html.parser')

                # Extract data (example: title and first paragraph).
                # title.string is None for an empty or nested <title>.
                title_tag = soup.title
                title = title_tag.string if title_tag and title_tag.string else "无标题"
                first_para = soup.find('p')
                first_para = first_para.get_text() if first_para else "无内容"

                results.append({
                    'url': url,
                    'depth': depth,
                    'title': title,
                    'first_para': first_para[:100]  # first 100 characters
                })

                # Queue same-domain links for the next level
                if depth < max_depth:
                    for link in soup.find_all('a', href=True):
                        absolute_url = urljoin(url, link['href'])
                        if (urlparse(absolute_url).netloc == start_domain
                                and absolute_url not in visited):
                            queue.append((absolute_url, depth + 1))

            except Exception as e:
                # Best effort: report the failure and continue with the queue.
                print(f"抓取失败 {url}: {e}")

        return results


# Usage example: crawl one page plus the same-domain pages it links to,
# then print the title and first paragraph of each result.
crawler = AdvancedCrawler(delay=1.5)
data = crawler.crawl("https://www.unionmem.com/news_detail-107-103.html", max_depth=1)
for item in data:
    print(f"\n深度 {item['depth']}: {item['title']}")
    print("简介:", item['first_para'])

## 用Python实现链表、栈、队列等基本数据结构
#链表(Linked List)实现
#单向链表
class ListNode:
    """Node of a singly linked list."""

    def __init__(self, val=0, next=None):
        self.val = val    # value stored in this node
        self.next = next  # following node, or None for the last node


class LinkedList:
    """Singly linked list that only tracks its head node."""

    def __init__(self):
        self.head = None

    def append(self, val):
        """Add ``val`` at the tail (walks the whole list to find the end)."""
        node = ListNode(val)
        if self.head is None:
            self.head = node
            return
        current = self.head
        while current.next is not None:
            current = current.next
        current.next = node

    def prepend(self, val):
        """Add ``val`` at the head."""
        self.head = ListNode(val, self.head)

    def delete(self, val):
        """Remove the first node whose value equals ``val``; no-op if absent."""
        if self.head is None:
            return

        if self.head.val == val:
            self.head = self.head.next
            return

        current = self.head
        while current.next is not None:
            if current.next.val == val:
                current.next = current.next.next  # unlink the matching node
                return
            current = current.next

    def display(self):
        """Print the values as ``a -> b -> None`` on one line."""
        current = self.head
        while current is not None:
            print(current.val, end=" -> ")
            current = current.next
        print("None")

# Usage example
ll = LinkedList()
ll.append(1)
ll.append(2)
ll.prepend(0)
ll.display()  # Output: 0 -> 1 -> 2 -> None
ll.delete(1)
ll.display()  # Output: 0 -> 2 -> None

#双向链表
class DoublyListNode:
    """Node of a doubly linked list."""

    def __init__(self, val=0, prev=None, next=None):
        self.val = val    # value stored in this node
        self.prev = prev  # previous node, or None for the first node
        self.next = next  # following node, or None for the last node


class DoublyLinkedList:
    """Doubly linked list that tracks both its head and its tail."""

    def __init__(self):
        self.head = None
        self.tail = None

    def append(self, val):
        """Add ``val`` after the current tail."""
        node = DoublyListNode(val, self.tail)
        if self.head is None:
            self.head = self.tail = node
        else:
            self.tail.next = node
            self.tail = node

    def prepend(self, val):
        """Add ``val`` before the current head."""
        node = DoublyListNode(val, None, self.head)
        if self.head is None:
            self.head = self.tail = node
        else:
            self.head.prev = node
            self.head = node

    def display_forward(self):
        """Print the values from head to tail as ``a <-> b <-> None``."""
        current = self.head
        while current is not None:
            print(current.val, end=" <-> ")
            current = current.next
        print("None")

    def display_backward(self):
        """Print the values from tail to head as ``b <-> a <-> None``."""
        current = self.tail
        while current is not None:
            print(current.val, end=" <-> ")
            current = current.prev
        print("None")

# Usage example
dll = DoublyLinkedList()
dll.append(1)
dll.append(2)
dll.prepend(0)
dll.display_forward()  # Output: 0 <-> 1 <-> 2 <-> None
dll.display_backward()  # Output: 2 <-> 1 <-> 0 <-> None

#栈(Stack)实现
#使用列表实现
class Stack:
    """LIFO stack backed by a Python list (top of stack = end of list)."""

    def __init__(self):
        self.items = []

    def push(self, item):
        """Put ``item`` on top of the stack."""
        self.items.append(item)

    def pop(self):
        """Remove and return the top item, or None if the stack is empty."""
        return self.items.pop() if self.items else None

    def peek(self):
        """Return the top item without removing it, or None if empty."""
        return self.items[-1] if self.items else None

    def is_empty(self):
        """Return True when the stack holds no items."""
        return len(self.items) == 0

    def size(self):
        """Return the number of items on the stack."""
        return len(self.items)

# Usage example
s = Stack()
s.push(1)
s.push(2)
print(s.pop())  # Output: 2
print(s.peek())  # Output: 1

#使用链表实现
class LinkedStack:
    """LIFO stack built from ``ListNode`` objects (top = first node)."""

    def __init__(self):
        self.top = None

    def push(self, val):
        """Put ``val`` on top by linking a new node in front of the old top."""
        self.top = ListNode(val, self.top)

    def pop(self):
        """Remove and return the top value, or None if the stack is empty."""
        if self.top is None:
            return None
        val = self.top.val
        self.top = self.top.next
        return val

    def peek(self):
        """Return the top value without removing it, or None if empty."""
        return self.top.val if self.top else None

    def is_empty(self):
        """Return True when the stack holds no nodes."""
        return self.top is None

# Usage example
ls = LinkedStack()
ls.push(10)
ls.push(20)
print(ls.pop())  # Output: 20

#队列(Queue)实现
#使用列表实现(简单但低效)
class ListQueue:
    """FIFO queue backed by a list.

    New items are inserted at index 0 and the oldest item is popped from the
    end, so every enqueue shifts the whole list (simple but inefficient).
    """

    def __init__(self):
        self.items = []

    def enqueue(self, item):
        """Add ``item`` at the back of the queue."""
        self.items.insert(0, item)

    def dequeue(self):
        """Remove and return the oldest item, or None if the queue is empty."""
        return self.items.pop() if self.items else None

    def is_empty(self):
        """Return True when the queue holds no items."""
        return len(self.items) == 0

    def size(self):
        """Return the number of queued items."""
        return len(self.items)

# Usage example
q = ListQueue()
q.enqueue("A")
q.enqueue("B")
print(q.dequeue())  # Output: A

#高效队列(使用collections.deque)
from collections import deque

class EfficientQueue:
    """FIFO queue backed by ``collections.deque``.

    Items enter on the left and leave on the right.
    """

    def __init__(self):
        self.queue = deque()

    def enqueue(self, item):
        """Add ``item`` at the back of the queue."""
        self.queue.appendleft(item)

    def dequeue(self):
        """Remove and return the oldest item, or None if the queue is empty."""
        return self.queue.pop() if self.queue else None

    def is_empty(self):
        """Return True when the queue holds no items."""
        return not self.queue

    def size(self):
        """Return the number of queued items."""
        return len(self.queue)

# Usage example
eq = EfficientQueue()
eq.enqueue(1)
eq.enqueue(2)
print(eq.dequeue())  # Output: 1

#链表实现队列
class LinkedQueue:
    """FIFO queue built from ``ListNode`` objects with front and rear pointers."""

    def __init__(self):
        self.front = self.rear = None

    def enqueue(self, val):
        """Add ``val`` behind the current rear node."""
        node = ListNode(val)
        if self.rear is None:
            self.front = self.rear = node
        else:
            self.rear.next = node
            self.rear = node

    def dequeue(self):
        """Remove and return the front value, or None if the queue is empty."""
        if self.front is None:
            return None
        val = self.front.val
        self.front = self.front.next
        if self.front is None:
            # The last node was removed, so the rear pointer must not keep it.
            self.rear = None
        return val

    def is_empty(self):
        """Return True when the queue holds no nodes."""
        return self.front is None

# Usage example
lq = LinkedQueue()
lq.enqueue("X")
lq.enqueue("Y")
print(lq.dequeue())  # Output: X

#双端队列(Deque)实现
class Deque:
    """Double-ended queue backed by a list.

    The front of the deque is the END of ``self.items`` and the rear is
    index 0, so rear operations shift the whole list.
    """

    def __init__(self):
        self.items = []

    def add_front(self, item):
        """Add ``item`` at the front (end of the list)."""
        self.items.append(item)

    def add_rear(self, item):
        """Add ``item`` at the rear (index 0 of the list)."""
        self.items.insert(0, item)

    def remove_front(self):
        """Remove and return the front item, or None if the deque is empty."""
        return self.items.pop() if self.items else None

    def remove_rear(self):
        """Remove and return the rear item, or None if the deque is empty."""
        return self.items.pop(0) if self.items else None

    def is_empty(self):
        """Return True when the deque holds no items."""
        return not self.items

    def size(self):
        """Return the number of stored items."""
        return len(self.items)

# Usage example
d = Deque()
d.add_rear(1)
d.add_front(2)
print(d.remove_rear())  # Output: 1

#哈希表(Hash Table)实现
class HashTable:
    """Hash table with separate chaining.

    ``self.table`` holds ``size`` slots; each slot is a list of
    ``(key, value)`` tuples whose keys hash to that slot. The number of
    slots is fixed (no resizing).
    """

    def __init__(self, size=10):
        """Create an empty table with ``size`` slots.

        Raises:
            ValueError: if ``size`` is not positive (slot selection uses
                ``hash(key) % size``).
        """
        if size <= 0:
            raise ValueError("size must be a positive integer")
        self.size = size
        self.table = [[] for _ in range(size)]

    def _hash(self, key):
        """Return the slot index for ``key``."""
        return hash(key) % self.size

    def set(self, key, value):
        """Insert ``key`` with ``value``, replacing any existing entry."""
        bucket = self.table[self._hash(key)]
        for i, (k, _) in enumerate(bucket):
            if k == key:
                bucket[i] = (key, value)
                return
        bucket.append((key, value))

    def get(self, key):
        """Return the value stored for ``key``, or None if it is absent."""
        for k, v in self.table[self._hash(key)]:
            if k == key:
                return v
        return None

    def delete(self, key):
        """Remove ``key`` if present; do nothing otherwise."""
        bucket = self.table[self._hash(key)]
        for i, (k, _) in enumerate(bucket):
            if k == key:
                del bucket[i]
                return  # stop right after mutating the list being iterated

# Usage example
ht = HashTable()
ht.set("name", "Alice")
ht.set("age", 25)
print(ht.get("name"))  # Output: Alice
ht.delete("age")

## 实现常见的排序算法(快速排序、归并排序等)
#快速排序(Quick Sort)
def quick_sort(arr):
    """Return a new ascending-sorted list of ``arr`` (divide and conquer).

    The input is never modified and the result is always a fresh list, even
    for inputs of length 0 or 1.
    """
    if len(arr) <= 1:
        # Copy instead of returning arr itself so callers can safely mutate
        # the result.
        return list(arr)
    pivot = arr[len(arr) // 2]  # middle element as pivot
    left = [x for x in arr if x < pivot]
    middle = [x for x in arr if x == pivot]
    right = [x for x in arr if x > pivot]
    return quick_sort(left) + middle + quick_sort(right)

# 原地排序版本(更高效)
def quick_sort_inplace(arr, low=0, high=None):
    """Sort ``arr[low..high]`` in place in ascending order (quick sort).

    Args:
        arr: mutable sequence to sort.
        low: first index of the range to sort.
        high: last index of the range (inclusive); ``None`` means the last
            index of ``arr``.
    """
    if high is None:
        high = len(arr) - 1
    while low < high:
        pivot_index = partition(arr, low, high)
        # Recurse only into the smaller side and keep looping on the larger
        # one. This bounds the recursion depth by O(log n); plain two-way
        # recursion goes n levels deep on already sorted input and raises
        # RecursionError for lists of about a thousand elements.
        if pivot_index - low < high - pivot_index:
            quick_sort_inplace(arr, low, pivot_index - 1)
            low = pivot_index + 1
        else:
            quick_sort_inplace(arr, pivot_index + 1, high)
            high = pivot_index - 1


def partition(arr, low, high):
    """Partition ``arr[low..high]`` around ``arr[high]`` and return the pivot's index.

    After the call every element left of the returned index is <= the pivot
    and every element right of it is greater.
    """
    pivot = arr[high]  # last element as pivot
    i = low - 1  # right edge of the "<= pivot" region
    for j in range(low, high):
        if arr[j] <= pivot:
            i += 1
            arr[i], arr[j] = arr[j], arr[i]
    arr[i + 1], arr[high] = arr[high], arr[i + 1]
    return i + 1

# Usage example
nums = [3, 6, 8, 10, 1, 2, 1]
print("快速排序:", quick_sort(nums))  # Output: [1, 1, 2, 3, 6, 8, 10]
nums = [3, 6, 8, 10, 1, 2, 1]
quick_sort_inplace(nums)
print("原地快排:", nums)  # Output: [1, 1, 2, 3, 6, 8, 10]

#归并排序(Merge Sort)
def merge_sort(arr):
    """Return a new ascending-sorted list of ``arr`` (stable merge sort).

    The input is never modified and the result is always a fresh list.
    """
    if len(arr) <= 1:
        return list(arr)  # copy, so the caller never gets the input object back

    mid = len(arr) // 2
    left = merge_sort(arr[:mid])
    right = merge_sort(arr[mid:])
    return merge(left, right)


def merge(left, right):
    """Merge two ascending-sorted lists into one new sorted list.

    On ties the element from ``left`` comes first, which keeps equal
    elements in their original relative order (stability).
    """
    result = []
    i = j = 0
    while i < len(left) and j < len(right):
        # "<=" rather than "<": with "<" equal elements from the right half
        # would overtake those from the left half.
        if left[i] <= right[j]:
            result.append(left[i])
            i += 1
        else:
            result.append(right[j])
            j += 1
    result.extend(left[i:])
    result.extend(right[j:])
    return result

# Usage example
nums = [12, 11, 13, 5, 6, 7]
print("归并排序:", merge_sort(nums))  # Output: [5, 6, 7, 11, 12, 13]

#堆排序(Heap Sort)
def heap_sort(arr):
    """Sort ``arr`` in place in ascending order using heap sort."""

    def heapify(heap, n, i):
        """Sift ``heap[i]`` down so the subtree rooted at ``i`` is a max-heap.

        Only the first ``n`` elements of ``heap`` belong to the heap.
        """
        largest = i
        left = 2 * i + 1
        right = 2 * i + 2

        if left < n and heap[left] > heap[largest]:
            largest = left
        if right < n and heap[right] > heap[largest]:
            largest = right
        if largest != i:
            heap[i], heap[largest] = heap[largest], heap[i]
            heapify(heap, n, largest)

    n = len(arr)

    # Build a max-heap, starting from the last node that has children.
    for i in range(n // 2 - 1, -1, -1):
        heapify(arr, n, i)

    # Move the current maximum behind the shrinking heap, then repair it.
    for i in range(n - 1, 0, -1):
        arr[i], arr[0] = arr[0], arr[i]
        heapify(arr, i, 0)

# Usage example
nums = [12, 11, 13, 5, 6, 7]
heap_sort(nums)
print("堆排序:", nums)  # Output: [5, 6, 7, 11, 12, 13]

#冒泡排序(Bubble Sort)
def bubble_sort(arr):
    """Sort ``arr`` in place in ascending order using bubble sort.

    Stops as soon as a full pass makes no swap.
    """
    n = len(arr)
    for i in range(n):
        swapped = False  # early-exit flag for this pass
        # After pass i the last i elements are already in their final place.
        for j in range(n - i - 1):
            if arr[j] > arr[j + 1]:
                arr[j], arr[j + 1] = arr[j + 1], arr[j]
                swapped = True
        if not swapped:
            break

# Usage example
nums = [64, 34, 25, 12, 22, 11, 90]
bubble_sort(nums)
print("冒泡排序:", nums)  # Output: [11, 12, 22, 25, 34, 64, 90]

#选择排序(Selection Sort)
def selection_sort(arr):
    """Sort ``arr`` in place in ascending order using selection sort."""
    size = len(arr)
    for i in range(size):
        # Find the smallest element of the unsorted tail arr[i:] ...
        min_idx = i
        for j in range(i + 1, size):
            if arr[j] < arr[min_idx]:
                min_idx = j
        # ... and swap it to the front of that tail.
        arr[i], arr[min_idx] = arr[min_idx], arr[i]

# Usage example
nums = [29, 10, 14, 37, 13]
selection_sort(nums)
print("选择排序:", nums)  # Output: [10, 13, 14, 29, 37]

#插入排序(Insertion Sort)
def insertion_sort(arr):
    """Sort ``arr`` in place in ascending order using insertion sort."""
    for i in range(1, len(arr)):
        key = arr[i]
        j = i - 1
        # Shift every larger element of the sorted prefix one slot right
        # to open the position where key belongs.
        while j >= 0 and key < arr[j]:
            arr[j + 1] = arr[j]
            j -= 1
        arr[j + 1] = key

# Usage example
nums = [12, 11, 13, 5, 6]
insertion_sort(nums)
print("插入排序:", nums)  # Output: [5, 6, 11, 12, 13]

#希尔排序(Shell Sort)
def shell_sort(arr):
    """Sort ``arr`` in place in ascending order using Shell sort.

    Runs gapped insertion sorts with the gap halved each round
    (n//2, n//4, ..., 1).
    """
    n = len(arr)
    gap = n // 2

    while gap > 0:
        # Insertion sort over elements that are ``gap`` positions apart.
        for i in range(gap, n):
            temp = arr[i]
            j = i
            while j >= gap and arr[j - gap] > temp:
                arr[j] = arr[j - gap]
                j -= gap
            arr[j] = temp
        gap //= 2

# Usage example
nums = [12, 34, 54, 2, 3]
shell_sort(nums)
print("希尔排序:", nums)  # Output: [2, 3, 12, 34, 54]

#计数排序(Counting Sort)
def counting_sort(arr):
    """Return a new ascending-sorted list of the integers in ``arr``.

    Non-comparison sort that suits inputs with a small value range: it
    allocates one counter per value between ``min(arr)`` and ``max(arr)``.
    Negative integers are supported and an empty input gives ``[]``.
    """
    if len(arr) == 0:
        return []

    min_val = min(arr)
    max_val = max(arr)
    # Counters are indexed by (value - min_val). Indexing by the raw value
    # would make negative numbers wrap around to the end of the list.
    count = [0] * (max_val - min_val + 1)

    for num in arr:
        count[num - min_val] += 1

    sorted_arr = []
    for offset, occurrences in enumerate(count):
        sorted_arr.extend([min_val + offset] * occurrences)

    return sorted_arr

# Usage example
nums = [4, 2, 2, 8, 3, 3, 1]
print("计数排序:", counting_sort(nums))  # Output: [1, 2, 2, 3, 3, 4, 8]

#基数排序(Radix Sort)
def radix_sort(arr):
    """Sort the integers in ``arr`` in place in ascending order (LSD radix sort).

    An empty input is left untouched. Negative integers are sorted by
    absolute value and written back reversed and negated, in front of the
    non-negative ones.
    """
    if len(arr) == 0:
        return

    negatives = [-x for x in arr if x < 0]  # stored as absolute values
    non_negatives = [x for x in arr if x >= 0]

    for part in (negatives, non_negatives):
        if not part:
            continue
        max_num = max(part)
        exp = 1
        # One stable counting pass per decimal digit of the largest value.
        while max_num // exp > 0:
            counting_sort_by_digit(part, exp)
            exp *= 10

    # Largest absolute value first gives ascending order once negated again.
    merged = [-x for x in reversed(negatives)] + non_negatives
    for i, value in enumerate(merged):
        arr[i] = value


def counting_sort_by_digit(arr, exp):
    """Stable in-place counting sort of ``arr`` by one decimal digit.

    Args:
        arr: list of non-negative integers.
        exp: place value of the digit to sort by (1, 10, 100, ...).
    """
    n = len(arr)
    output = [0] * n
    count = [0] * 10

    for value in arr:
        count[(value // exp) % 10] += 1

    # Prefix sums: count[d] becomes the end position (exclusive) of digit d.
    for digit in range(1, 10):
        count[digit] += count[digit - 1]

    # Walk backwards so equal digits keep their relative order.
    for value in reversed(arr):
        digit = (value // exp) % 10
        count[digit] -= 1
        output[count[digit]] = value

    for i in range(n):
        arr[i] = output[i]

# Usage example
nums = [170, 45, 75, 90, 802, 24, 2, 66]
radix_sort(nums)
print("基数排序:", nums)  # Output: [2, 24, 45, 66, 75, 90, 170, 802]

#桶排序(Bucket Sort)
def bucket_sort(arr, bucket_size=0.1):
    """Return a new ascending-sorted list of the numbers in ``arr`` (bucket sort).

    Values are spread over buckets that each cover ``bucket_size`` of the
    value range; every bucket is insertion-sorted once and the buckets are
    concatenated in order.

    Args:
        arr: sequence of numbers; it is not modified.
        bucket_size: width of the value range covered by one bucket.

    Raises:
        ValueError: if ``bucket_size`` is not positive.
    """
    if bucket_size <= 0:
        raise ValueError("bucket_size must be positive")

    def insertion_sort(bucket):
        """Sort one bucket in place."""
        for i in range(1, len(bucket)):
            key = bucket[i]
            j = i - 1
            while j >= 0 and bucket[j] > key:
                bucket[j + 1] = bucket[j]
                j -= 1
            bucket[j + 1] = key

    if len(arr) == 0:
        return []

    # Value range
    min_val = min(arr)
    max_val = max(arr)

    # One bucket per bucket_size-wide slice of [min_val, max_val]
    bucket_count = int((max_val - min_val) / bucket_size) + 1
    buckets = [[] for _ in range(bucket_count)]

    # Distribute the values
    for num in arr:
        buckets[int((num - min_val) / bucket_size)].append(num)

    # Sort each bucket once and concatenate
    sorted_arr = []
    for bucket in buckets:
        insertion_sort(bucket)
        sorted_arr.extend(bucket)

    return sorted_arr

# Usage example
nums = [0.42, 0.32, 0.33, 0.52, 0.37, 0.47, 0.51]
print("桶排序:", bucket_sort(nums))  # Output: [0.32, 0.33, 0.37, 0.42, 0.47, 0.51, 0.52]


## 多线程或多进程实现一个简单的任务并行处理程序
#法1
import threading
import time
from queue import Queue


def worker(task_queue, result_queue, delay=0.5):
    """Worker-thread loop: process tasks until the ``None`` stop signal arrives.

    Args:
        task_queue: ``queue.Queue`` of tasks; ``None`` tells the worker to exit.
        result_queue: ``queue.Queue`` receiving one result string per task.
        delay: seconds to sleep per task, simulating an I/O-bound operation.
    """
    while True:
        task = task_queue.get()
        if task is None:  # stop signal
            break
        try:
            # Simulated I/O-bound work (e.g. a network request)
            time.sleep(delay)
            result_queue.put(f"Processed: {task}")
        finally:
            # Always acknowledge the task, otherwise task_queue.join() in
            # the coordinator would block forever after a failure.
            task_queue.task_done()


def parallel_thread_processing(tasks, num_workers=4, delay=0.5):
    """Process ``tasks`` concurrently with a pool of worker threads.

    Args:
        tasks: iterable of tasks; ``None`` is not allowed because it is the
            workers' stop signal.
        num_workers: number of worker threads to start (at least 1).
        delay: per-task sleep forwarded to ``worker``.

    Returns:
        A list with one ``"Processed: <task>"`` string per task, in the
        order the workers produced them (not necessarily input order).

    Raises:
        ValueError: if ``num_workers`` < 1 or a task is ``None``; both
            cases would otherwise block forever in ``task_queue.join()``.
    """
    tasks = list(tasks)
    if num_workers < 1:
        raise ValueError("num_workers must be at least 1")
    if any(task is None for task in tasks):
        raise ValueError("None is reserved as the stop signal and cannot be a task")

    task_queue = Queue()
    result_queue = Queue()

    # Create and start the worker threads
    threads = []
    for _ in range(num_workers):
        t = threading.Thread(target=worker, args=(task_queue, result_queue, delay))
        t.start()
        threads.append(t)

    # Hand out the work
    for task in tasks:
        task_queue.put(task)

    # Block until every task has been acknowledged with task_done()
    task_queue.join()

    # Stop the workers: one stop signal per thread
    for _ in range(num_workers):
        task_queue.put(None)
    for t in threads:
        t.join()

    # Collect the results; all producers have exited, so empty() is reliable
    results = []
    while not result_queue.empty():
        results.append(result_queue.get())
    return results


# Usage example
tasks = ["task1", "task2", "task3", "task4", "task5", "task6"]
print("多线程结果:", parallel_thread_processing(tasks))  # one "Processed: ..." string per task

#法2
from concurrent.futures import ThreadPoolExecutor, wait, ALL_COMPLETED, as_completed

def exec_many_workflow(func, args=None, workers=10, choice=1, timeout=30, **kwargs):
    """Run one or more callables concurrently in a thread pool.

    Args:
        func: a callable, or a list/tuple of callables.
        args: iterable with one entry per call. A dict entry is unpacked as
            keyword arguments, a list/tuple entry as positional arguments,
            and anything else is passed as the single argument. When ``func``
            is a sequence, callables and entries are paired with ``zip``.
            If empty or ``None``, every callable is called without arguments.
        workers: maximum number of pool threads.
        choice: 1 waits for the calls and returns their futures. 2 returns
            the first truthy result in completion order without waiting for
            the remaining calls, which keep running in the background.
        timeout: seconds passed to ``concurrent.futures.wait`` when
            ``choice`` is 1. The pool is still joined before returning, so
            every returned future is finished.
        **kwargs: ``num`` - when ``func`` is a single callable and ``args``
            is empty, how many times to call it (default: once).

    Returns:
        The list of futures (use ``[f.result() for f in futures]`` to read
        the results), or for ``choice == 2`` the first truthy result; if no
        result is truthy the list of futures is returned.
    """
    def _submit(executor, f, arg):
        """Submit ``f`` with ``arg`` unpacked according to its type."""
        if isinstance(arg, dict):
            return executor.submit(f, **arg)
        if isinstance(arg, (list, tuple)):
            return executor.submit(f, *arg)
        return executor.submit(f, arg)

    num = kwargs.get('num', None)
    executor = ThreadPoolExecutor(max_workers=workers)
    wait_for_pending = True
    try:
        if isinstance(func, (list, tuple)):
            if args:
                all_task = [_submit(executor, f, arg) for f, arg in zip(func, args)]
            else:
                all_task = [executor.submit(f) for f in func]
        elif args:
            all_task = [_submit(executor, func, arg) for arg in args]
        else:
            all_task = [executor.submit(func) for _ in range(num or 1)]

        if choice == 1:
            wait(all_task, timeout=timeout, return_when=ALL_COMPLETED)
        if choice == 2:
            for future in as_completed(all_task):
                data = future.result()
                if data:
                    # Return right away; a "with" block would join the pool
                    # here and delay the result until every call finished.
                    wait_for_pending = False
                    return data
        return all_task
    finally:
        executor.shutdown(wait=wait_for_pending)

# Demo: prints the list of Future objects; call .result() on each to get is_prime's booleans.
print(exec_many_workflow(workers=2, func=[is_prime, is_prime], args=([27], [29]), timeout=60))