起名字小项目
1. 用单字全排列生成候选名字:
输入为single.txt,格式如下:
风
玺
龙
雨
灵
若
思
博
修
程序为:
# -*- coding: utf-8 -*-
"""Generate candidate given names from the characters listed in single.txt.

single.txt holds one character per line.  Every ordered arrangement of three
distinct lines is written to name.txt, one name per line, without duplicates.
"""
import itertools

# Names produced by the last run; also the accumulator used by permutations().
result = []


def permutations(arr, position, end):
    """Append to ``result`` every arrangement of ``arr[position:end]``.

    Elements outside ``position:end`` stay where they are, and each appended
    string is the concatenation of the *whole* list.  ``arr`` is permuted in
    place while recursing but is back in its original order on return.

    Kept for backward compatibility; the script itself now relies on
    ``build_names``.
    """
    if position == end:
        result.append(''.join(arr))
        return
    for index in range(position, end):
        arr[index], arr[position] = arr[position], arr[index]
        permutations(arr, position + 1, end)
        # Undo the swap so the next iteration starts from the same order.
        arr[index], arr[position] = arr[position], arr[index]


def build_names(chars, length=3):
    """Return every distinct ordered arrangement of ``length`` entries.

    Blank entries are ignored.  The previous implementation enumerated all
    ordered triples and then additionally swapped the first two characters of
    each one, so every name was emitted twice; here each name appears once,
    in first-occurrence order.
    """
    usable = [c for c in chars if c]
    arrangements = (''.join(p) for p in itertools.permutations(usable, length))
    # dict.fromkeys de-duplicates while keeping the generation order; repeats
    # are still possible when the input itself lists a character twice.
    return list(dict.fromkeys(arrangements))


def main():
    """Read single.txt, build the names and write them to name.txt."""
    with open('single.txt', 'r') as f:
        # strip() removes the trailing newline and stray spaces.
        chars = [line.strip() for line in f]

    result.extend(build_names(chars))

    with open('name.txt', 'w') as f:
        f.writelines(name + '\n' for name in result)


if __name__ == '__main__':
    main()
2. 利用爬虫技术,通过网站给名字打分以判断好坏:
代码:
# encoding: utf-8
"""Score candidate names on sheup.com and keep the high-scoring ones.

name.txt is split into small files under ``part_dir``; each part is then
processed by ``num_thread`` worker threads that submit one name at a time to
the scoring page with a Chrome driver.  Names scoring above ``threshold`` are
appended to goodname.txt.
"""
import os
import shutil
import threading
import time

import lxml.html
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait  # explicit waits

etree = lxml.html.etree

# Global settings
num_thread = 5                        # number of worker threads
driver_path = r"./chromedriver.exe"
xing = '刘'                            # family name put in front of every candidate
threshold = 95.0                      # only scores strictly above this are kept
part_dir = 'part/'
# Set to True to rebuild the part files from name.txt before scoring.
REBUILD_PARTS = False

# Locks
gLock1 = threading.Lock()  # guards popping names off the shared list
gLock2 = threading.Lock()  # guards the progress counter and goodname.txt

# Number of names scored so far (shared by all worker threads).
count = 0


def rm_mk_dir(path):
    """Delete ``path`` if it exists, then create it again empty."""
    try:
        shutil.rmtree(path)
    except FileNotFoundError:
        print(path + ":尚未创建")
    else:
        print(path + ':删除成功')
    os.makedirs(path)
    print(path + ':创建成功')


def get_score(partname):
    """Score names taken from ``partname`` until the list is empty.

    ``partname`` is a list of given names (surrounding whitespace is
    stripped) shared by all worker threads; every name is popped exactly
    once.  A name whose score is above ``threshold`` is appended to
    goodname.txt unless a line containing it is already there.
    """
    global count
    while True:
        # 1. Take the next name.  The emptiness test must happen under the
        #    lock, otherwise another thread can empty the list between the
        #    test and the pop.
        with gLock1:
            if not partname:
                return
            ming = partname.pop().strip()

        # A proxy can be configured by passing ChromeOptions with
        # '--proxy-server=http://<ip:port>' to webdriver.Chrome.
        # NOTE(review): these are the Selenium calls the original script
        # used; confirm they match the installed Selenium version.
        driver = webdriver.Chrome(executable_path=driver_path)
        try:
            driver.get('https://www.sheup.com/xingming_dafen.php')
            driver.find_element_by_name("xingx").send_keys(xing)
            driver.find_element_by_name("mingx").send_keys(ming)
            driver.find_element_by_class_name("set_submit").click()
            time.sleep(2)  # give the result page time to render
            source = driver.page_source
        finally:
            # Always close the browser, even when a step above fails.
            driver.quit()

        # Parse the result page.
        html = etree.HTML(source)
        found = html.xpath("//div[@class='dr_ass']/span/text()")
        try:
            score = float(str(found[0]))
        except (IndexError, ValueError):
            # No usable score on the page: skip this name instead of letting
            # the exception end the whole worker thread.
            print(xing + ming + ':未获取到分数,已跳过')
            continue

        # 2. Report progress and store good names, one thread at a time.
        with gLock2:
            count += 1
            print(count % 100, ',', xing + ming + ' ' + str(score))
            if score > threshold:
                # 'a+' creates the file when missing and always writes at
                # the end; seek(0) lets the existing lines be read first.
                with open('goodname.txt', 'a+') as out:
                    out.seek(0)
                    all_lines = out.readlines()
                    if not any(ming in line.strip() for line in all_lines):
                        out.write(xing + ming + ' ' + str(score) + '\n')


def _chunk_names(names, line_num):
    """Split ``names`` into runs of ``line_num``; the last run takes the rest.

    Unlike the previous splitting code, nothing is dropped when there are
    fewer than ``2 * line_num`` names: they all end up in a single chunk.
    """
    part_num = len(names) // line_num
    if part_num <= 1:
        return [names] if names else []
    chunks = [names[i * line_num:(i + 1) * line_num]
              for i in range(part_num - 1)]
    chunks.append(names[(part_num - 1) * line_num:])
    return chunks


def _prepare_parts(line_num=50):
    """Rebuild ``part_dir`` from name.txt, about ``line_num`` names per file."""
    # 1. Read the names.
    with open('name.txt', 'r') as f:
        allname = list(set(f.readlines()))  # de-duplicate, to be safe
    print('名字总数为:', len(allname))

    # 2. Split them into small part files.
    rm_mk_dir(part_dir)
    for i, chunk in enumerate(_chunk_names(allname, line_num)):
        part_name = part_dir + 'part' + str(i) + '.txt'
        with open(part_name, 'w', encoding='utf-8') as part_f:
            part_f.writelines(chunk)


def _score_part(path):
    """Score every name in one part file using ``num_thread`` threads."""
    with open(path, 'r', encoding='utf-8') as f:
        partname = list(set(f.readlines()))  # de-duplicate, to be safe
    print('名字总数为:', len(partname))

    workers = [
        threading.Thread(target=get_score, args=[partname], daemon=True)
        for _ in range(num_thread)
    ]
    for worker in workers:
        worker.start()
    # Wait for all workers before moving on to the next part file.
    for worker in workers:
        worker.join()


if __name__ == '__main__':
    # Prepare the data (steps 1 and 2).
    if REBUILD_PARTS:
        _prepare_parts()

    # NOTE(review): the original listing had lost its line structure; steps 3
    # and 4 are assumed to run regardless of the switch above - confirm.

    # 3. Make sure goodname.txt exists.
    if not os.path.exists('goodname.txt'):
        print('goodname.txt 尚不存在!')
        with open("goodname.txt", 'w'):
            pass

    # 4. Score every part file (multi-threaded).  Sorting keeps the numbering
    #    stable between runs, so an interrupted run can be resumed by index.
    for index, filename in enumerate(sorted(os.listdir(part_dir))):
        print(index, ',', filename, ':')
        _score_part(part_dir + str(filename))
 
                     
                    
                 
                    
                
 
                
            
         
         浙公网安备 33010602011771号
浙公网安备 33010602011771号