# -*-encoding:utf-8-*-
import io
import os
import random
import re
def find_line_exist_num(file_dir, save_path, pattern):
    """Collect every line matching *pattern* from the files under *file_dir*.

    All files in the directory tree are read as UTF-16 text. Each line for
    which ``pattern.search(line)`` succeeds is written to *save_path* (also
    UTF-16) as exactly one ``\\r\\n``-terminated line, so the returned count
    equals the number of lines in the output file.

    Args:
        file_dir: Root directory to walk recursively.
        save_path: Path of the output file; it is created or truncated.
        pattern: Compiled regular expression (anything with ``.search``).

    Returns:
        The number of matching lines written to *save_path*.

    Raises:
        UnicodeError: If a file under *file_dir* is not valid UTF-16.
        IOError/OSError: If a file cannot be opened.
    """
    count = 0
    # Remember where the output lives so it is never scanned as an input
    # when it happens to sit inside the directory being walked.
    save_abs = os.path.normcase(os.path.abspath(save_path))
    # newline="" disables newline translation so the explicit "\r\n" below
    # is written verbatim on every platform.
    with io.open(save_path, "w", encoding="utf-16", newline="") as result_f:
        for root, _dirs, files in os.walk(file_dir):
            for name in files:
                file_path = os.path.join(root, name)
                if os.path.normcase(os.path.abspath(file_path)) == save_abs:
                    continue
                # Decode the whole stream as UTF-16; decoding raw bytes one
                # readline() at a time splits code units and loses the BOM.
                with io.open(file_path, "r", encoding="utf-16") as f:
                    for line in f:
                        if pattern.search(line):
                            # Strip the line's own terminator so each match
                            # occupies a single line of the output file.
                            result_f.write(line.rstrip(u"\r\n") + u"\r\n")
                            count += 1
    return count
def get_random_line(file_path, num, result_file_path, sample_size=2000):
    """Copy a random sample of lines from a UTF-16 file into another file.

    Line indices are drawn without replacement from ``range(num)``. Every
    line of *file_path* whose zero-based index was drawn is printed and
    written, in file order, to *result_file_path* (UTF-16, line endings
    copied verbatim).

    Args:
        file_path: UTF-16 text file to sample from.
        num: Number of lines available for sampling (indices ``0..num-1``).
        result_file_path: Output file; it is created or truncated.
        sample_size: How many lines to pick. Defaults to 2000. When fewer
            than *sample_size* lines are available, all of them are taken
            instead of raising ``ValueError``.
    """
    population = max(num, 0)
    picks = max(0, min(sample_size, population))
    # A set gives O(1) membership tests while scanning the file.
    wanted = set(random.sample(range(population), picks))
    # newline="" on both handles copies each line's terminator unchanged.
    with io.open(result_file_path, "w", encoding="utf-16", newline="") as result:
        if not wanted:
            return
        last_wanted = max(wanted)
        with io.open(file_path, "r", encoding="utf-16", newline="") as f:
            for cur_num, line in enumerate(f):
                if cur_num > last_wanted:
                    # Nothing left to collect; skip the rest of the file.
                    break
                if cur_num in wanted:
                    print(r"行号:%s, 内容:%s" % (str(cur_num), line))
                    result.write(line)
if __name__ == "__main__":
    # Regular expression that matches any run of digits.
    pattern = re.compile(r'\d+')

    # Directory whose files are scanned.
    file_dir = ""
    # Where every line containing a digit is stored.
    save_path = ""
    # Where the 2000 randomly drawn lines are stored.
    result_file_path = ""

    # Step 1: gather the digit-bearing lines and remember how many there are.
    count = find_line_exist_num(file_dir, save_path, pattern)
    # Step 2: draw the random sample from the gathered lines.
    get_random_line(save_path, count, result_file_path)