Loading

红宝书词汇导出成欧路词典单词库和生词本

记录一下红宝书数据库导出成欧路词典格式的代码

import sqlite3
import time

beginTime = time.time_ns()

dbPath = './2023.db'
outputPath = './2023RedBabyBook.txt'

# Switches for all items
settings = {
    'unit': True,  # 单元
    'support': True,  # 助记
    'derivative': True,  # 派生
    'antonym': True,  # 反义
    'phrase': True,  # 词组
    'discrimination': True,  # 辨义
    'related': True,  # 关联
    'example': True,  # 例句
    'book': True  # 权重
}

# The order of items that appear only once
onceItemOrder = ['support', 'derivative', 'antonym', 'phrase', 'discrimination', 'related', 'unit', 'book']

# new line symbol
nl = '<br>'

# 可选
opt = {
    'unit': [lambda un: f"from Unit {un}" if settings['unit'] else ''],  # 单元
    'support': [lambda su: f"助记:{su}" if settings['support'] and su else ''],  # 助记
    'derivative': [lambda de: f"派生词:{de}" if settings['derivative'] and de else ''],  # 派生
    'antonym': [lambda an: f"{nl}反义词:{an}" if settings['antonym'] and an else ''],  # 反义
    'phrase': [lambda ph: f"词组和短语:{ph}" if settings['phrase'] and ph else ''],  # 词组
    'discrimination': [lambda di: f"词义辨析:{di}" if settings['discrimination'] and di else ''],  # 辨义
    'related': [lambda re: f"关联词:{re}" if settings['related'] and re else ''],  # 关联
    'example': [lambda ex: f"{nl}{ex}" if settings['example'] and ex else ''],  # 例句
    'book': [lambda bo: f"属于{bo}" if settings['book'] else '']  # 权重
}

# 必须
ess = [
    'word',  # 单词
    'pos',  # 词性
    'meaning'  # 释义
]

# 这里本来想设计一种按需读取的数据结构与算法,但是奈何学艺不精。_ 。
# TODO sql = f"SELECT {','.join([','.join(essentialItem),','.join(k for k in optionalItem if optionalItem[k][0])])} FROM words_all_info;"
sql = f"SELECT {','.join(ess)},{','.join(opt)} FROM words_all_info;"

# CURD
conn = sqlite3.connect(dbPath)
curs = conn.cursor()
data = list(curs.execute(sql))

# After reading the data, augment opt in place: append each key's positional
# index among the SELECTed optional columns, so opt[k] == [formatter, index].
# (Replaces the original opaque map()-with-side-effects one-liner.)
for index, key in enumerate(opt):
    opt[key].append(index)

# wordsDic: final per-word entries.
# booksDic: per-unit word lists, keyed by unit number 1..max unit in the data.
unitColumn = len(ess) + opt['unit'][-1]  # absolute index of the 'unit' column in each row
maxUnit = max(row[unitColumn] for row in data)
wordsDic, booksDic = {}, {unit: [] for unit in range(1, maxUnit + 1)}

for items in data:
    word, pos, meaning = items[:3]
    inf = items[3:]
    res = {k: opt[k][0](inf[opt[k][1]]) for k in opt}
    posMeaning = pos + ' ' + meaning
    if word in wordsDic:
        wordsDic[word]['pri'] += nl + posMeaning
        wordsDic[word]['sec'] += res['example']
    else:
        wordsDic[word] = {
            'pri': posMeaning,
            'sec': f"{nl*4}例句:{res['example']}" if res['example'] else '',
            'once': nl * 2 + (nl * 2).join([res[k] for k in onceItemOrder if res[k]])
        }
        booksDic[int(res['unit'][-2:])].append(word)

# 按照欧路词典格式生成词库文件
with open(outputPath, 'w', encoding='utf-8') as f:
    for item in (word + '@' + ''.join([wordsDic[word][k] for k in wordsDic[word]]) + '\n' for word in wordsDic):
        f.write(item)

# 按照单元来分生词本
with open('unitwords.txt', 'w', encoding='utf-8') as f:
    ln = '\n'
    f.write('\n'.join([f"#Unit{k:02d}{ln}{ln.join(booksDic[k])}" for k in booksDic]))

# 释放资源
curs.close()
conn.close()

endTime = time.time_ns()
print(f"一共写入{len(wordsDic)}条单词,共耗时{(endTime-beginTime)/1e9:.2f}秒")
posted @ 2022-04-04 23:41  Biem  阅读(507)  评论(0编辑  收藏  举报