# 02oeis.py (OEIS parsing)

# name: 02oeis.py
# date: 20240324
# link: https://blog.csdn.net/qq_38848616/article/details/79637559
# programmer: arithmus
# memo: find the OEIS words

import codecs,sys
import urllib.request
import re
# Script overview: download the "Fi" section of the OEIS wiki index, collect
# every A-number linked from it, and for each distinct A-number append the
# b-file URL plus up to MAX_TERMS of its terms to Fi.txt.

# Maps each sequence id found on the index page to the number of times it
# was linked there; also used to avoid downloading the same b-file twice.
dict1 = {}

# Upper bound on the number of terms copied from each b-file.
MAX_TERMS = 100

# 1. Create the output file Fi.txt and record the index URL on its first line.
url1 = 'http://oeis.org/wiki/Index_to_OEIS:_Section_Fi'
# The context manager closes Fi.txt even when a download below raises.
with codecs.open('Fi.txt', 'w') as f:
    print(url1)
    f.write(url1)
    f.write('\n')

    # 2. Collect the sequence numbers linked from the index page.
    req1 = urllib.request.Request(url1)
    with urllib.request.urlopen(req1) as resp1:
        content1 = resp1.read().decode('utf-8')
    # Matches anchors of the form <a href="http://oeis.org/Axxxxxx">A
    link_re = re.compile('<a href="http://oeis.org/A.*?">A', re.S)
    for a in link_re.findall(content1):
        # Keep only the text between 'oeis.org/A' and the closing '">'.
        each = a.partition('oeis.org/A')[2].partition('">')[0]
        if each == '':
            continue
        # NOTE(review): a 7-character capture has its first character
        # dropped -- presumably to normalise it to a 6-digit id; confirm.
        if len(each) == 7:
            each = each[1:]
        if each in dict1:
            # Already downloaded: just count the extra occurrence.
            dict1[each] += 1
            continue

        # 3. Write the b-file URL and the sequence terms.
        dict1[each] = 1
        url2 = r'https://oeis.org/A' + each + '/b' + each + '.txt'
        print(url2)
        f.write(url2)
        f.write('\n')
        req2 = urllib.request.Request(url2)
        count = 0
        # Stream the response line by line so the download can stop as soon
        # as MAX_TERMS terms were copied, and the socket is always closed.
        with urllib.request.urlopen(req2) as resp2:
            for line in resp2:
                line = line.decode('utf-8').strip()
                # Skip blank lines and b-file comment lines ('#' prefix).
                if line == '' or line.startswith('#'):
                    continue
                line = line.split()
                # A data line is exactly "<index> <value>".
                if len(line) != 2:
                    continue
                print(line[1])
                f.write(line[1] + ' ')
                count += 1
                if count >= MAX_TERMS:
                    break
        f.write('\n')

 

# posted @ 2024-03-24 10:35  taohid  views(19)  comments(0)  favorite  report