# Path of the XML document whose <structuredBody> text is printed below.
file_name = r'C:/Users/HZJT0040/Desktop/Daliymed/410/2fbaadf6-c86c-48bb-bbba-803377841733.xml'

with open(file_name, 'r', encoding='utf-8') as f:  # read the XML text
    html = f.read()

# Capture everything between the <structuredBody> tags; re.S lets '.' match
# newlines so the capture can span multiple lines.
pattern = re.compile('<structuredBody>(.*?)</structuredBody>', re.S)

# search() returns None instead of raising IndexError when the tag is absent,
# so a document without the element is reported rather than crashing.
body_match = pattern.search(html)

if body_match is None:
    print('no <structuredBody> element found in', file_name)
else:
    page_data = body_match.group(1)
    try:
        soup = BeautifulSoup(page_data, 'lxml')
        # find_next_siblings() returns every sibling node that follows the
        # first <component> tag. If there is no <component>, soup.component is
        # None and the resulting AttributeError is reported by the handler.
        siblings = soup.component.find_next_siblings()
        print(len(siblings))

        # The first following sibling is skipped; print the text of the rest.
        for node in siblings[1:]:
            # Drop tabs and non-breaking spaces, then trim the outer whitespace.
            text = node.text.replace("\t", "").replace("\xa0", "").strip()
            for line in text.split('\n'):
                if line:  # ignore blank lines left behind by the markup
                    print(line)
    except Exception as e:
        # Best-effort script: report the parsing/extraction failure and go on.
        print(e)