html = '''<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>菜鸟教程(runoob.com)</title>
</head>
<body>
<h1>我的第一个标题</h1>
<p id="first">我的第一个段落。</p >
</body>
<table border="1">
<tr>
<td>row 1, cell 1</td>
<td>row 1, cell 2</td>
</tr>
<tr>
<td>row 2, cell 1</td>
<td>row 2, cell 2</td>
</tr>
</table>
</html>'''
import re
def getChinese(html):
html_unicode=html.strip()
string=re.compile('[^\u4e00-\u9fff]')
chinese="".join(string.split(html_unicode))
return chinese
from bs4 import BeautifulSoup
soup=BeautifulSoup(html)
print("获取head标签内容:")
print(soup.head)
print("学号后两位:14")
print()
print("获取body标签内容:")
print(soup.body)
print()
print("id为first的标签对象:")
print(soup.p)
print()
print("获取html中的中文字符")
print(getChinese(html))
![]()