# Print the name of every picture found in the runs of the document's
# body paragraphs.
# NOTE(review): `doc` is defined outside this snippet; presumably it is a
# python-docx Document opened earlier -- confirm.
for paragraph in doc.paragraphs:
    for run in paragraph.runs:
        # A run serializes as <w:r ...>, never as <w:drawing ...>, so a
        # prefix test on its XML can never match. Search the run's
        # descendants for picture elements instead.
        for pic in run.element.xpath('.//pic:pic'):
            # The picture name lives on the non-visual drawing properties.
            image = pic.nvPicPr.cNvPr.name
            print(image)
获取图片二进制数据
from docx.shared import Inches
# Save every embedded picture found in the runs of the document's body
# paragraphs to the current working directory, one file per picture.
# NOTE(review): `doc` is defined outside this snippet; presumably it is a
# python-docx Document opened earlier -- confirm.
for paragraph in doc.paragraphs:
    for run in paragraph.runs:
        # A run serializes as <w:r ...>, never as <w:drawing ...>, so search
        # its descendants for picture elements instead of testing a prefix.
        for pic in run.element.xpath('.//pic:pic'):
            blip = pic.blipFill.blip
            if blip is None or blip.embed is None:
                # No embed relationship (e.g. a linked picture): the bytes
                # are not stored in this package, so there is nothing to save.
                continue
            # The blip is only an XML element; the binary data lives in the
            # image part that its r:embed relationship id points to.
            image_part = doc.part.related_parts[blip.embed]
            image = pic.nvPicPr.cNvPr.name
            # Use the stored file's real extension rather than assuming PNG,
            # and do not append it twice when the name already carries it.
            ext = image_part.partname.ext
            if image.lower().endswith(f".{ext}".lower()):
                filename = image
            else:
                filename = f"{image}.{ext}"
            with open(filename, 'wb') as f:
                f.write(image_part.blob)
如果你需要从letter.docx文档中提取所有图片数据,可以使用以下代码实现。
import docx
from docx.shared import Inches
# Open letter.docx and save every embedded picture found in the runs of its
# body paragraphs to the current working directory, one file per picture.
doc = docx.Document('letter.docx')
for paragraph in doc.paragraphs:
    for run in paragraph.runs:
        # A run serializes as <w:r ...>, never as <w:drawing ...>, so search
        # its descendants for picture elements instead of testing a prefix.
        for pic in run.element.xpath('.//pic:pic'):
            blip = pic.blipFill.blip
            if blip is None or blip.embed is None:
                # No embed relationship (e.g. a linked picture): the bytes
                # are not stored in this package, so there is nothing to save.
                continue
            # The blip is only an XML element; the binary data lives in the
            # image part that its r:embed relationship id points to.
            image_part = doc.part.related_parts[blip.embed]
            image = pic.nvPicPr.cNvPr.name
            # Use the stored file's real extension rather than assuming PNG,
            # and do not append it twice when the name already carries it.
            ext = image_part.partname.ext
            if image.lower().endswith(f".{ext}".lower()):
                filename = image
            else:
                filename = f"{image}.{ext}"
            with open(filename, 'wb') as f:
                f.write(image_part.blob)
如果你只需要提取某一个特定的Word文档中的图片,可以通过修改文档名称和图片名称信息,使用以下代码解决。
import docx
from docx.shared import Inches
# Open example.docx and save only the embedded picture(s) whose name is
# 'image.png'. Change the document path and the picture name below to
# extract a different picture.
doc = docx.Document('example.docx')
for paragraph in doc.paragraphs:
    for run in paragraph.runs:
        # A run serializes as <w:r ...>, never as <w:drawing ...>, so search
        # its descendants for picture elements instead of testing a prefix.
        for pic in run.element.xpath('.//pic:pic'):
            image = pic.nvPicPr.cNvPr.name
            if image != 'image.png':
                continue
            blip = pic.blipFill.blip
            if blip is None or blip.embed is None:
                # No embed relationship (e.g. a linked picture): the bytes
                # are not stored in this package, so there is nothing to save.
                continue
            # The blip is only an XML element; the binary data lives in the
            # image part that its r:embed relationship id points to.
            image_part = doc.part.related_parts[blip.embed]
            # Use the stored file's real extension, and do not append it
            # again when the name already ends with it (avoids
            # 'image.png.png').
            ext = image_part.partname.ext
            if image.lower().endswith(f".{ext}".lower()):
                filename = image
            else:
                filename = f"{image}.{ext}"
            with open(filename, 'wb') as f:
                f.write(image_part.blob)