# Extract all text from the slides in a presentation.
from pptx import Presentation

# Open the presentation. NOTE: `path_to_presentation` is not defined in this
# snippet; it must be set beforehand to the path of the .pptx file to read.
prs = Presentation(path_to_presentation)

# text_runs will be populated with a list of strings,
# one for each text run in the presentation.
text_runs = []

# Walk every slide, then every shape on that slide.
for slide in prs.slides:
    for shape in slide.shapes:
        # Skip shapes that have no text frame.
        if not shape.has_text_frame:
            continue
        # A text frame holds paragraphs, and each paragraph holds runs.
        for paragraph in shape.text_frame.paragraphs:
            for run in paragraph.runs:
                # Collect the text of each run, in document order.
                text_runs.append(run.text)

# Print the collected text as a quick check of the result.
print(text_runs)
思路
创建 result 结果列表,最后我们会将全部信息存储到 result 列表中,并写入一个 md 文档当中。
Traceback (most recent call last):
  File "/Users/用户名/.local/share/virtualenvs/smallScript-RtozSf8y/lib/python3.10/site-packages/pptx/compat/__init__.py", line 10, in <module>
    Container = collections.abc.Container
AttributeError: module 'collections' has no attribute 'abc'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
  File "/Users/用户名/workspace/python/smallScript/office/ppt2markdown.py", line 3, in <module>
    from pptx import Presentation
  File "/Users/用户名/.local/share/virtualenvs/smallScript-RtozSf8y/lib/python3.10/site-packages/pptx/__init__.py", line 14, in <module>
    from pptx.api import Presentation  # noqa
  File "/Users/用户名/.local/share/virtualenvs/smallScript-RtozSf8y/lib/python3.10/site-packages/pptx/api.py", line 15, in <module>
    from .package import Package
  File "/Users/用户名/.local/share/virtualenvs/smallScript-RtozSf8y/lib/python3.10/site-packages/pptx/package.py", line 6, in <module>
    from pptx.opc.package import OpcPackage
  File "/Users/用户名/.local/share/virtualenvs/smallScript-RtozSf8y/lib/python3.10/site-packages/pptx/opc/package.py", line 11, in <module>
    from pptx.compat import is_string, Mapping
  File "/Users/用户名/.local/share/virtualenvs/smallScript-RtozSf8y/lib/python3.10/site-packages/pptx/compat/__init__.py", line 14, in <module>
    Container = collections.Container
AttributeError: module 'collections' has no attribute 'Container'