Python操作Word:python-docx自动生成报告,再也不用手动排版
Word文档是办公中常用的文档格式。python-docx库可以创建、读取和修改Word文档,实现报告生成、模板填充等自动化任务。
1. python-docx简介
pip install python-docx
from docx import Document
from docx.shared import Inches, Pt, Cm, Mm, Emu
from docx.shared import RGBColor
from docx.enum.text import WD_ALIGN_PARAGRAPH, WD_LINE_SPACING
from docx.enum.table import WD_TABLE_ALIGNMENT
from docx.enum.style import WD_STYLE_TYPE
# 单位说明
# Inches:英寸
# Pt:磅(字体大小常用)
# Cm:厘米
# Mm:毫米
# Emu:英制公制单位(最小单位)
2. 创建Word文档
from docx import Document
from docx.shared import Pt
# Start a new, empty document.
doc = Document()

# Headings in order of level; level 0 uses the Title style.
for heading_level, heading_text in enumerate(('文档标题', '一级标题', '二级标题')):
    doc.add_heading(heading_text, level=heading_level)

# A plain paragraph with no explicit style.
doc.add_paragraph('这是一个普通段落。')

# Paragraphs that use a named style, listed in document order.
styled_paragraphs = (
    ('这是引用样式', 'Quote'),
    ('列表项1', 'List Bullet'),
    ('列表项2', 'List Bullet'),
    ('编号项1', 'List Number'),
    ('编号项2', 'List Number'),
)
for body_text, style_name in styled_paragraphs:
    doc.add_paragraph(body_text, style=style_name)

# Finish with a page break, then write the file.
doc.add_page_break()
doc.save('output.docx')
3. 读取Word文档
from docx import Document
# Open an existing document.
doc = Document('input.docx')

# Print the text and the style name of every paragraph in doc.paragraphs.
for para in doc.paragraphs:
    print(para.text)
    print(f"样式:{para.style.name}")

# Print every table: one output line per row, each cell followed by ' | '.
for table in doc.tables:
    for row in table.rows:
        for cell in row.cells:
            print(cell.text, end=' | ')
        print()  # terminate the output line of the current row

# Core document properties.
core_props = doc.core_properties
print(f"标题:{core_props.title}")
print(f"作者:{core_props.author}")
print(f"创建时间:{core_props.created}")

# Simple statistics.
print(f"段落数:{len(doc.paragraphs)}")
print(f"表格数:{len(doc.tables)}")
# Text search
def find_text(doc, search_text):
    """Return the paragraphs of *doc* whose text contains *search_text*.

    Args:
        doc: Any object exposing a ``paragraphs`` iterable whose items have
            a ``text`` attribute (for example a python-docx ``Document``).
        search_text: Substring to look for; matching uses the plain,
            case-sensitive ``in`` operator.

    Returns:
        A list of ``(index, text)`` tuples, one per matching paragraph, in
        document order. ``index`` is the paragraph's position within
        ``doc.paragraphs``. The list is empty when nothing matches.
    """
    return [
        (index, paragraph.text)
        for index, paragraph in enumerate(doc.paragraphs)
        if search_text in paragraph.text
    ]
# Run the search and report each hit together with its paragraph index.
matches = find_text(doc, '关键词')
for idx, text in matches:
    print(f"段落{idx}: {text}")
4. 段落操作
from docx import Document
from docx.shared import Pt, Cm
from docx.enum.text import WD_ALIGN_PARAGRAPH, WD_LINE_SPACING
doc = Document()

# Start with one paragraph, then extend it run by run.
para = doc.add_paragraph('这是一个段落。')
para.add_run('追加的文本。')
bold_run = para.add_run('粗体文本')
bold_run.bold = True
italic_run = para.add_run('斜体文本')
italic_run.italic = True

# Horizontal alignment; the other members are LEFT, RIGHT and JUSTIFY.
para.alignment = WD_ALIGN_PARAGRAPH.CENTER

# The remaining settings are made on the paragraph's format object.
fmt = para.paragraph_format

# Indentation: left, right, first line.
fmt.left_indent = Cm(1)
fmt.right_indent = Cm(1)
fmt.first_line_indent = Cm(2)

# Space before and after the paragraph.
fmt.space_before = Pt(12)
fmt.space_after = Pt(12)

# Line spacing: first a fixed height, then a spacing rule.
# Rule members: SINGLE, ONE_POINT_FIVE, DOUBLE, AT_LEAST, EXACTLY, MULTIPLE.
# NOTE(review): the rule is assigned after the fixed value and presumably
# replaces it (1.5 lines wins) - confirm against the python-docx docs and
# keep only the assignment you actually want.
fmt.line_spacing = Pt(20)
fmt.line_spacing_rule = WD_LINE_SPACING.ONE_POINT_FIVE

# Pagination: keep lines together, keep with next paragraph, break before.
fmt.keep_together = True
fmt.keep_with_next = True
fmt.page_break_before = True
# Insert a paragraph at a specific position
def insert_paragraph_after(paragraph, text):
    """Insert one new paragraph directly after *paragraph* and return it.

    Args:
        paragraph: The python-docx paragraph after which to insert.
        text: Text for the new paragraph. When empty or None, the new
            paragraph is left without any run.

    Returns:
        The newly created paragraph, wrapped with the same parent as
        *paragraph*.
    """
    from docx.text.paragraph import Paragraph

    new_p = paragraph._p.makeelement(
        '{http://schemas.openxmlformats.org/wordprocessingml/2006/main}p', {}
    )
    # addnext() returns None, so the element must be created first and kept
    # in its own variable; it is inserted exactly once.
    paragraph._p.addnext(new_p)

    new_para = Paragraph(new_p, paragraph._parent)
    if text:
        new_para.add_run(text)
    return new_para
# Write the document to 'paragraphs.docx'.
doc.save('paragraphs.docx')
5. 文本格式
from docx import Document
from docx.shared import Pt, RGBColor
from docx.enum.text import WD_UNDERLINE
doc = Document()
para = doc.add_paragraph()

# One run that carries every character-level setting shown below.
run = para.add_run('格式化文本示例')
font = run.font

# Typeface, size and basic emphasis.
font.name = '微软雅黑'
font.size = Pt(14)
font.bold = True
font.italic = True

# Underline: first a plain boolean, then a specific style (double).
font.underline = True
font.underline = WD_UNDERLINE.DOUBLE

# Strike-through, shadow and outline effects.
font.strike = True
font.shadow = True
font.outline = True

# Font colour: red.
font.color.rgb = RGBColor(255, 0, 0)

# Subscript: the "2" in H2O.
para2 = doc.add_paragraph()
para2.add_run('H')
subscript_run = para2.add_run('2')
subscript_run.font.subscript = True
para2.add_run('O')

# Superscript: the "2" in E=mc2.
para3 = doc.add_paragraph()
para3.add_run('E=mc')
superscript_run = para3.add_run('2')
superscript_run.font.superscript = True

# Highlight colour on a further run of the first paragraph.
from docx.enum.text import WD_COLOR_INDEX
run2 = para.add_run('高亮文本')
run2.font.highlight_color = WD_COLOR_INDEX.YELLOW
# Chinese fonts: the East Asian font attribute has to be set as well
from docx.oxml.ns import qn


def set_chinese_font(run, font_name):
    """Apply *font_name* to *run*, including its East Asian font attribute.

    Args:
        run: The python-docx run to modify.
        font_name: Font name to apply, e.g. '宋体'.
    """
    run.font.name = font_name
    # Also write the name into the w:eastAsia attribute of the run's
    # rFonts element; this line relies on the assignment above having
    # run first.
    run._element.rPr.rFonts.set(qn('w:eastAsia'), font_name)
# Add a run, give it a Chinese font via the helper, then write the file.
run3 = para.add_run('中文字体')
set_chinese_font(run3, '宋体')
doc.save('text_format.docx')
6. 表格操作
from docx import Document
from docx.shared import Inches, Cm, Pt
from docx.enum.table import WD_TABLE_ALIGNMENT, WD_CELL_VERTICAL_ALIGNMENT
from docx.oxml.ns import qn
from docx.oxml import OxmlElement
# WD_ALIGN_PARAGRAPH is used for the cell paragraphs below but is not among
# this snippet's imports, so bring it into scope here.
from docx.enum.text import WD_ALIGN_PARAGRAPH

doc = Document()

# Create a 3x4 table with a bordered style.
table = doc.add_table(rows=3, cols=4)
table.style = 'Table Grid'

# Fill the table row by row.
data = [
    ['姓名', '年龄', '部门', '薪资'],
    ['张三', '25', '技术部', '10000'],
    ['李四', '30', '销售部', '12000'],
]
for row, row_data in zip(table.rows, data):
    for cell, cell_data in zip(row.cells, row_data):
        cell.text = cell_data

# Append one more row and fill it.
new_row = table.add_row()
for cell, cell_data in zip(new_row.cells, ('王五', '28', '财务部', '11000')):
    cell.text = cell_data

# Column width: set on every cell of the first column.
for cell in table.columns[0].cells:
    cell.width = Cm(3)

# Row height.
for row in table.rows:
    row.height = Cm(1)

# Merge the first two cells of the first row.
table.cell(0, 0).merge(table.cell(0, 1))

# Centre every cell vertically and its first paragraph horizontally.
for row in table.rows:
    for cell in row.cells:
        cell.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.CENTER
        cell.paragraphs[0].alignment = WD_ALIGN_PARAGRAPH.CENTER

# Centre the table itself on the page.
table.alignment = WD_TABLE_ALIGNMENT.CENTER
# Header-row styling
def set_table_header_style(table):
    """Style the first row of *table* as a header: bold white text on blue.

    Every run in every paragraph of each header cell is made bold and
    white, so empty cells and cells with several paragraphs (for example
    after a merge) are handled. Any existing ``w:shd`` element is replaced
    rather than duplicated, which also covers a merged cell being returned
    more than once by ``row.cells``.

    Args:
        table: The python-docx table whose first row is styled.
    """
    # RGBColor is not among this snippet's imports; import it locally so
    # the function works on its own.
    from docx.shared import RGBColor

    header_row = table.rows[0]
    for cell in header_row.cells:
        tc_pr = cell._tc.get_or_add_tcPr()

        # Drop any earlier shading so the cell ends up with exactly one.
        for stale in tc_pr.findall(qn('w:shd')):
            tc_pr.remove(stale)

        # Background fill. w:val is a required attribute of w:shd.
        shading = OxmlElement('w:shd')
        shading.set(qn('w:val'), 'clear')
        shading.set(qn('w:color'), 'auto')
        shading.set(qn('w:fill'), '4472C4')
        # Place w:shd before the tcPr children that must follow it in the
        # schema sequence (e.g. w:vAlign) instead of blindly appending.
        tc_pr.insert_element_before(
            shading,
            'w:noWrap', 'w:tcMar', 'w:textDirection', 'w:tcFitText',
            'w:vAlign', 'w:hideMark',
        )

        # Bold, white text for all runs of the cell.
        for paragraph in cell.paragraphs:
            for run in paragraph.runs:
                run.font.bold = True
                run.font.color.rgb = RGBColor(255, 255, 255)
# Style the header row of the table built above.
set_table_header_style(table)
# Turn on automatic column fitting for the table.
table.autofit = True
# Write the document to 'tables.docx'.
doc.save('tables.docx')
7. 图片操作
from docx import Document
from docx.shared import Inches, Cm
# WD_ALIGN_PARAGRAPH is used to centre the picture paragraph below but is
# not among this snippet's imports, so bring it into scope here.
from docx.enum.text import WD_ALIGN_PARAGRAPH

doc = Document()

# Add a picture without specifying a size.
doc.add_picture('image.png')

# Add pictures with an explicit width, height, or both.
doc.add_picture('image.png', width=Inches(4))
doc.add_picture('image.png', height=Cm(5))
doc.add_picture('image.png', width=Inches(4), height=Inches(3))

# Centred picture: put it in a run of its own paragraph and centre that.
para = doc.add_paragraph()
run = para.add_run()
run.add_picture('image.png', width=Inches(4))
para.alignment = WD_ALIGN_PARAGRAPH.CENTER

# Picture from a byte stream.
from io import BytesIO
# The download example is optional and is the only user of `requests`, so
# its import is kept with the example instead of being a hard requirement
# of this snippet. Uncomment all four lines together to try it.
# import requests
# response = requests.get('https://example.com/image.png')
# image_stream = BytesIO(response.content)
# doc.add_picture(image_stream, width=Inches(4))

# Picture inside a table cell.
table = doc.add_table(rows=1, cols=2)
cell = table.cell(0, 0)
para = cell.paragraphs[0]
run = para.add_run()
run.add_picture('image.png', width=Inches(2))

doc.save('images.docx')
8. 页眉页脚
from docx import Document
from docx.shared import Pt
from docx.enum.text import WD_ALIGN_PARAGRAPH
doc = Document()

# Headers and footers are reached through a section; use the first one.
section = doc.sections[0]

# Header: centred text in the header's first paragraph.
header = section.header
header_para = header.paragraphs[0]
header_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
header_para.text = '公司名称 - 机密文档'

# Footer: centred placeholder text in the footer's first paragraph.
footer = section.footer
footer_para = footer.paragraphs[0]
footer_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
footer_para.text = '第 X 页'
# Page-number field
from docx.oxml.ns import qn
from docx.oxml import OxmlElement


def add_page_number(paragraph):
    """Append a PAGE field to *paragraph*.

    A new run is added to the paragraph and receives, in this order, a
    ``w:fldChar`` of type "begin", a ``w:instrText`` holding the field
    instruction ``PAGE``, and a ``w:fldChar`` of type "end".

    Args:
        paragraph: The python-docx paragraph that receives the field.
    """
    run = paragraph.add_run()

    field_begin = OxmlElement('w:fldChar')
    field_begin.set(qn('w:fldCharType'), 'begin')

    instruction = OxmlElement('w:instrText')
    instruction.text = 'PAGE'

    field_end = OxmlElement('w:fldChar')
    field_end.set(qn('w:fldCharType'), 'end')

    # Order matters: begin marker, instruction, end marker.
    for element in (field_begin, instruction, field_end):
        run._r.append(element)
# Cm is used for the margins and page size below, but this snippet only
# imports Pt from docx.shared, so bring Cm into scope here.
from docx.shared import Cm

# Rebuild the footer paragraph as: '第 ' + PAGE field + ' 页', centred.
footer_para = footer.paragraphs[0]
footer_para.clear()
footer_para.add_run('第 ')
add_page_number(footer_para)
footer_para.add_run(' 页')
footer_para.alignment = WD_ALIGN_PARAGRAPH.CENTER

# Page margins.
section.top_margin = Cm(2.54)
section.bottom_margin = Cm(2.54)
section.left_margin = Cm(3.17)
section.right_margin = Cm(3.17)

# Page size: A4 width and height.
section.page_width = Cm(21)
section.page_height = Cm(29.7)

# Page orientation: portrait.
from docx.enum.section import WD_ORIENT
section.orientation = WD_ORIENT.PORTRAIT
# NOTE(review): for landscape, the python-docx docs indicate page_width and
# page_height have to be swapped as well - confirm before relying on this.
# section.orientation = WD_ORIENT.LANDSCAPE

doc.save('header_footer.docx')
9. 样式操作
from docx import Document
from docx.shared import Pt, RGBColor
from docx.enum.style import WD_STYLE_TYPE
# qn builds the namespaced attribute name used to set the East Asian font
# on the styles below.
from docx.oxml.ns import qn

doc = Document()

# List every style of the document with its type.
for style in doc.styles:
    print(f"{style.name}: {style.type}")

# Use existing styles by name.
doc.add_paragraph('正文样式', style='Normal')
doc.add_paragraph('标题1样式', style='Heading 1')
doc.add_paragraph('引用样式', style='Quote')

# Modify an existing style.
style = doc.styles['Normal']
style.font.name = '微软雅黑'
style.font.size = Pt(12)
# For a Chinese font, the w:eastAsia attribute must be set in addition to
# font.name; this must come after the font.name assignment above.
style._element.rPr.rFonts.set(qn('w:eastAsia'), '微软雅黑')

# Create a new paragraph style.
from docx.enum.style import WD_STYLE_TYPE
new_style = doc.styles.add_style('MyStyle', WD_STYLE_TYPE.PARAGRAPH)
new_style.font.name = '黑体'
new_style.font.size = Pt(14)
new_style.font.bold = True
new_style.font.color.rgb = RGBColor(0, 0, 128)
# Same East Asian font handling as for 'Normal' above.
new_style._element.rPr.rFonts.set(qn('w:eastAsia'), '黑体')

# Use the new style.
doc.add_paragraph('使用自定义样式', style='MyStyle')

# Create a style based on an existing one and override its colour.
new_style2 = doc.styles.add_style('MyHeading', WD_STYLE_TYPE.PARAGRAPH)
new_style2.base_style = doc.styles['Heading 1']
new_style2.font.color.rgb = RGBColor(255, 0, 0)

doc.save('styles.docx')
10. 实战案例
案例:生成工作报告
"""
实战案例:自动生成工作报告
"""
from docx import Document
from docx.shared import Inches, Pt, Cm, RGBColor
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.enum.table import WD_TABLE_ALIGNMENT
from docx.oxml.ns import qn
from docx.oxml import OxmlElement
from datetime import datetime
def _completion_rate(planned, completed):
    """Format completed/planned as a percentage string with one decimal.

    Returns 'N/A' when *planned* is zero (or otherwise falsy), so a row
    without a plan cannot abort the report with ZeroDivisionError.
    """
    if not planned:
        return 'N/A'
    return f"{completed/planned*100:.1f}%"


def create_work_report(data, output_file):
    """Build a work report and save it as a .docx file.

    Args:
        data: Report data dict. Keys read here:
            'title', 'author', 'date', 'summary' - strings;
            'completed_tasks' - dicts with 'name', 'date', 'description';
            'statistics' - dicts with 'name', 'planned', 'completed';
            'next_week_plans' - strings;
            'issues' - strings; may be empty or missing, in which case
            the section contains the single paragraph '无'.
        output_file: Path the document is saved to.
    """
    doc = Document()

    # Default font; the East Asian attribute is set too so the font also
    # applies to Chinese text.
    style = doc.styles['Normal']
    style.font.name = '微软雅黑'
    style.font.size = Pt(11)
    style._element.rPr.rFonts.set(qn('w:eastAsia'), '微软雅黑')

    # Title.
    title = doc.add_heading(data['title'], level=0)
    title.alignment = WD_ALIGN_PARAGRAPH.CENTER

    # Author and date line.
    info_para = doc.add_paragraph()
    info_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
    info_para.add_run(f"报告人:{data['author']} ")
    info_para.add_run(f"日期:{data['date']}")
    doc.add_paragraph()  # blank line

    # Section 1: summary.
    doc.add_heading('一、工作概述', level=1)
    doc.add_paragraph(data['summary'])

    # Section 2: completed tasks, one numbered item per task.
    doc.add_heading('二、完成的工作', level=1)
    for task in data['completed_tasks']:
        para = doc.add_paragraph(style='List Number')
        para.add_run(f"{task['name']}").bold = True
        para.add_run(f"\n 完成时间:{task['date']}")
        para.add_run(f"\n 工作内容:{task['description']}")

    # Section 3: statistics table.
    doc.add_heading('三、工作数据', level=1)
    table = doc.add_table(rows=1, cols=4)
    table.style = 'Table Grid'
    table.alignment = WD_TABLE_ALIGNMENT.CENTER

    # Header row: centred, bold.
    headers = ['项目', '计划', '完成', '完成率']
    for cell, header in zip(table.rows[0].cells, headers):
        cell.text = header
        cell.paragraphs[0].alignment = WD_ALIGN_PARAGRAPH.CENTER
        for run in cell.paragraphs[0].runs:
            run.font.bold = True

    # Data rows: centred.
    for item in data['statistics']:
        row = table.add_row()
        values = (
            item['name'],
            str(item['planned']),
            str(item['completed']),
            _completion_rate(item['planned'], item['completed']),
        )
        for cell, value in zip(row.cells, values):
            cell.text = value
            cell.paragraphs[0].alignment = WD_ALIGN_PARAGRAPH.CENTER
    doc.add_paragraph()

    # Section 4: plans for next week.
    doc.add_heading('四、下周计划', level=1)
    for plan in data['next_week_plans']:
        doc.add_paragraph(plan, style='List Bullet')

    # Section 5: issues and suggestions.
    doc.add_heading('五、问题与建议', level=1)
    issues = data.get('issues')
    if issues:
        for issue in issues:
            doc.add_paragraph(issue, style='List Bullet')
    else:
        doc.add_paragraph('无')

    # Footer with the generation timestamp.
    section = doc.sections[0]
    footer = section.footer
    footer_para = footer.paragraphs[0]
    footer_para.text = f"生成时间:{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
    footer_para.alignment = WD_ALIGN_PARAGRAPH.CENTER

    # Save.
    doc.save(output_file)
    print(f"报告已生成:{output_file}")
# Usage example: build sample data and generate the report when this file
# is run as a script.
if __name__ == '__main__':
    report_data = {
        'title': '周工作报告',
        'author': '张三',
        'date': '2024年12月18日',
        'summary': '本周主要完成了项目A的开发工作,并协助测试团队进行了系统测试。',
        'completed_tasks': [
            {'name': '项目A模块开发', 'date': '12月16日', 'description': '完成核心功能模块的开发'},
            {'name': '代码审查', 'date': '12月17日', 'description': '审查团队成员提交的代码'},
            {'name': '系统测试', 'date': '12月18日', 'description': '配合测试团队完成系统测试'},
        ],
        'statistics': [
            {'name': '代码行数', 'planned': 1000, 'completed': 1200},
            {'name': 'Bug修复', 'planned': 10, 'completed': 12},
            {'name': '文档编写', 'planned': 5, 'completed': 4},
        ],
        'next_week_plans': [
            '完成项目A的剩余功能开发',
            '编写技术文档',
            '参与项目B的需求评审',
        ],
        'issues': [
            '服务器资源不足,影响开发效率',
            '部分需求不够明确,需要进一步沟通',
        ],
    }
    create_work_report(report_data, '周工作报告.docx')
11. 总结
🔑 核心要点
| 知识点 | 要点 |
|---|---|
| 文档操作 | Document(), save(), add_heading(), add_paragraph() |
| 段落格式 | 对齐、缩进、间距、行距 |
| 文本格式 | 字体、大小、颜色、粗体、斜体 |
| 表格 | add_table(), 合并单元格、样式设置 |
| 图片 | add_picture(), 指定大小 |
| 页眉页脚 | section.header, section.footer |
✅ 学习检查清单
- 能创建和读取Word文档
- 能设置段落和文本格式
- 能创建和格式化表格
- 能插入图片
- 能设置页眉页脚
📖 下一步学习
掌握了Word操作后,让我们学习PPT自动化:下一篇《18 - Python操作PPT》。
常见问题 FAQ
💬 python-docx能读取PDF吗?
不能。python-docx只处理.docx格式。读PDF可以用pypdf(PyPDF2的后继项目,PyPDF2已停止维护)或pdfplumber。读.doc(旧格式)需要先用LibreOffice命令行转换为.docx。
💬 怎么用模板生成Word文档?
两种方式:1)用python-docx读取模板文件,替换占位符文本;2)用docxtpl库,支持Jinja2模板语法,更适合复杂模板。
📚 系列导航
- 上一篇:16 - Python操作Excel
- 当前:17 - Python操作Word
- 下一篇:18 - Python操作PPT