Veloris.
返回索引
设计实战 2026-02-14

Python操作Word:python-docx自动生成报告,再也不用手动排版

2 分钟
403 words

Python操作Word:python-docx自动生成报告,再也不用手动排版

Word文档是办公中常用的文档格式。python-docx库可以创建、读取和修改Word文档,实现报告生成、模板填充等自动化任务。


1. python-docx简介

pip install python-docx
from docx import Document
from docx.shared import Inches, Pt, Cm, Mm, Emu
from docx.shared import RGBColor
from docx.enum.text import WD_ALIGN_PARAGRAPH, WD_LINE_SPACING
from docx.enum.table import WD_TABLE_ALIGNMENT
from docx.enum.style import WD_STYLE_TYPE

# 单位说明
# Inches:英寸
# Pt:磅(字体大小常用)
# Cm:厘米
# Mm:毫米
# Emu:英制公制单位(最小单位)

2. 创建Word文档

from docx import Document
from docx.shared import Pt

# Start a new, empty document.
doc = Document()

# Headings are added in order; the index doubles as the heading level
# (level 0 is the "Title" style).
for heading_level, heading_text in enumerate(('文档标题', '一级标题', '二级标题')):
    doc.add_heading(heading_text, level=heading_level)

# A plain paragraph with the default style.
doc.add_paragraph('这是一个普通段落。')

# Paragraphs that use named styles, appended in document order.
styled_paragraphs = (
    ('这是引用样式', 'Quote'),
    ('列表项1', 'List Bullet'),
    ('列表项2', 'List Bullet'),
    ('编号项1', 'List Number'),
    ('编号项2', 'List Number'),
)
for paragraph_text, style_name in styled_paragraphs:
    doc.add_paragraph(paragraph_text, style=style_name)

# Finish with a page break, then write the file.
doc.add_page_break()
doc.save('output.docx')

3. 读取Word文档

from docx import Document

# Open an existing document.
doc = Document('input.docx')

# Print every paragraph's text followed by the name of its style.
for paragraph in doc.paragraphs:
    print(paragraph.text)
    print(f"样式:{paragraph.style.name}")

# Print every table, one output line per row; each cell's text is followed
# by the ' | ' separator.
for table in doc.tables:
    for table_row in table.rows:
        for table_cell in table_row.cells:
            print(table_cell.text, end=' | ')
        print()

# Core document properties.
core_props = doc.core_properties
print(f"标题:{core_props.title}")
print(f"作者:{core_props.author}")
print(f"创建时间:{core_props.created}")

# Simple statistics.
paragraph_count = len(doc.paragraphs)
print(f"段落数:{paragraph_count}")
table_count = len(doc.tables)
print(f"表格数:{table_count}")

# 搜索文本
def find_text(doc, search_text):
    """Find the paragraphs of *doc* whose text contains *search_text*.

    Args:
        doc: Object exposing a ``paragraphs`` sequence whose items have a
            ``text`` attribute.
        search_text: Substring to look for.

    Returns:
        A list of ``(paragraph_index, paragraph_text)`` tuples, in document
        order, one per matching paragraph.
    """
    return [
        (position, paragraph.text)
        for position, paragraph in enumerate(doc.paragraphs)
        if search_text in paragraph.text
    ]

# Report each paragraph that mentions the keyword, with its index.
matches = find_text(doc, '关键词')
for position, content in matches:
    print(f"段落{position}: {content}")

4. 段落操作

from docx import Document
from docx.shared import Pt, Cm
from docx.enum.text import WD_ALIGN_PARAGRAPH, WD_LINE_SPACING

doc = Document()

# Add a paragraph.
para = doc.add_paragraph('这是一个段落。')

# Append more text to the same paragraph as separate runs, so that each
# run can carry its own character formatting.
para.add_run('追加的文本。')
para.add_run('粗体文本').bold = True
para.add_run('斜体文本').italic = True

# Paragraph alignment.
para.alignment = WD_ALIGN_PARAGRAPH.CENTER  # centered
# Other members: LEFT, CENTER, RIGHT, JUSTIFY

# Paragraph-level formatting is exposed through paragraph_format.
para_format = para.paragraph_format

# Indentation.
para_format.left_indent = Cm(1)      # left indent
para_format.right_indent = Cm(1)     # right indent
para_format.first_line_indent = Cm(2)  # first-line indent

# Spacing around the paragraph.
para_format.space_before = Pt(12)    # space before the paragraph
para_format.space_after = Pt(12)     # space after the paragraph

# Line spacing. The two assignments below demonstrate two alternatives.
# NOTE(review): the rule is assigned right after the fixed value, so it
# presumably replaces the 20pt setting and leaves 1.5-line spacing in the
# saved file -- confirm against the python-docx ParagraphFormat docs and
# keep only the one you want.
para_format.line_spacing = Pt(20)    # fixed line height
para_format.line_spacing_rule = WD_LINE_SPACING.ONE_POINT_FIVE  # 1.5 lines
# Other members: SINGLE, ONE_POINT_FIVE, DOUBLE, AT_LEAST, EXACTLY, MULTIPLE

# Pagination control.
para_format.keep_together = True     # do not split the paragraph across pages
para_format.keep_with_next = True    # keep on the same page as the next paragraph
para_format.page_break_before = True # start the paragraph on a new page

# 插入段落到指定位置
def insert_paragraph_after(paragraph, text):
    """Insert one new paragraph directly after *paragraph* and return it.

    Args:
        paragraph: The python-docx paragraph after which to insert.
        text: Text for the new paragraph. ``None`` or ``''`` leaves the new
            paragraph empty.

    Returns:
        The new ``Paragraph`` wrapper, created with the same parent as
        *paragraph*.
    """
    # Imported locally because the surrounding snippet's import header does
    # not provide these names.
    from docx.oxml import OxmlElement
    from docx.text.paragraph import Paragraph

    # Create exactly one <w:p> element and place it as the next sibling of
    # the anchor paragraph. addnext() returns None, so its result is not
    # used.
    new_p = OxmlElement('w:p')
    paragraph._p.addnext(new_p)

    new_para = Paragraph(new_p, paragraph._parent)
    if text:
        new_para.add_run(text)
    return new_para

# Write the document built above to disk.
doc.save('paragraphs.docx')

5. 文本格式

from docx import Document
from docx.shared import Pt, RGBColor
from docx.enum.text import WD_UNDERLINE

doc = Document()
para = doc.add_paragraph()

# Add a run; character formatting is applied per run through run.font.
run = para.add_run('格式化文本示例')

# Font settings.
run.font.name = '微软雅黑'
run.font.size = Pt(14)
run.font.bold = True
run.font.italic = True
# The next two lines show two ways to set underline; the second assignment
# replaces the value set by the first.
run.font.underline = True
run.font.underline = WD_UNDERLINE.DOUBLE  # double underline
run.font.strike = True       # strikethrough
run.font.shadow = True       # shadow
run.font.outline = True      # outline (hollow) text

# Font color.
run.font.color.rgb = RGBColor(255, 0, 0)  # red

# Subscript and superscript: each differently formatted fragment is its
# own run.
para2 = doc.add_paragraph()
para2.add_run('H')
para2.add_run('2').font.subscript = True  # subscript
para2.add_run('O')

para3 = doc.add_paragraph()
para3.add_run('E=mc')
para3.add_run('2').font.superscript = True  # superscript

# Highlight: added as another run on the first paragraph.
from docx.enum.text import WD_COLOR_INDEX
run2 = para.add_run('高亮文本')
run2.font.highlight_color = WD_COLOR_INDEX.YELLOW

# 中文字体设置(需要设置东亚字体)
from docx.oxml.ns import qn

def set_chinese_font(run, font_name):
    """Apply *font_name* to *run*, including its East Asian font slot.

    Args:
        run: The python-docx run to modify.
        font_name: Font family name, e.g. ``'宋体'``.
    """
    font = run.font
    font.name = font_name
    # Also write the name into the w:eastAsia attribute of the run's
    # <w:rFonts> element, which the font.name assignment above is not
    # relied on to do.
    east_asia_attr = qn('w:eastAsia')
    r_fonts = run._element.rPr.rFonts
    r_fonts.set(east_asia_attr, font_name)

# Add a run to the first paragraph and give it a Chinese font.
run3 = para.add_run('中文字体')
set_chinese_font(run3, '宋体')

# Write the document to disk.
doc.save('text_format.docx')

6. 表格操作

from docx import Document
from docx.shared import Inches, Cm, Pt
from docx.enum.table import WD_TABLE_ALIGNMENT, WD_CELL_VERTICAL_ALIGNMENT
from docx.oxml.ns import qn
from docx.oxml import OxmlElement

# WD_ALIGN_PARAGRAPH is needed by the cell-alignment loop below but is not
# part of this snippet's import header, so import it here.
from docx.enum.text import WD_ALIGN_PARAGRAPH

doc = Document()

# Create a 3x4 table.
table = doc.add_table(rows=3, cols=4)
table.style = 'Table Grid'  # style with visible borders

# Fill the table: the first row is the header, the rest are records.
data = [
    ['姓名', '年龄', '部门', '薪资'],
    ['张三', '25', '技术部', '10000'],
    ['李四', '30', '销售部', '12000'],
]

for row, row_data in zip(table.rows, data):
    for cell, cell_data in zip(row.cells, row_data):
        cell.text = cell_data

# Append one more row.
new_row = table.add_row()
for cell, cell_data in zip(new_row.cells, ['王五', '28', '财务部', '11000']):
    cell.text = cell_data

# Column width is set cell by cell on the first column.
for cell in table.columns[0].cells:
    cell.width = Cm(3)

# Row height.
for row in table.rows:
    row.height = Cm(1)

# Merge the first two cells of the first row.
table.cell(0, 0).merge(table.cell(0, 1))

# Center every cell vertically and its first paragraph horizontally.
for row in table.rows:
    for cell in row.cells:
        cell.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.CENTER
        cell.paragraphs[0].alignment = WD_ALIGN_PARAGRAPH.CENTER

# Center the table itself on the page.
table.alignment = WD_TABLE_ALIGNMENT.CENTER

# 设置表头样式
def set_table_header_style(table):
    """Style the first row of *table* as a header.

    Every cell in the first row gets a solid ``4472C4`` background, and
    every run in it is made bold and white.

    Args:
        table: The python-docx table whose first row is the header.
    """
    # Imported locally because the surrounding snippet's import header does
    # not provide RGBColor.
    from docx.shared import RGBColor

    for cell in table.rows[0].cells:
        tc_pr = cell._tc.get_or_add_tcPr()

        # Reuse an existing <w:shd> so that a merged cell, which is visited
        # once per column it spans, does not collect duplicates.
        shading = tc_pr.find(qn('w:shd'))
        if shading is None:
            shading = OxmlElement('w:shd')
            # Insert before the elements that must follow <w:shd> inside
            # <w:tcPr> (e.g. <w:vAlign>) instead of appending at the end.
            tc_pr.insert_element_before(
                shading,
                'w:noWrap', 'w:tcMar', 'w:textDirection', 'w:tcFitText',
                'w:vAlign', 'w:hideMark', 'w:headers', 'w:cellIns',
                'w:cellDel', 'w:cellMerge', 'w:tcPrChange',
            )
        shading.set(qn('w:val'), 'clear')  # w:val is a required attribute
        shading.set(qn('w:color'), 'auto')
        shading.set(qn('w:fill'), '4472C4')

        # Iterate over runs rather than indexing runs[0], so that an empty
        # header cell is skipped instead of raising IndexError. All
        # paragraphs are covered because a merged cell can hold several.
        for paragraph in cell.paragraphs:
            for run in paragraph.runs:
                run.font.bold = True
                run.font.color.rgb = RGBColor(255, 255, 255)

# Apply the header styling defined above to the first row.
set_table_header_style(table)

# Turn on automatic column fitting for the table.
table.autofit = True

# Write the document to disk.
doc.save('tables.docx')

7. 图片操作

from docx import Document
from docx.shared import Inches, Cm

# WD_ALIGN_PARAGRAPH is needed for centering below but is not part of this
# snippet's import header, so import it here.
from docx.enum.text import WD_ALIGN_PARAGRAPH

doc = Document()

# Add a picture without specifying a size.
doc.add_picture('image.png')

# Specify the size: width only, height only, or both.
doc.add_picture('image.png', width=Inches(4))
doc.add_picture('image.png', height=Cm(5))
doc.add_picture('image.png', width=Inches(4), height=Inches(3))

# Centered picture: put it in a run of its own paragraph and center that
# paragraph.
para = doc.add_paragraph()
run = para.add_run()
run.add_picture('image.png', width=Inches(4))
para.alignment = WD_ALIGN_PARAGRAPH.CENTER

# Adding a picture from an in-memory byte stream.
from io import BytesIO

# Example: download the image from a URL first. This needs the third-party
# `requests` package, so its import is kept inside the commented example
# rather than being a hard requirement of the whole snippet.
# import requests
# response = requests.get('https://example.com/image.png')
# image_stream = BytesIO(response.content)
# doc.add_picture(image_stream, width=Inches(4))

# Picture inside a table cell: add it to a run of the cell's first paragraph.
table = doc.add_table(rows=1, cols=2)
cell = table.cell(0, 0)
para = cell.paragraphs[0]
run = para.add_run()
run.add_picture('image.png', width=Inches(2))

doc.save('images.docx')

8. 页眉页脚

from docx import Document
from docx.shared import Pt
from docx.enum.text import WD_ALIGN_PARAGRAPH

doc = Document()

# Headers, footers and page setup are per section; take the first section.
section = doc.sections[0]

# Header: set the text of its first paragraph and center it.
header = section.header
header_para = header.paragraphs[0]
header_para.text = '公司名称 - 机密文档'
header_para.alignment = WD_ALIGN_PARAGRAPH.CENTER

# Footer: static text in which 'X' is a literal character, not a page
# number field.
footer = section.footer
footer_para = footer.paragraphs[0]
footer_para.text = '第 X 页'
footer_para.alignment = WD_ALIGN_PARAGRAPH.CENTER

# 添加页码
from docx.oxml.ns import qn
from docx.oxml import OxmlElement

def add_page_number(paragraph):
    """Append a PAGE field to *paragraph*.

    A new run is added to the paragraph and three raw elements are appended
    to it in order: a ``begin`` field character, the ``PAGE`` instruction
    text, and an ``end`` field character.

    Args:
        paragraph: The python-docx paragraph that receives the field.
    """
    field_run = paragraph.add_run()._r
    char_type_attr = qn('w:fldCharType')

    begin_marker = OxmlElement('w:fldChar')
    begin_marker.set(char_type_attr, 'begin')

    instruction = OxmlElement('w:instrText')
    instruction.text = 'PAGE'

    end_marker = OxmlElement('w:fldChar')
    end_marker.set(char_type_attr, 'end')

    for field_part in (begin_marker, instruction, end_marker):
        field_run.append(field_part)

# Cm is needed for the margins and page size below but this snippet's
# import header only brings in Pt, so import it here.
from docx.shared import Cm

# Rebuild the footer as "第 <page number> 页": clear the paragraph, then add
# the literal text as runs on either side of the PAGE field.
footer_para = footer.paragraphs[0]
footer_para.clear()
footer_para.add_run('第 ')
add_page_number(footer_para)
footer_para.add_run(' 页')
footer_para.alignment = WD_ALIGN_PARAGRAPH.CENTER

# Page margins.
section.top_margin = Cm(2.54)
section.bottom_margin = Cm(2.54)
section.left_margin = Cm(3.17)
section.right_margin = Cm(3.17)

# Page size.
section.page_width = Cm(21)    # A4 width
section.page_height = Cm(29.7)  # A4 height

# Page orientation.
from docx.enum.section import WD_ORIENT
section.orientation = WD_ORIENT.PORTRAIT  # portrait
# section.orientation = WD_ORIENT.LANDSCAPE  # landscape

doc.save('header_footer.docx')

9. 样式操作

from docx import Document
from docx.shared import Pt, RGBColor
from docx.enum.style import WD_STYLE_TYPE

doc = Document()

# List the styles available in the document, with their type.
for style in doc.styles:
    print(f"{style.name}: {style.type}")

# Use existing styles by name.
doc.add_paragraph('正文样式', style='Normal')
doc.add_paragraph('标题1样式', style='Heading 1')
doc.add_paragraph('引用样式', style='Quote')

# Modify an existing style.
style = doc.styles['Normal']
style.font.name = '微软雅黑'
style.font.size = Pt(12)

# Create a new paragraph style.
from docx.enum.style import WD_STYLE_TYPE

new_style = doc.styles.add_style('MyStyle', WD_STYLE_TYPE.PARAGRAPH)
new_style.font.name = '黑体'
new_style.font.size = Pt(14)
new_style.font.bold = True
new_style.font.color.rgb = RGBColor(0, 0, 128)

# Use the new style.
doc.add_paragraph('使用自定义样式', style='MyStyle')

# Create a style based on an existing one, then change only the font color
# on the new style.
new_style2 = doc.styles.add_style('MyHeading', WD_STYLE_TYPE.PARAGRAPH)
new_style2.base_style = doc.styles['Heading 1']
new_style2.font.color.rgb = RGBColor(255, 0, 0)

doc.save('styles.docx')

10. 实战案例

案例:生成工作报告

"""
实战案例:自动生成工作报告
"""
from docx import Document
from docx.shared import Inches, Pt, Cm, RGBColor
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.enum.table import WD_TABLE_ALIGNMENT
from docx.oxml.ns import qn
from docx.oxml import OxmlElement
from datetime import datetime

def _format_completion_rate(completed, planned):
    """Return completed/planned as a one-decimal percentage, or 'N/A' if planned is 0 or missing."""
    if not planned:
        return 'N/A'
    return f"{completed / planned * 100:.1f}%"


def create_work_report(data, output_file):
    """Build a work report as a .docx file.

    The report contains a centered title, an author/date line, a summary,
    a numbered list of completed tasks, a statistics table, a bulleted
    plan for next week, an issues section and a footer with the
    generation time.

    Args:
        data: Report data dictionary with the keys ``title``, ``author``,
            ``date``, ``summary``, ``completed_tasks`` (dicts with
            ``name``/``date``/``description``), ``statistics`` (dicts with
            ``name``/``planned``/``completed``), ``next_week_plans`` and,
            optionally, ``issues``.
        output_file: Path of the .docx file to write.

    Raises:
        KeyError: If a required key is missing from *data*.
    """
    doc = Document()

    # Default font; the East Asian font slot is set as well so the font
    # name is also recorded for Chinese text.
    style = doc.styles['Normal']
    style.font.name = '微软雅黑'
    style.font.size = Pt(11)
    style._element.rPr.rFonts.set(qn('w:eastAsia'), '微软雅黑')

    # Title.
    title = doc.add_heading(data['title'], level=0)
    title.alignment = WD_ALIGN_PARAGRAPH.CENTER

    # Author and date on one centered line.
    info_para = doc.add_paragraph()
    info_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
    info_para.add_run(f"报告人:{data['author']}    ")
    info_para.add_run(f"日期:{data['date']}")

    doc.add_paragraph()  # blank line

    # Section 1: summary.
    doc.add_heading('一、工作概述', level=1)
    doc.add_paragraph(data['summary'])

    # Section 2: completed tasks, one 'List Number' paragraph per task.
    doc.add_heading('二、完成的工作', level=1)
    for task in data['completed_tasks']:
        para = doc.add_paragraph(style='List Number')
        para.add_run(f"{task['name']}").bold = True
        para.add_run(f"\n   完成时间:{task['date']}")
        para.add_run(f"\n   工作内容:{task['description']}")

    # Section 3: statistics table.
    doc.add_heading('三、工作数据', level=1)

    table = doc.add_table(rows=1, cols=4)
    table.style = 'Table Grid'
    table.alignment = WD_TABLE_ALIGNMENT.CENTER

    # Header row: centered and bold.
    headers = ['项目', '计划', '完成', '完成率']
    for header_cell, header in zip(table.rows[0].cells, headers):
        header_cell.text = header
        header_para = header_cell.paragraphs[0]
        header_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
        for run in header_para.runs:
            run.font.bold = True

    # Data rows. The rate goes through a helper so that a planned value of
    # 0 yields 'N/A' instead of raising ZeroDivisionError.
    for item in data['statistics']:
        row = table.add_row()
        values = (
            item['name'],
            str(item['planned']),
            str(item['completed']),
            _format_completion_rate(item['completed'], item['planned']),
        )
        for cell, value in zip(row.cells, values):
            cell.text = value
            cell.paragraphs[0].alignment = WD_ALIGN_PARAGRAPH.CENTER

    doc.add_paragraph()

    # Section 4: plan for next week.
    doc.add_heading('四、下周计划', level=1)
    for plan in data['next_week_plans']:
        doc.add_paragraph(plan, style='List Bullet')

    # Section 5: issues; a missing or empty list is reported as '无'.
    doc.add_heading('五、问题与建议', level=1)
    issues = data.get('issues')
    if issues:
        for issue in issues:
            doc.add_paragraph(issue, style='List Bullet')
    else:
        doc.add_paragraph('无')

    # Footer with the generation timestamp (local time).
    footer_para = doc.sections[0].footer.paragraphs[0]
    footer_para.text = f"生成时间:{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
    footer_para.alignment = WD_ALIGN_PARAGRAPH.CENTER

    # Save.
    doc.save(output_file)
    print(f"报告已生成:{output_file}")

# 使用示例
if __name__ == '__main__':
    # Sample input: the pieces are built separately and then assembled in
    # the key order the report expects.
    completed_tasks = [
        {'name': '项目A模块开发', 'date': '12月16日', 'description': '完成核心功能模块的开发'},
        {'name': '代码审查', 'date': '12月17日', 'description': '审查团队成员提交的代码'},
        {'name': '系统测试', 'date': '12月18日', 'description': '配合测试团队完成系统测试'},
    ]
    weekly_statistics = [
        {'name': '代码行数', 'planned': 1000, 'completed': 1200},
        {'name': 'Bug修复', 'planned': 10, 'completed': 12},
        {'name': '文档编写', 'planned': 5, 'completed': 4},
    ]
    next_week_plans = [
        '完成项目A的剩余功能开发',
        '编写技术文档',
        '参与项目B的需求评审',
    ]
    open_issues = [
        '服务器资源不足,影响开发效率',
        '部分需求不够明确,需要进一步沟通',
    ]

    report_data = {
        'title': '周工作报告',
        'author': '张三',
        'date': '2024年12月18日',
        'summary': '本周主要完成了项目A的开发工作,并协助测试团队进行了系统测试。',
        'completed_tasks': completed_tasks,
        'statistics': weekly_statistics,
        'next_week_plans': next_week_plans,
        'issues': open_issues,
    }

    create_work_report(report_data, '周工作报告.docx')

11. 总结

🔑 核心要点

知识点 | 要点
文档操作 | Document(), save(), add_heading(), add_paragraph()
段落格式 | 对齐、缩进、间距、行距
文本格式 | 字体、大小、颜色、粗体、斜体
表格 | add_table(), 合并单元格、样式设置
图片 | add_picture(), 指定大小
页眉页脚 | section.header, section.footer

✅ 学习检查清单

  • 能创建和读取Word文档
  • 能设置段落和文本格式
  • 能创建和格式化表格
  • 能插入图片
  • 能设置页眉页脚

📖 下一步学习

掌握了Word操作后,让我们学习PPT自动化:


常见问题 FAQ

💬 python-docx能读取PDF吗?

不能。python-docx只处理.docx格式。读PDF可以用PyPDF2或pdfplumber。读.doc(旧格式)需要先用LibreOffice命令行转换为.docx。

💬 怎么用模板生成Word文档?

两种方式:1)用python-docx读取模板文件,替换占位符文本;2)用docxtpl库,支持Jinja2模板语法,更适合复杂模板。


系列导航

End of file.