Python操作Excel:openpyxl读写Excel,告别重复的复制粘贴
Excel是办公中最常用的数据处理工具。Python可以通过openpyxl库读写Excel文件,实现数据处理、报表生成、格式设置等自动化操作。本篇将详细介绍openpyxl的使用方法。
1. openpyxl简介
pip install openpyxl
from openpyxl import Workbook, load_workbook
from openpyxl.styles import Font, Alignment, Border, Side, PatternFill
from openpyxl.utils import get_column_letter, column_index_from_string
from openpyxl.chart import BarChart, LineChart, PieChart, Reference
# openpyxl vs pandas
# openpyxl:精细控制Excel格式、样式、图表
# pandas:数据分析和批量处理
# 通常结合使用:pandas处理数据,openpyxl设置格式
2. 读取Excel文件
from openpyxl import load_workbook
# 加载工作簿
wb = load_workbook('data.xlsx')
# 只读模式(大文件推荐)
wb = load_workbook('data.xlsx', read_only=True)
# 保留公式(默认会计算结果)
wb = load_workbook('data.xlsx', data_only=False)
# 获取工作表
print(wb.sheetnames) # 所有工作表名称
ws = wb.active # 当前活动工作表
ws = wb['Sheet1'] # 按名称获取
# 读取单元格
cell = ws['A1']
print(cell.value) # 单元格值
print(cell.row) # 行号
print(cell.column) # 列号
print(cell.coordinate) # 坐标,如'A1'
# 另一种方式
cell = ws.cell(row=1, column=1)
print(cell.value)
# 读取一行
for cell in ws[1]: # 第1行
print(cell.value, end=' ')
# 读取一列
for cell in ws['A']: # A列
print(cell.value)
# 读取范围
for row in ws['A1:C3']:
for cell in row:
print(cell.value, end=' ')
print()
# 遍历所有数据
for row in ws.iter_rows(min_row=1, max_row=10, min_col=1, max_col=5):
for cell in row:
print(cell.value, end=' ')
print()
# 获取数据范围
print(ws.dimensions) # 如 'A1:E100'
print(ws.max_row) # 最大行号
print(ws.max_column) # 最大列号
# 转换为列表
data = []
for row in ws.iter_rows(values_only=True):
data.append(row)
3. 写入Excel文件
from openpyxl import Workbook
# 创建新工作簿
wb = Workbook()
ws = wb.active
ws.title = "数据表"
# 写入单元格
ws['A1'] = 'Hello'
ws['B1'] = 'World'
ws.cell(row=2, column=1, value='Python')
# 写入一行
ws.append(['姓名', '年龄', '城市'])
ws.append(['张三', 25, '北京'])
ws.append(['李四', 30, '上海'])
# 批量写入
data = [
['产品', '销量', '金额'],
['产品A', 100, 1000],
['产品B', 200, 2000],
['产品C', 150, 1500],
]
for row in data:
ws.append(row)
# 创建新工作表
ws2 = wb.create_sheet('Sheet2')
ws3 = wb.create_sheet('Sheet3', 0) # 插入到第一个位置
# 复制工作表
ws_copy = wb.copy_worksheet(ws)
# 删除工作表
del wb['Sheet3']
# 保存
wb.save('output.xlsx')
# 关闭(只读模式需要)
wb.close()
4. 单元格操作
from openpyxl import Workbook
from openpyxl.utils import get_column_letter, column_index_from_string
wb = Workbook()
ws = wb.active
# 列号与字母转换
print(get_column_letter(1)) # 'A'
print(get_column_letter(27)) # 'AA'
print(column_index_from_string('A')) # 1
print(column_index_from_string('AA')) # 27
# 设置行高和列宽
ws.row_dimensions[1].height = 30 # 第1行高度
ws.column_dimensions['A'].width = 20 # A列宽度
# 批量设置列宽
for col in range(1, 10):
ws.column_dimensions[get_column_letter(col)].width = 15
# 自动调整列宽(近似)
def auto_adjust_column_width(ws):
for column in ws.columns:
max_length = 0
column_letter = get_column_letter(column[0].column)
for cell in column:
try:
if len(str(cell.value)) > max_length:
max_length = len(str(cell.value))
except:
pass
adjusted_width = (max_length + 2) * 1.2
ws.column_dimensions[column_letter].width = adjusted_width
# 隐藏行/列
ws.row_dimensions[1].hidden = True
ws.column_dimensions['A'].hidden = True
# 冻结窗格
ws.freeze_panes = 'B2' # 冻结第1行和A列
# 插入/删除行列
ws.insert_rows(2) # 在第2行前插入一行
ws.insert_cols(3) # 在第3列前插入一列
ws.delete_rows(2) # 删除第2行
ws.delete_cols(3) # 删除第3列
# 移动单元格范围
ws.move_range('A1:C3', rows=2, cols=1) # 向下移动2行,向右移动1列
5. 样式设置
from openpyxl import Workbook
from openpyxl.styles import (
Font, Alignment, Border, Side, PatternFill,
NamedStyle, numbers
)
wb = Workbook()
ws = wb.active
# 字体
font = Font(
name='微软雅黑',
size=12,
bold=True,
italic=False,
color='FF0000', # 红色
underline='single'
)
ws['A1'].font = font
# 对齐
alignment = Alignment(
horizontal='center', # left, center, right
vertical='center', # top, center, bottom
wrap_text=True, # 自动换行
text_rotation=0 # 文字旋转角度
)
ws['A1'].alignment = alignment
# 边框
thin_border = Border(
left=Side(style='thin'),
right=Side(style='thin'),
top=Side(style='thin'),
bottom=Side(style='thin')
)
ws['A1'].border = thin_border
# 填充(背景色)
fill = PatternFill(
start_color='FFFF00', # 黄色
end_color='FFFF00',
fill_type='solid'
)
ws['A1'].fill = fill
# 数字格式
ws['B1'] = 1234.5678
ws['B1'].number_format = '#,##0.00' # 千位分隔,2位小数
ws['B2'] = 0.756
ws['B2'].number_format = '0.00%' # 百分比
ws['B3'] = 44927 # Excel日期序列号
ws['B3'].number_format = 'YYYY-MM-DD' # 日期格式
# 创建命名样式(可复用)
header_style = NamedStyle(name='header')
header_style.font = Font(bold=True, size=14)
header_style.alignment = Alignment(horizontal='center')
header_style.fill = PatternFill(start_color='4472C4', fill_type='solid')
header_style.border = thin_border
# 注册并使用
wb.add_named_style(header_style)
ws['A1'].style = 'header'
# 批量设置样式
def set_header_style(ws, row=1):
"""设置表头样式"""
for cell in ws[row]:
cell.font = Font(bold=True, color='FFFFFF')
cell.fill = PatternFill(start_color='4472C4', fill_type='solid')
cell.alignment = Alignment(horizontal='center')
cell.border = thin_border
def set_border_for_range(ws, cell_range):
"""为范围设置边框"""
for row in ws[cell_range]:
for cell in row:
cell.border = thin_border
# 使用
ws.append(['姓名', '年龄', '部门', '薪资'])
ws.append(['张三', 25, '技术部', 10000])
ws.append(['李四', 30, '销售部', 12000])
set_header_style(ws, 1)
set_border_for_range(ws, 'A1:D3')
wb.save('styled.xlsx')
6. 合并单元格
from openpyxl import Workbook
from openpyxl.styles import Alignment
wb = Workbook()
ws = wb.active
# 合并单元格
ws.merge_cells('A1:D1')
ws['A1'] = '合并的标题'
ws['A1'].alignment = Alignment(horizontal='center')
# 另一种方式
ws.merge_cells(start_row=2, start_column=1, end_row=2, end_column=4)
# 取消合并
ws.unmerge_cells('A1:D1')
# 创建表头示例
def create_merged_header(ws):
"""创建合并表头"""
# 第一行:大标题
ws.merge_cells('A1:F1')
ws['A1'] = '2024年度销售报表'
ws['A1'].font = Font(size=16, bold=True)
ws['A1'].alignment = Alignment(horizontal='center')
# 第二行:分类标题
ws.merge_cells('A2:B2')
ws['A2'] = '基本信息'
ws.merge_cells('C2:D2')
ws['C2'] = '销售数据'
ws.merge_cells('E2:F2')
ws['E2'] = '统计信息'
# 第三行:具体列名
headers = ['姓名', '部门', '销量', '金额', '完成率', '排名']
for col, header in enumerate(headers, 1):
ws.cell(row=3, column=col, value=header)
wb.save('merged.xlsx')
7. 插入图表
from openpyxl import Workbook
from openpyxl.chart import BarChart, LineChart, PieChart, Reference
wb = Workbook()
ws = wb.active
# 准备数据
data = [
['月份', '销量', '目标'],
['1月', 100, 120],
['2月', 150, 120],
['3月', 130, 120],
['4月', 180, 150],
['5月', 200, 150],
]
for row in data:
ws.append(row)
# 柱状图
chart = BarChart()
chart.title = '月度销量'
chart.x_axis.title = '月份'
chart.y_axis.title = '销量'
# 数据范围
data_ref = Reference(ws, min_col=2, min_row=1, max_col=3, max_row=6)
categories = Reference(ws, min_col=1, min_row=2, max_row=6)
chart.add_data(data_ref, titles_from_data=True)
chart.set_categories(categories)
chart.shape = 4 # 图表样式
# 放置图表
ws.add_chart(chart, 'E2')
# 折线图
line_chart = LineChart()
line_chart.title = '销量趋势'
line_chart.add_data(data_ref, titles_from_data=True)
line_chart.set_categories(categories)
ws.add_chart(line_chart, 'E18')
# 饼图
ws2 = wb.create_sheet('饼图')
pie_data = [
['产品', '占比'],
['产品A', 30],
['产品B', 25],
['产品C', 20],
['产品D', 15],
['其他', 10],
]
for row in pie_data:
ws2.append(row)
pie_chart = PieChart()
pie_chart.title = '产品占比'
labels = Reference(ws2, min_col=1, min_row=2, max_row=6)
data = Reference(ws2, min_col=2, min_row=1, max_row=6)
pie_chart.add_data(data, titles_from_data=True)
pie_chart.set_categories(labels)
ws2.add_chart(pie_chart, 'D2')
wb.save('charts.xlsx')
8. 数据验证
from openpyxl import Workbook
from openpyxl.worksheet.datavalidation import DataValidation
wb = Workbook()
ws = wb.active
# 下拉列表
dv_list = DataValidation(
type='list',
formula1='"技术部,销售部,财务部,人事部"',
allow_blank=True
)
dv_list.error = '请从列表中选择'
dv_list.errorTitle = '输入错误'
dv_list.prompt = '请选择部门'
dv_list.promptTitle = '部门选择'
ws.add_data_validation(dv_list)
dv_list.add('B2:B100') # 应用到B2:B100
# 数值范围验证
dv_number = DataValidation(
type='whole',
operator='between',
formula1=0,
formula2=100
)
dv_number.error = '请输入0-100之间的整数'
ws.add_data_validation(dv_number)
dv_number.add('C2:C100')
# 日期验证
dv_date = DataValidation(
type='date',
operator='greaterThan',
formula1='2024-01-01'
)
ws.add_data_validation(dv_date)
dv_date.add('D2:D100')
# 添加表头
ws.append(['姓名', '部门', '分数', '入职日期'])
wb.save('validation.xlsx')
9. 公式操作
from openpyxl import Workbook
wb = Workbook()
ws = wb.active
# 添加数据
ws.append(['产品', '单价', '数量', '金额'])
ws.append(['产品A', 100, 10, '=B2*C2'])
ws.append(['产品B', 200, 5, '=B3*C3'])
ws.append(['产品C', 150, 8, '=B4*C4'])
ws.append(['合计', '', '', '=SUM(D2:D4)'])
# 常用公式
ws['E2'] = '=IF(D2>1000,"高","低")'
ws['F2'] = '=VLOOKUP(A2,Sheet2!A:B,2,FALSE)'
ws['G2'] = '=AVERAGE(D2:D4)'
ws['H2'] = '=MAX(D2:D4)'
ws['I2'] = '=MIN(D2:D4)'
ws['J2'] = '=COUNT(D2:D4)'
ws['K2'] = '=COUNTIF(D2:D4,">1000")'
# 使用绝对引用
ws['L2'] = '=B2*$M$1' # M1为税率
# 跨表引用
ws['N2'] = "=Sheet2!A1"
wb.save('formulas.xlsx')
# 读取公式结果
wb2 = load_workbook('formulas.xlsx', data_only=True)
ws2 = wb2.active
print(ws2['D5'].value) # 显示计算结果
10. 实战案例
案例:生成销售报表
"""
实战案例:生成格式化的销售报表
"""
from openpyxl import Workbook
from openpyxl.styles import Font, Alignment, Border, Side, PatternFill
from openpyxl.chart import BarChart, Reference
from openpyxl.utils import get_column_letter
from datetime import datetime
def create_sales_report(data, output_file):
"""
创建销售报表
Args:
data: 销售数据列表,每项包含 (销售员, 产品, 数量, 单价)
output_file: 输出文件路径
"""
wb = Workbook()
ws = wb.active
ws.title = '销售明细'
# 样式定义
title_font = Font(name='微软雅黑', size=16, bold=True)
header_font = Font(name='微软雅黑', size=11, bold=True, color='FFFFFF')
header_fill = PatternFill(start_color='4472C4', fill_type='solid')
center_align = Alignment(horizontal='center', vertical='center')
thin_border = Border(
left=Side(style='thin'),
right=Side(style='thin'),
top=Side(style='thin'),
bottom=Side(style='thin')
)
# 标题
ws.merge_cells('A1:E1')
ws['A1'] = f'销售报表 - {datetime.now().strftime("%Y年%m月")}'
ws['A1'].font = title_font
ws['A1'].alignment = center_align
ws.row_dimensions[1].height = 30
# 表头
headers = ['销售员', '产品', '数量', '单价', '金额']
for col, header in enumerate(headers, 1):
cell = ws.cell(row=3, column=col, value=header)
cell.font = header_font
cell.fill = header_fill
cell.alignment = center_align
cell.border = thin_border
# 数据
for row_idx, (salesperson, product, qty, price) in enumerate(data, 4):
ws.cell(row=row_idx, column=1, value=salesperson)
ws.cell(row=row_idx, column=2, value=product)
ws.cell(row=row_idx, column=3, value=qty)
ws.cell(row=row_idx, column=4, value=price)
ws.cell(row=row_idx, column=5, value=f'=C{row_idx}*D{row_idx}')
# 设置边框
for col in range(1, 6):
ws.cell(row=row_idx, column=col).border = thin_border
ws.cell(row=row_idx, column=col).alignment = center_align
# 合计行
last_row = len(data) + 4
ws.cell(row=last_row, column=1, value='合计')
ws.cell(row=last_row, column=3, value=f'=SUM(C4:C{last_row-1})')
ws.cell(row=last_row, column=5, value=f'=SUM(E4:E{last_row-1})')
for col in range(1, 6):
cell = ws.cell(row=last_row, column=col)
cell.font = Font(bold=True)
cell.border = thin_border
cell.alignment = center_align
# 设置列宽
column_widths = [12, 15, 10, 10, 12]
for col, width in enumerate(column_widths, 1):
ws.column_dimensions[get_column_letter(col)].width = width
# 数字格式
for row in range(4, last_row + 1):
ws.cell(row=row, column=4).number_format = '¥#,##0.00'
ws.cell(row=row, column=5).number_format = '¥#,##0.00'
# 添加图表
chart = BarChart()
chart.title = '销售金额'
chart.y_axis.title = '金额'
data_ref = Reference(ws, min_col=5, min_row=3, max_row=last_row-1)
categories = Reference(ws, min_col=1, min_row=4, max_row=last_row-1)
chart.add_data(data_ref, titles_from_data=True)
chart.set_categories(categories)
ws.add_chart(chart, 'G3')
# 保存
wb.save(output_file)
print(f'报表已生成:{output_file}')
# 使用示例
if __name__ == '__main__':
sales_data = [
('张三', '产品A', 10, 100),
('张三', '产品B', 5, 200),
('李四', '产品A', 8, 100),
('李四', '产品C', 12, 150),
('王五', '产品B', 6, 200),
('王五', '产品C', 10, 150),
]
create_sales_report(sales_data, '销售报表.xlsx')
11. 总结
🔑 核心要点
| 知识点 | 要点 |
|---|---|
| 读取 | load_workbook(), iter_rows() |
| 写入 | Workbook(), append(), save() |
| 样式 | Font, Alignment, Border, PatternFill |
| 合并 | merge_cells(), unmerge_cells() |
| 图表 | BarChart, LineChart, PieChart |
| 验证 | DataValidation |
| 公式 | 直接写入公式字符串 |
✅ 学习检查清单
- 能读写Excel文件
- 能设置单元格样式
- 能合并单元格
- 能插入图表
- 能添加数据验证
- 能使用公式
📖 下一步学习
掌握了Excel操作后,让我们学习Word文档自动化:
常见问题 FAQ
💬 openpyxl能读.xls文件吗?
不能。openpyxl只支持.xlsx格式。读.xls用xlrd库,或者先用Excel另存为.xlsx。pandas的read_excel()会自动选择引擎。
💬 openpyxl和pandas操作Excel有什么区别?
openpyxl操作单元格级别(格式、合并、图表),pandas操作数据级别(筛选、统计、透视)。生成格式化报表用openpyxl,数据分析用pandas。
� 系列导航
- 上一篇:15 - OA导出数据清洗与字段规范化
- 当前:16 - Python操作Excel
- 下一篇:17 - Python操作Word