每天节省2小时重复劳动!

你有没有过这样的经历:

  • 需要从10个Excel表格里汇总数据,复制粘贴到天荒地老
  • 需要给100个Excel表格添加统一的格式,打开一个改一个
  • 需要根据条件筛选数据,手动筛选到崩溃

别再假装你会手动处理Excel了,用Python吧!

今天我们做一个Excel批量处理工具,几秒钟搞定几小时的工作。

项目目标

实现一个Excel批量处理工具:

  1. 批量合并多个Excel文件
  2. 批量格式化Excel(设置列宽、字体、颜色等)
  3. 批量添加公式和计算列
  4. 批量筛选数据
  5. 生成数据统计报告

准备工作

安装所需库:

pip install openpyxl pandas

工具1:批量合并Excel文件

import os
from openpyxl import load_workbook, Workbook

def merge_excel_files(file_paths, output_path, skip_rows=0):
    """
    批量合并多个Excel文件

    Args:
        file_paths: Excel文件路径列表
        output_path: 输出文件路径
        skip_rows: 跳过每个文件的行数(用于跳过表头)
    """
    # 创建新的Excel文件
    merged_workbook = Workbook()
    merged_sheet = merged_workbook.active
    merged_sheet.title = "合并数据"

    row_index = 1

    for i, file_path in enumerate(file_paths):
        print(f"正在处理第{i+1}/{len(file_paths)}个文件:{os.path.basename(file_path)}")

        try:
            # 读取Excel文件
            workbook = load_workbook(file_path)
            sheet = workbook.active

            # 读取数据
            for row in sheet.iter_rows(min_row=skip_rows + 1, values_only=True):
                if row[0] is not None:  # 跳过空行
                    # 写入数据
                    for col_index, cell_value in enumerate(row, 1):
                        merged_sheet.cell(row=row_index, column=col_index, value=cell_value)
                    row_index += 1

            workbook.close()
            print(f"  - 成功读取 {sheet.max_row} 行数据")

        except Exception as e:
            print(f"  - 读取失败:{e}")

    # 保存文件
    merged_workbook.save(output_path)
    print(f"\n合并完成!共 {row_index - 1} 行数据")
    print(f"保存到:{output_path}")

def merge_folder_excel_files(folder_path, output_path, pattern="*.xlsx"):
    """
    合并文件夹下的所有Excel文件

    Args:
        folder_path: 文件夹路径
        output_path: 输出文件路径
        pattern: 文件匹配模式
    """
    import glob

    # 获取所有Excel文件
    file_paths = glob.glob(os.path.join(folder_path, pattern))

    if not file_paths:
        print("没有找到Excel文件!")
        return

    print(f"找到 {len(file_paths)} 个Excel文件")
    merge_excel_files(file_paths, output_path)

# 使用示例
if __name__ == "__main__":
    # 合并指定文件
    file_paths = [
        "data/sales_1月.xlsx",
        "data/sales_2月.xlsx",
        "data/sales_3月.xlsx"
    ]
    merge_excel_files(file_paths, "output/合并销售数据.xlsx", skip_rows=1)

    # 合并文件夹下的所有Excel
    # merge_folder_excel_files("data", "output/全部数据.xlsx")

工具2:批量格式化Excel

from openpyxl import load_workbook
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
from openpyxl.utils import get_column_letter

def format_excel(input_path, output_path):
    """
    格式化Excel文件

    功能:
    - 设置标题行样式
    - 自动调整列宽
    - 添加边框
    - 设置对齐方式
    - 添加条件格式
    """
    # 读取Excel文件
    workbook = load_workbook(input_path)
    sheet = workbook.active

    # 定义样式
    header_font = Font(name="微软雅黑", size=12, bold=True, color="FFFFFF")
    header_fill = PatternFill(start_color="4472C4", end_color="4472C4", fill_type="solid")
    header_alignment = Alignment(horizontal="center", vertical="center")

    body_font = Font(name="微软雅黑", size=10)
    body_alignment = Alignment(horizontal="center", vertical="center")

    thin_border = Border(
        left=Side(style='thin', color='000000'),
        right=Side(style='thin', color='000000'),
        top=Side(style='thin', color='000000'),
        bottom=Side(style='thin', color='000000')
    )

    # 格式化标题行
    for cell in sheet[1]:
        cell.font = header_font
        cell.fill = header_fill
        cell.alignment = header_alignment
        cell.border = thin_border

    # 格式化数据行
    for row in sheet.iter_rows(min_row=2):
        for cell in row:
            cell.font = body_font
            cell.alignment = body_alignment
            cell.border = thin_border

    # 自动调整列宽
    for col in range(1, sheet.max_column + 1):
        max_length = 0
        column = get_column_letter(col)

        # 计算该列的最大宽度
        for row in range(1, sheet.max_row + 1):
            cell = sheet[f"{column}{row}"]
            try:
                if cell.value:
                    max_length = max(max_length, len(str(cell.value)))
            except:
                pass

        # 设置列宽(留一些余量)
        adjusted_width = min(max_length + 2, 50)
        sheet.column_dimensions[column].width = adjusted_width

    # 冻结首行
    sheet.freeze_panes = "A2"

    # 保存文件
    workbook.save(output_path)
    print(f"格式化完成!保存到:{output_path}")

# 使用示例
if __name__ == "__main__":
    format_excel("data/sales_data.xlsx", "output/销售数据_已格式化.xlsx")

工具3:批量添加计算列

from openpyxl import load_workbook

def add_calculated_column(input_path, output_path, formulas):
    """
    批量添加计算列

    Args:
        input_path: 输入文件路径
        output_path: 输出文件路径
        formulas: 计算列配置列表
            [
                {
                    "column": "D",  # 列名
                    "header": "金额",  # 表头名称
                    "formula": "=B*C"  # 公式(使用列号)
                }
            ]
    """
    workbook = load_workbook(input_path)
    sheet = workbook.active

    for config in formulas:
        column = config["column"]
        header = config["header"]
        formula = config["formula"]

        # 添加表头
        sheet[f"{column}1"] = header

        # 添加公式
        for row in range(2, sheet.max_row + 1):
            cell = sheet[f"{column}{row}"]
            cell.value = formula

        print(f"已添加列【{column}】:{header}")

    # 保存文件
    workbook.save(output_path)
    print(f"计算列添加完成!保存到:{output_path}")

# 使用示例
if __name__ == "__main__":
    formulas = [
        {
            "column": "D",
            "header": "金额",
            "formula": "=B2*C2"
        },
        {
            "column": "E",
            "header": "税率",
            "formula": "=D2*0.1"
        }
    ]

    # 注意:公式需要根据实际行号调整
    # 这里简化处理,实际应用中需要动态生成公式
    add_calculated_column(
        "data/product_list.xlsx",
        "output/product_list_计算.xlsx",
        formulas
    )

工具4:批量筛选数据

from openpyxl import load_workbook, Workbook

def filter_excel(input_path, output_path, filters):
    """
    批量筛选Excel数据

    Args:
        input_path: 输入文件路径
        output_path: 输出文件路径
        filters: 筛选条件列表
            [
                {
                    "column": "C",  # 列名
                    "operator": ">",  # 操作符:>, <, =, >=, <=, !=
                    "value": 100  # 值
                }
            ]
    """
    # 读取数据
    workbook = load_workbook(input_path)
    sheet = workbook.active

    # 读取表头
    headers = [cell.value for cell in sheet[1]]

    # 筛选数据
    filtered_rows = []

    for row in sheet.iter_rows(min_row=2, values_only=True):
        # 默认包含该行
        include = True

        # 检查所有筛选条件
        for filter_config in filters:
            column = filter_config["column"]
            operator = filter_config["operator"]
            value = filter_config["value"]

            # 获取列号
            col_index = ord(column.upper()) - ord("A")

            # 获取单元格值
            cell_value = row[col_index]

            # 执行筛选
            try:
                cell_value = float(cell_value) if cell_value is not None else 0
                value = float(value)

                if operator == ">" and not (cell_value > value):
                    include = False
                    break
                elif operator == "<" and not (cell_value < value):
                    include = False
                    break
                elif operator == "=" and not (cell_value == value):
                    include = False
                    break
                elif operator == ">=" and not (cell_value >= value):
                    include = False
                    break
                elif operator == "<=" and not (cell_value <= value):
                    include = False
                    break
                elif operator == "!=" and not (cell_value != value):
                    include = False
                    break
            except:
                include = False
                break

        if include:
            filtered_rows.append(row)

    # 创建新文件
    filtered_workbook = Workbook()
    filtered_sheet = filtered_workbook.active

    # 写入表头
    for col, header in enumerate(headers, 1):
        filtered_sheet.cell(row=1, column=col, value=header)

    # 写入筛选后的数据
    for row_index, row_data in enumerate(filtered_rows, 2):
        for col, value in enumerate(row_data, 1):
            filtered_sheet.cell(row=row_index, column=col, value=value)

    # 保存文件
    filtered_workbook.save(output_path)
    print(f"筛选完成!保留 {len(filtered_rows)} 行数据")
    print(f"保存到:{output_path}")

# 使用示例
if __name__ == "__main__":
    filters = [
        {
            "column": "D",  # D列(金额)
            "operator": ">",
            "value": 1000  # 金额大于1000
        }
    ]

    filter_excel(
        "data/sales_data.xlsx",
        "output/销售数据_筛选.xlsx",
        filters
    )

工具5:生成数据统计报告

from openpyxl import load_workbook, Workbook
from openpyxl.styles import Font, PatternFill, Alignment
import statistics

def generate_statistics(input_path, output_path):
    """生成数据统计报告"""
    # 读取数据
    workbook = load_workbook(input_path)
    sheet = workbook.active

    # 读取所有数据
    data = list(sheet.iter_rows(values_only=True))
    headers = data[0]  # 第一行是表头
    rows = data[1:]  # 其他是数据

    # 创建报告
    report_workbook = Workbook()
    report_sheet = report_workbook.active
    report_sheet.title = "统计报告"

    # 设置标题样式
    title_font = Font(name="微软雅黑", size=16, bold=True)
    header_font = Font(name="微软雅黑", size=12, bold=True)
    header_fill = PatternFill(start_color="4472C4", end_color="4472C4", fill_type="solid")
    header_alignment = Alignment(horizontal="center", vertical="center")

    # 写入标题
    report_sheet["A1"] = "数据统计报告"
    report_sheet["A1"].font = title_font
    report_sheet.merge_cells("A1:E1")

    # 写入基本信息
    report_sheet["A3"] = "统计时间:"
    report_sheet["B3"] = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    report_sheet["A4"] = "总行数:"
    report_sheet["B4"] = len(rows)

    # 写入各列统计信息
    row_index = 6
    report_sheet["A6"] = "列名"
    report_sheet["B6"] = "数据类型"
    report_sheet["C6"] = "非空数量"
    report_sheet["D6"] = "最小值"
    report_sheet["E6"] = "最大值"
    report_sheet["F6"] = "平均值"

    # 设置表头样式
    for col in ["A", "B", "C", "D", "E", "F"]:
        cell = report_sheet[f"{col}6"]
        cell.font = header_font
        cell.fill = header_fill
        cell.alignment = header_alignment

    # 统计每一列
    for col_index, header in enumerate(headers, 1):
        col_letter = chr(ord("A") + col_index - 1)

        # 获取该列所有数据
        col_data = [row[col_index - 1] for row in rows]

        # 统计非空数量
        non_null_count = sum(1 for value in col_data if value is not None)

        # 判断数据类型
        sample = col_data[0] if col_data else None
        if isinstance(sample, (int, float)):
            data_type = "数字"
            min_value = min(col_data) if col_data else ""
            max_value = max(col_data) if col_data else ""
            avg_value = statistics.mean(col_data) if col_data else ""
        else:
            data_type = "文本"
            min_value = ""
            max_value = ""
            avg_value = ""

        # 写入统计信息
        row_index += 1
        report_sheet[f"A{row_index}"] = header
        report_sheet[f"B{row_index}"] = data_type
        report_sheet[f"C{row_index}"] = non_null_count
        report_sheet[f"D{row_index}"] = min_value
        report_sheet[f"E{row_index}"] = max_value
        report_sheet[f"F{row_index}"] = round(avg_value, 2) if avg_value else ""

    # 保存报告
    report_workbook.save(output_path)
    print(f"统计报告生成完成!保存到:{output_path}")

# 使用示例
if __name__ == "__main__":
    import datetime

    generate_statistics(
        "data/sales_data.xlsx",
        "output/统计报告.xlsx"
    )

完整工具集

class ExcelProcessor:
    """Excel批量处理工具集"""

    def __init__(self):
        pass

    def merge_files(self, file_paths, output_path, skip_rows=0):
        """合并多个Excel文件"""
        merge_excel_files(file_paths, output_path, skip_rows)

    def format_file(self, input_path, output_path):
        """格式化Excel文件"""
        format_excel(input_path, output_path)

    def add_calculations(self, input_path, output_path, formulas):
        """添加计算列"""
        add_calculated_column(input_path, output_path, formulas)

    def filter_data(self, input_path, output_path, filters):
        """筛选数据"""
        filter_excel(input_path, output_path, filters)

    def generate_report(self, input_path, output_path):
        """生成统计报告"""
        generate_statistics(input_path, output_path)


# 使用示例
if __name__ == "__main__":
    processor = ExcelProcessor()

    # 一键完成所有操作
    input_file = "data/sales_data.xlsx"

    # 1. 格式化
    processor.format_file(input_file, "output/1_已格式化.xlsx")

    # 2. 筛选
    filters = [{"column": "D", "operator": ">", "value": 1000}]
    processor.filter_data("output/1_已格式化.xlsx", "output/2_已筛选.xlsx", filters)

    # 3. 生成报告
    processor.generate_report("output/2_已筛选.xlsx", "output/3_统计报告.xlsx")

    print("所有操作完成!")

本章小结

  • 批量合并Excel:自动合并多个Excel文件
  • 批量格式化:设置列宽、字体、颜色、边框
  • 添加计算列:自动添加公式和计算字段
  • 批量筛选:根据条件筛选数据
  • 生成报告:自动生成数据统计报告

这个工具可以帮你每天节省2小时处理Excel的时间!下一章我们来学习自动发送邮件,装X利器!

继续学下去,马上就能做实用项目了!