PY-L7-22 · Project: Multi-Format Report Generator

The Brief

3 min

Build report.py <data.csv> that, in one run, produces a folder report-<timestamp>/ containing:

summary.xlsx — the data, styled, with a per-category total and a frozen header.
cover.pdf — a one-page cover with title, date, row count, and grand total.
manifest.json — metadata about the run: source file, counts, totals, generated-at time.

It should log every stage, handle a missing/empty CSV gracefully, and be re-runnable without clobbering previous reports (the timestamp folder ensures that).

Plan the Pipeline

5 min

Before coding, sketch the stages — good automation is just a clear pipeline:

1. LOAD     read the CSV → list of dict rows           (Lesson 15)
2. ANALYSE  totals, per-category breakdown, row count   (dict/Counter)
3. OUTPUT   make the timestamped folder                 (Lesson 5, 11)
4. EXCEL    write styled summary.xlsx                    (Lesson 19)
5. PDF      draw cover.pdf with reportlab                (Lesson 21)
6. JSON     dump manifest.json                           (Lesson 17)
7. LOG      narrate each step                            (Lessons 13-14)

Project mindset

Each stage is a small function that does one thing and hands its result to the next. Build and test them one at a time — don't write all 80 lines then run once. A pipeline you assemble stage-by-stage is a pipeline you can debug.

Build It · Stages 1-3 (Load, Analyse, Folder)

14 min

Load & analyse

import csv, json, logging
from pathlib import Path
from datetime import datetime
from collections import defaultdict

logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s")
log = logging.getLogger("report")

def load(csv_path: str) -> list[dict]:
    rows = []
    with open(csv_path, newline="", encoding="utf-8") as f:
        rows = list(csv.DictReader(f))
    if not rows:
        raise ValueError(f"{csv_path} is empty or has no data rows")
    log.info("loaded %d rows from %s", len(rows), csv_path)
    return rows

def analyse(rows: list[dict]) -> dict:
    by_cat = defaultdict(float)
    total = 0.0
    for r in rows:
        amount = float(r.get("amount", 0) or 0)
        by_cat[r.get("category", "uncategorised")] += amount
        total += amount
    return {
        "row_count": len(rows),
        "grand_total": round(total, 2),
        "by_category": {k: round(v, 2) for k, v in by_cat.items()},
    }

Make the output folder

def make_folder() -> Path:
    stamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    out = Path(f"report-{stamp}")
    out.mkdir(parents=True, exist_ok=True)
    log.info("output → %s", out)
    return out

The timestamp guarantees each run gets its own folder — re-runnable, never overwriting. These three functions are the backbone; everything else writes files into the folder.

Build It · Stages 4-7 (Excel, PDF, JSON, Wire-up)

12 min

Excel summary

from openpyxl import Workbook
from openpyxl.styles import Font, PatternFill

def write_excel(rows: list[dict], stats: dict, out: Path) -> Path:
    wb = Workbook(); ws = wb.active; ws.title = "Data"
    headers = list(rows[0].keys())
    ws.append(headers)
    for cell in ws[1]:
        cell.font = Font(bold=True, color="FFFFFF")
        cell.fill = PatternFill("solid", fgColor="2563EB")
    for r in rows:
        ws.append([r.get(h, "") for h in headers])
    ws.freeze_panes = "A2"

    # category breakdown on a second sheet
    cat = wb.create_sheet("By Category")
    cat.append(["Category", "Total"])
    for c, t in sorted(stats["by_category"].items(),
                       key=lambda kv: kv[1], reverse=True):
        cat.append([c, t])
    cat.append(["GRAND TOTAL", stats["grand_total"]])

    path = out / "summary.xlsx"
    wb.save(path)
    log.info("wrote %s", path.name)
    return path

PDF cover & JSON manifest

from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import A4

def write_cover(stats: dict, source: str, out: Path) -> Path:
    path = out / "cover.pdf"
    c = canvas.Canvas(str(path), pagesize=A4)
    c.setFont("Helvetica-Bold", 24)
    c.drawString(72, 760, "Data Report")
    c.setFont("Helvetica", 12)
    c.drawString(72, 720, f"Source: {source}")
    c.drawString(72, 700, f"Generated: {datetime.now():%Y-%m-%d %H:%M}")
    c.drawString(72, 680, f"Rows: {stats['row_count']}")
    c.drawString(72, 660, f"Grand total: {stats['grand_total']:,.2f}")
    c.save()
    log.info("wrote %s", path.name)
    return path

def write_manifest(stats: dict, source: str, out: Path) -> Path:
    manifest = {
        "source": source,
        "generated_at": datetime.now().isoformat(),
        **stats,
    }
    path = out / "manifest.json"
    path.write_text(json.dumps(manifest, indent=2), encoding="utf-8")
    log.info("wrote %s", path.name)
    return path

Wire it together

import argparse, sys

def main():
    p = argparse.ArgumentParser(description="Multi-format report generator.")
    p.add_argument("csv", help="source CSV file")
    args = p.parse_args()

    try:
        rows = load(args.csv)
    except (FileNotFoundError, ValueError) as e:
        log.error("%s", e)
        sys.exit(1)

    stats = analyse(rows)
    out = make_folder()
    write_excel(rows, stats, out)
    write_cover(stats, args.csv, out)
    write_manifest(stats, args.csv, out)
    log.info("done → %s (%d rows, total %.2f)",
             out, stats["row_count"], stats["grand_total"])

if __name__ == "__main__":
    main()

$ python report.py sales.csv
INFO loaded 412 rows from sales.csv
INFO output → report-20260528-143012
INFO wrote summary.xlsx
INFO wrote cover.pdf
INFO wrote manifest.json
INFO done → report-20260528-143012 (412 rows, total 84210.50)

Read the result

Six focused functions, one main() that wires them in order — and you have a tool that turns a raw CSV into three professional deliverables every time. The structure (load → analyse → output stages, each logged, errors handled at the boundary) is the template for every automation in this level. Schedule it (Lesson 35) and the report lands on someone's desk every morning with no human touch.

Extend It Yourself

13 min

01 🟢 Build the base

Get the core working: load a CSV with category and amount columns and produce the three files. Test with a tiny CSV you can verify by eye.

02 🟡 Add a chart

Add a bar chart of category totals to the Excel file using openpyxl's BarChart. (Look up openpyxl.chart.BarChart and Reference.) Many managers want the picture, not the table.

Hint

from openpyxl.chart import BarChart, Reference

chart = BarChart()
data = Reference(cat, min_col=2, min_row=1, max_row=cat.max_row - 1)
labels = Reference(cat, min_col=1, min_row=2, max_row=cat.max_row - 1)
chart.add_data(data, titles_from_data=True)
chart.set_categories(labels)
cat.add_chart(chart, "D2")

03 🔴 Zip & deliver

After generating the folder, zip it (Lesson 7's shutil.make_archive) into report-<timestamp>.zip so the whole bundle is one attachable file. Log the final archive path and size.

Hint

import shutil
archive = shutil.make_archive(str(out), "zip", str(out))
log.info("bundled → %s (%.0f KB)",
         archive, Path(archive).stat().st_size / 1000)

Stretch · Make It Configurable

8 min

Real tools adapt to the data. Add argparse options: --category-col and --amount-col (so it works on any CSV, not just ones with those exact headers), --title for the cover, and --top N to show only the N biggest categories. Validate that the named columns actually exist and error helpfully if not.

Show the key additions

p.add_argument("--category-col", default="category")
p.add_argument("--amount-col", default="amount")
p.add_argument("--title", default="Data Report")
p.add_argument("--top", type=int, help="only the N biggest categories")

# after load(), validate columns:
missing = [c for c in (args.category_col, args.amount_col)
           if c not in rows[0]]
if missing:
    log.error("CSV missing columns: %s", ", ".join(missing))
    sys.exit(1)

# in analyse(), use the chosen column names;
# in the breakdown, slice to args.top if given:
items = sorted(stats["by_category"].items(),
               key=lambda kv: kv[1], reverse=True)
if args.top:
    items = items[:args.top]

Non-negotiables: configurable column names, column-existence validation, --title and --top working end to end.

Recap

3 min

You combined the whole data-files arc into one deliverable: CSV in (Lesson 15), analysed with dict aggregation, out to a styled Excel workbook (Lesson 19), a reportlab PDF cover (Lesson 21), and a JSON manifest (Lesson 17) — all in a timestamped folder, narrated by logging (Lessons 13-14) and guarded at the boundary. The real takeaway is the shape: small single-purpose functions wired by a main() into a load → analyse → output pipeline. That structure scales from this project to the capstone in Lesson 47.

Vocabulary Card

pipeline: A sequence of stages, each transforming data for the next.
manifest: A metadata file describing what a run produced and when.
deliverable: A finished output a stakeholder receives (report, archive, file).
boundary validation: Checking inputs once, up front, then trusting them downstream.

Homework

4 min

Finish and polish the report generator with at least the base three outputs plus one extension (chart, zip, or configurable columns). Then run it on a real CSV from your own life — bank statement export, a class gradebook, a game's stats — and write two sentences on what the generated report told you that the raw CSV didn't.

Sample · what good submission looks like

report.py runs end-to-end on real data, producing:
  report-20260528-143012/
    summary.xlsx   (styled, 2 sheets, bar chart)
    cover.pdf      (title, date, totals)
    manifest.json  (source, counts, totals, generated_at)
  report-20260528-143012.zip   (the whole bundle)

Reflection: "The category sheet showed 60% of my spending was
in one category I'd never have noticed scrolling the raw CSV —
the chart made it obvious in a glance."

Non-negotiables: working three outputs + one extension, run on real data, a genuine insight from the report.

1. LOAD read the CSV → list of dict rows (Lesson 15) 2. ANALYSE totals, per-category breakdown, row count (dict/Counter) 3. OUTPUT make the timestamped folder (Lesson 5, 11) 4. EXCEL write styled summary.xlsx (Lesson 19) 5. PDF draw cover.pdf with reportlab (Lesson 21) 6. JSON dump manifest.json (Lesson 17) 7. LOG narrate each step (Lessons 13-14)

import csv, json, logging from pathlib import Path from datetime import datetime from collections import defaultdict logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s") log = logging.getLogger("report") def load(csv_path: str) -> list[dict]: rows = [] with open(csv_path, newline="", encoding="utf-8") as f: rows = list(csv.DictReader(f)) if not rows: raise ValueError(f"{csv_path} is empty or has no data rows") log.info("loaded %d rows from %s", len(rows), csv_path) return rows def analyse(rows: list[dict]) -> dict: by_cat = defaultdict(float) total = 0.0 for r in rows: amount = float(r.get("amount", 0) or 0) by_cat[r.get("category", "uncategorised")] += amount total += amount return { "row_count": len(rows), "grand_total": round(total, 2), "by_category": {k: round(v, 2) for k, v in by_cat.items()}, }

def make_folder() -> Path: stamp = datetime.now().strftime("%Y%m%d-%H%M%S") out = Path(f"report-{stamp}") out.mkdir(parents=True, exist_ok=True) log.info("output → %s", out) return out

from openpyxl import Workbook from openpyxl.styles import Font, PatternFill def write_excel(rows: list[dict], stats: dict, out: Path) -> Path: wb = Workbook(); ws = wb.active; ws.title = "Data" headers = list(rows[0].keys()) ws.append(headers) for cell in ws[1]: cell.font = Font(bold=True, color="FFFFFF") cell.fill = PatternFill("solid", fgColor="2563EB") for r in rows: ws.append([r.get(h, "") for h in headers]) ws.freeze_panes = "A2" # category breakdown on a second sheet cat = wb.create_sheet("By Category") cat.append(["Category", "Total"]) for c, t in sorted(stats["by_category"].items(), key=lambda kv: kv[1], reverse=True): cat.append([c, t]) cat.append(["GRAND TOTAL", stats["grand_total"]]) path = out / "summary.xlsx" wb.save(path) log.info("wrote %s", path.name) return path

from reportlab.pdfgen import canvas from reportlab.lib.pagesizes import A4 def write_cover(stats: dict, source: str, out: Path) -> Path: path = out / "cover.pdf" c = canvas.Canvas(str(path), pagesize=A4) c.setFont("Helvetica-Bold", 24) c.drawString(72, 760, "Data Report") c.setFont("Helvetica", 12) c.drawString(72, 720, f"Source: {source}") c.drawString(72, 700, f"Generated: {datetime.now():%Y-%m-%d %H:%M}") c.drawString(72, 680, f"Rows: {stats['row_count']}") c.drawString(72, 660, f"Grand total: {stats['grand_total']:,.2f}") c.save() log.info("wrote %s", path.name) return path def write_manifest(stats: dict, source: str, out: Path) -> Path: manifest = { "source": source, "generated_at": datetime.now().isoformat(), **stats, } path = out / "manifest.json" path.write_text(json.dumps(manifest, indent=2), encoding="utf-8") log.info("wrote %s", path.name) return path

import argparse, sys def main(): p = argparse.ArgumentParser(description="Multi-format report generator.") p.add_argument("csv", help="source CSV file") args = p.parse_args() try: rows = load(args.csv) except (FileNotFoundError, ValueError) as e: log.error("%s", e) sys.exit(1) stats = analyse(rows) out = make_folder() write_excel(rows, stats, out) write_cover(stats, args.csv, out) write_manifest(stats, args.csv, out) log.info("done → %s (%d rows, total %.2f)", out, stats["row_count"], stats["grand_total"]) if __name__ == "__main__": main()

$ python report.py sales.csv INFO loaded 412 rows from sales.csv INFO output → report-20260528-143012 INFO wrote summary.xlsx INFO wrote cover.pdf INFO wrote manifest.json INFO done → report-20260528-143012 (412 rows, total 84210.50)

from openpyxl.chart import BarChart, Reference chart = BarChart() data = Reference(cat, min_col=2, min_row=1, max_row=cat.max_row - 1) labels = Reference(cat, min_col=1, min_row=2, max_row=cat.max_row - 1) chart.add_data(data, titles_from_data=True) chart.set_categories(labels) cat.add_chart(chart, "D2")

p.add_argument("--category-col", default="category") p.add_argument("--amount-col", default="amount") p.add_argument("--title", default="Data Report") p.add_argument("--top", type=int, help="only the N biggest categories") # after load(), validate columns: missing = [c for c in (args.category_col, args.amount_col) if c not in rows[0]] if missing: log.error("CSV missing columns: %s", ", ".join(missing)) sys.exit(1) # in analyse(), use the chosen column names; # in the breakdown, slice to args.top if given: items = sorted(stats["by_category"].items(), key=lambda kv: kv[1], reverse=True) if args.top: items = items[:args.top]