PY-L8-20 · Project: Build a Port Scanner

The Brief

3 min

Build scanner.py, an ethical, useful port scanner:

Authorization allow-list — refuses any host not explicitly permitted (Lesson 19).
Concurrent scanning — a thread pool to scan many ports fast.
Rate limiting — bounded concurrency so it's not aggressive.
Banner grabbing + a self-audit report — what's open, what's there, and "should it be?"

⚠️ This tool targets ONLY your own systems

The allow-list isn't a suggestion — it's a hard gate that makes the scanner refuse unauthorized targets. Use it to audit your own machine, your own lab VMs, or scanme.nmap.org (which explicitly permits scanning). Pointing a scanner at anyone else is a crime (Lesson 19), and this tool is built so you can't do it by accident.

From One Port to Many, Responsibly

5 min

You already have the atom (Lesson 19's port_state) and the gate (assert_authorized). Scanning 1000 ports one at a time with a 1-second timeout could take up to ~17 minutes (1000 × 1 s if every port times out) — too slow. The fix is concurrency: scan many ports in parallel with a thread pool. But unbounded concurrency is aggressive (and looks like an attack), so we cap it — fast and polite.

Today's big idea

A good scanner balances three things: speed (concurrency), courtesy (bounded rate so you don't hammer the target), and safety (the authorization gate, checked before a single packet). Build all three in and you have a tool that's genuinely useful for self-audits and safe to run.

Build It · The Gated, Concurrent Core

14 min

The authorization gate (first, always)

import socket

# ONLY hosts you own or that explicitly permit scanning:
AUTHORISED = {
    "127.0.0.1", "localhost",
    "scanme.nmap.org",          # nmap's official, scan-permitted test host
    # add your own lab VM IPs here, e.g. "192.168.56.10"
}


def _normalise_host(host: str) -> str:
    """Return *host* lower-cased and without a trailing dot.

    Hostnames are case-insensitive and "example.org." names the same host
    as "example.org", so both forms must match the same allow-list entry.
    """
    return host.rstrip(".").lower()


def assert_authorized(host: str) -> None:
    """Raise PermissionError unless *host* is on the AUTHORISED allow-list.

    A host passes when either its name (compared case-insensitively) or the
    IPv4 address it resolves to is listed in AUTHORISED. Anything else,
    including a name that cannot be resolved or is not a valid hostname,
    is refused.

    Args:
        host: Hostname or IPv4 address the caller intends to scan.

    Raises:
        PermissionError: If neither the name nor its resolved address is
            allow-listed.
    """
    allowed = {_normalise_host(entry) for entry in AUTHORISED}
    if _normalise_host(host) in allowed:
        return                                # listed by name: no lookup needed

    # Not listed by name, so compare the IP form as well.
    try:
        ip = socket.gethostbyname(host)
    except (OSError, UnicodeError):
        # OSError covers gaierror/herror (unresolvable name); UnicodeError is
        # raised for names with empty or over-long labels. Either way the
        # host cannot be shown to be permitted, so fail closed.
        ip = None

    if ip not in allowed:
        raise PermissionError(
            f"Refusing to scan '{host}'. Only scan hosts you OWN or that "
            f"explicitly permit it. Add it to AUTHORISED only if you're sure.")

Scanning one port (with banner grab)

def scan_port(host: str, port: int, timeout: float = 1.0) -> dict | None:
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.settimeout(timeout)
        if s.connect_ex((host, port)) != 0:
            return None                       # not open — skip
        banner = ""
        try:
            banner = s.recv(128).decode(errors="replace").strip()
        except (socket.timeout, OSError):
            pass
        return {"port": port, "service": _guess_service(port), "banner": banner}

COMMON = {21: "ftp", 22: "ssh", 23: "telnet", 25: "smtp", 53: "dns",
          80: "http", 443: "https", 3306: "mysql", 5432: "postgres",
          6379: "redis", 8080: "http-alt", 27017: "mongodb"}
def _guess_service(port: int) -> str:
    return COMMON.get(port, "unknown")

Concurrent + rate-limited

from concurrent.futures import ThreadPoolExecutor, as_completed

def scan(host: str, ports, max_workers: int = 100) -> list[dict]:
    """Scan *ports* on *host* concurrently and return the open ones.

    Args:
        host: Hostname or IPv4 address; must pass assert_authorized.
        ports: Iterable of TCP port numbers to probe.
        max_workers: Upper bound on simultaneous connection attempts.

    Returns:
        The scan_port result dicts for every open port, sorted by port.

    Raises:
        PermissionError: If *host* is not allow-listed (raised before any
            connection is attempted).
        socket.gaierror: If *host* cannot be resolved.
    """
    assert_authorized(host)                   # GATE — before any packet

    # Resolve once. Passing the name to every worker would repeat the DNS
    # lookup per port and could connect to a different address from the one
    # looked up a moment ago.
    target = socket.gethostbyname(host)

    # bounded thread pool = concurrency cap = built-in rate limiting
    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        futures = [pool.submit(scan_port, target, p) for p in ports]
        results = (fut.result() for fut in as_completed(futures))
        open_ports = [r for r in results if r]
    return sorted(open_ports, key=lambda r: r["port"])

The ThreadPoolExecutor with a max_workers cap is both the speedup and the rate limit: at most 100 connections are in flight at once, so the scan is fast but never floods the target. The gate runs first, so an unauthorized host fails before a single packet is sent.

Build It · The Self-Audit Report & CLI

12 min

Wrap it in a CLI that produces a defensive report — not just "open ports" but "should this be open?" — pulling in Level 7 reporting.

import argparse, json, logging
from datetime import datetime

# Print bare messages (no level or timestamp prefix) at INFO and above.
logging.basicConfig(level=logging.INFO, format="%(message)s")
# Named logger the CLI writes its progress and results to.
log = logging.getLogger("scanner")

# ports that are usually a RED FLAG if exposed beyond localhost
SHOULD_NOT_EXPOSE = {3306, 5432, 6379, 27017, 23, 21}


def audit(open_ports: list[dict], host: str) -> dict:
    """Turn raw scan results into a report with a defender's note per port.

    Args:
        open_ports: Result dicts from the scan; each must have a "port" key.
        host: The scanned target, recorded in the report.

    Returns:
        A JSON-serialisable dict with "host", "scanned_at" (ISO 8601 local
        time including its UTC offset) and "open_ports": copies of the input
        dicts with a "note" added. The input dicts are not modified.
    """
    findings = []
    for entry in open_ports:
        if entry["port"] in SHOULD_NOT_EXPOSE:
            note = "⚠️ HIGH: database/admin/plaintext service exposed — firewall it"
        else:
            note = "review: is this meant to be reachable?"
        findings.append({**entry, "note": note})

    # astimezone() attaches the local UTC offset, so the timestamp stays
    # unambiguous when the report is read on another machine or later on.
    scanned_at = datetime.now().astimezone().isoformat()
    return {"host": host, "scanned_at": scanned_at, "open_ports": findings}

def main():
    """Command-line entry point: scan a host, print findings, write JSON.

    Parses the target host, the port range (--start/--end) and the report
    path (--out), runs the gated scan, logs one line per open port with its
    audit note, and saves the full report as JSON.

    Returns:
        0 after a completed scan, 1 when the target was refused by the
        allow-list. An invalid port range exits through argparse's usual
        usage error (status 2).
    """
    p = argparse.ArgumentParser(description="Ethical port scanner (own hosts only).")
    p.add_argument("host")
    p.add_argument("--start", type=int, default=1)
    p.add_argument("--end", type=int, default=1024)
    p.add_argument("--out", default="scan.json")
    a = p.parse_args()

    # Reject impossible ranges up front: a port above 65535 would otherwise
    # raise OverflowError inside a worker thread, and start > end would
    # silently scan nothing.
    if not 1 <= a.start <= a.end <= 65535:
        p.error("port range must satisfy 1 <= START <= END <= 65535")

    log.info("⚠️  Authorised targets only. Scanning %s ports %d-%d…",
             a.host, a.start, a.end)
    try:
        results = scan(a.host, range(a.start, a.end + 1))
    except PermissionError as e:
        log.error("%s", e)
        return 1                              # refusal must not look like success

    report = audit(results, a.host)
    log.info("\n%d open port(s) on %s:", len(results), a.host)
    for f in report["open_ports"]:
        line = f"  {f['port']:5} {f['service']:10}"
        if f["banner"]:
            line += f" [{f['banner'][:40]}]"
        log.info("%s\n        → %s", line, f["note"])
    with open(a.out, "w", encoding="utf-8") as fp:
        json.dump(report, fp, indent=2)
    log.info("\nreport → %s", a.out)
    return 0


if __name__ == "__main__":
    # Propagate main()'s status so shells and CI can detect a refused scan.
    raise SystemExit(main())

⚠️  Authorised targets only. Scanning 127.0.0.1 ports 1-6000…

3 open port(s) on 127.0.0.1:
    22    ssh        [SSH-2.0-OpenSSH_9.6]
        → review: is this meant to be reachable?
   3000   unknown
        → review: is this meant to be reachable?
   5432   postgres
        → ⚠️ HIGH: database/admin/plaintext service exposed — firewall it
report → scan.json

# (point it at an unauthorised host → PermissionError, no packets sent)

Read the result

This is a tool you'd actually use to harden your own systems. The authorization gate runs first — try an unlisted host and it refuses before touching the network. The thread pool makes a 1024-port scan finish in seconds while capping concurrency for courtesy. Banner grabbing (Lesson 7) identifies services, and the audit step adds the defender's judgement: an exposed PostgreSQL is flagged HIGH, because a database reachable beyond localhost is a classic misconfiguration (Lesson 34). The JSON report feeds your evidence/reporting workflow (Lesson 46).

Build It Yourself

13 min

Scan 127.0.0.1 (your machine) or scanme.nmap.org (explicitly permitted) only.

01 🟢 Gate first

Build assert_authorized and prove it: try to scan a non-allow-listed host and confirm it raises before any connection attempt. Then scan your own localhost successfully.

02 🟡 Concurrency speedup

Time a 1024-port scan of localhost single-threaded vs. with a 100-worker pool. Report the speedup. Confirm both find the same open ports.

Hint

import time

# Run the identical localhost sweep twice, first with a single worker
# (effectively serial) and then with the 100-worker pool, timing each.
for label, workers in (("serial:", 1), ("pool:  ", 100)):
    began = time.perf_counter()
    scan("127.0.0.1", range(1, 1025), max_workers=workers)
    print(label, time.perf_counter() - began)

03 🔴 Self-audit your machine

Run the full scanner against your own machine and produce the audit report. For each open port, write whether it should be open and what you'd do about any that shouldn't (close the service, firewall it, bind to localhost). This is a real defensive deliverable.

Stretch · Compare to nmap & Add a Confirmation

8 min

Two stretches: (1) run the industry-standard nmap against your own localhost and compare its results/output to yours — appreciate what a mature tool adds (SYN scans, OS detection, scripting). (2) Add an interactive confirmation: before scanning, the tool prints the target and requires the user to type the host name to confirm — a human check on top of the allow-list.

Show the confirmation gate

def confirm_target(host: str) -> bool:
    """Ask the operator to re-type the target before scanning.

    The allow-list is checked first, so an unauthorised host is refused
    before any prompt is shown.

    Args:
        host: The target the tool is about to scan.

    Returns:
        True only if the operator typed *host* exactly; False on any other
        input or when no input can be read.

    Raises:
        PermissionError: If *host* is not allow-listed.
    """
    assert_authorized(host)        # machine gate first
    print(f"About to scan: {host}")
    print("This sends packets to the target. Only proceed if you OWN it "
          "or have WRITTEN permission.")
    try:
        typed = input(f"Type the host name to confirm ('{host}'): ").strip()
    except EOFError:
        # stdin is closed or exhausted (piped/CI run): nobody is there to
        # confirm, so treat it as a refusal instead of crashing.
        return False
    return typed == host

# in main(): if not confirm_target(a.host): return
# Defence in depth: the allow-list stops accidents in code; the typed
# confirmation stops accidents at runtime.

Non-negotiables: an nmap comparison write-up OR a working typed-confirmation gate layered on top of the allow-list.

Recap

3 min

A responsible scanner is fast, polite, and safe. Concurrency (a ThreadPoolExecutor) makes it fast; a bounded max_workers doubles as rate limiting so it's not aggressive; and an authorization allow-list checked before any packet makes it physically refuse unauthorized targets. Banner grabbing identifies services, and an audit step turns "open ports" into defensive findings ("this database shouldn't be exposed"). The whole point is the self-audit: scanning your own systems to find and fix misconfigurations before attackers do. Mature tools like nmap go further — but you now understand exactly what they're doing and why the ethics are non-negotiable.

Vocabulary Card

ThreadPoolExecutor: Runs many port checks concurrently with a capped number of workers.
authorization gate: A code check that refuses to scan non-permitted hosts.
banner grabbing: Reading a service's greeting to identify its software/version.
self-audit: Scanning your own systems to find exposed services to fix.

Homework

4 min

Finish the scanner with the allow-list gate, concurrency, banner grabbing, and the audit report. Run a self-audit of your own machine and write it up: open ports, services, and a fix for any that shouldn't be exposed. Add one stretch feature. Confirm in writing that your tool refuses non-allow-listed hosts (show the PermissionError).

Sample · self-audit write-up

Scanned: 127.0.0.1, ports 1-6000 (my own machine). Refused to scan
8.8.8.8 → PermissionError (not in allow-list) — gate works.

Open ports found:
  22   ssh       → intended (I use it). Hardened: key-only auth.
  3000 dev srv   → intended during dev; not exposed in prod.
  5432 postgres  → ⚠️ FIX: my dev DB was listening on all interfaces.
                   Changed to bind 127.0.0.1 only, so it's not
                   reachable from the LAN. (Classic misconfig, L8-34.)

Stretch: added a typed-confirmation prompt before scanning, on top
of the allow-list (defence in depth).

Non-negotiables: gated+concurrent scanner with audit report, a real self-audit with at least one fix, proof it refuses unauthorized hosts, and a stretch feature.

import socket # ONLY hosts you own or that explicitly permit scanning: AUTHORISED = { "127.0.0.1", "localhost", "scanme.nmap.org", # nmap's official, scan-permitted test host # add your own lab VM IPs here, e.g. "192.168.56.10" } def assert_authorized(host: str) -> None: # resolve to compare both name and IP forms try: ip = socket.gethostbyname(host) except socket.gaierror: ip = host if host not in AUTHORISED and ip not in AUTHORISED: raise PermissionError( f"Refusing to scan '{host}'. Only scan hosts you OWN or that " f"explicitly permit it. Add it to AUTHORISED only if you're sure.")

def scan_port(host: str, port: int, timeout: float = 1.0) -> dict | None: with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: s.settimeout(timeout) if s.connect_ex((host, port)) != 0: return None # not open — skip banner = "" try: banner = s.recv(128).decode(errors="replace").strip() except (socket.timeout, OSError): pass return {"port": port, "service": _guess_service(port), "banner": banner} COMMON = {21: "ftp", 22: "ssh", 23: "telnet", 25: "smtp", 53: "dns", 80: "http", 443: "https", 3306: "mysql", 5432: "postgres", 6379: "redis", 8080: "http-alt", 27017: "mongodb"} def _guess_service(port: int) -> str: return COMMON.get(port, "unknown")

from concurrent.futures import ThreadPoolExecutor, as_completed def scan(host: str, ports, max_workers: int = 100) -> list[dict]: assert_authorized(host) # GATE — before any packet open_ports = [] # bounded thread pool = concurrency cap = built-in rate limiting with ThreadPoolExecutor(max_workers=max_workers) as pool: futures = {pool.submit(scan_port, host, p): p for p in ports} for fut in as_completed(futures): result = fut.result() if result: open_ports.append(result) return sorted(open_ports, key=lambda r: r["port"])

import argparse, json, logging from datetime import datetime logging.basicConfig(level=logging.INFO, format="%(message)s") log = logging.getLogger("scanner") # ports that are usually a RED FLAG if exposed beyond localhost SHOULD_NOT_EXPOSE = {3306, 5432, 6379, 27017, 23, 21} def audit(open_ports: list[dict], host: str) -> dict: findings = [] for p in open_ports: note = "review: is this meant to be reachable?" if p["port"] in SHOULD_NOT_EXPOSE: note = "⚠️ HIGH: database/admin/plaintext service exposed — firewall it" findings.append({**p, "note": note}) return {"host": host, "scanned_at": datetime.now().isoformat(), "open_ports": findings} def main(): p = argparse.ArgumentParser(description="Ethical port scanner (own hosts only).") p.add_argument("host") p.add_argument("--start", type=int, default=1) p.add_argument("--end", type=int, default=1024) p.add_argument("--out", default="scan.json") a = p.parse_args() log.info("⚠️ Authorised targets only. Scanning %s ports %d-%d…", a.host, a.start, a.end) try: results = scan(a.host, range(a.start, a.end + 1)) except PermissionError as e: log.error("%s", e); return report = audit(results, a.host) log.info("\n%d open port(s) on %s:", len(results), a.host) for f in report["open_ports"]: line = f" {f['port']:5} {f['service']:10}" if f["banner"]: line += f" [{f['banner'][:40]}]" log.info(line + f"\n → {f['note']}") with open(a.out, "w") as fp: json.dump(report, fp, indent=2) log.info("\nreport → %s", a.out) if __name__ == "__main__": main()

⚠️ Authorised targets only. Scanning 127.0.0.1 ports 1-1024… 3 open port(s) on 127.0.0.1: 22 ssh [SSH-2.0-OpenSSH_9.6] → review: is this meant to be reachable? 3000 unknown → review: is this meant to be reachable? 5432 postgres → ⚠️ HIGH: database/admin/plaintext service exposed — firewall it report → scan.json # (point it at an unauthorised host → PermissionError, no packets sent)

import time t = time.perf_counter() scan("127.0.0.1", range(1, 1025), max_workers=1) print("serial:", time.perf_counter()-t) t = time.perf_counter() scan("127.0.0.1", range(1, 1025), max_workers=100) print("pool: ", time.perf_counter()-t)

def confirm_target(host: str) -> bool: assert_authorized(host) # machine gate first print(f"About to scan: {host}") print("This sends packets to the target. Only proceed if you OWN it " "or have WRITTEN permission.") typed = input(f"Type the host name to confirm ('{host}'): ").strip() return typed == host # in main(): if not confirm_target(a.host): return # Defence in depth: the allow-list stops accidents in code; the typed # confirmation stops accidents at runtime.