#!/usr/bin/env python3
"""Create an organized library of EP documents grouped by category/year."""

from __future__ import annotations

import argparse
import re
import shutil
from pathlib import Path
from typing import Iterable

# Lower-case file suffixes (leading dot included) that count as documents.
DEFAULT_EXTENSIONS = {
    # text and word-processing formats
    ".pdf", ".doc", ".docx", ".rtf", ".txt", ".md",
    # presentations
    ".ppt", ".pptx",
    # spreadsheets and delimited tables
    ".xls", ".xlsx", ".ods", ".csv", ".tsv",
    # structured data
    ".json",
}

# A year token: "19" or "20" followed by two more digits.
YEAR_PATTERN = re.compile(r"(19|20)\d{2}")


def sanitize(name: str) -> str:
    """Return *name* with each character outside ``a-z A-Z 0-9 _ -`` replaced by ``_``.

    Every offending character is replaced individually, so runs of them
    produce runs of underscores rather than a single one.
    """
    disallowed = re.compile(r"[^a-zA-Z0-9_-]")
    return disallowed.sub("_", name)


def detect_year(parts: Iterable[str]) -> str:
    """Return the first year-like token found in *parts*, or ``"0000"``.

    Each part is split on runs of whitespace, underscores, hyphens and dots;
    the resulting tokens are checked in order and the first one that matches
    ``YEAR_PATTERN`` in full is returned. When no token matches, the
    placeholder ``"0000"`` is returned.
    """
    candidates = (
        piece
        for segment in parts
        for piece in re.split(r"[\s_\-\.]+", segment)
    )
    # The generator is lazy, so scanning stops at the first matching token.
    return next(
        (piece for piece in candidates if YEAR_PATTERN.fullmatch(piece)),
        "0000",
    )


def _unique_target(folder: Path, stem: str, suffix: str) -> Path:
    """Return a path in *folder* named ``stem + suffix`` that does not exist yet.

    When the plain name is taken, ``_1``, ``_2``, ... is appended to the stem
    until an unused name is found.
    """
    candidate = folder / f"{stem}{suffix}"
    counter = 0
    while candidate.exists():
        counter += 1
        candidate = folder / f"{stem}_{counter}{suffix}"
    return candidate


def organize(root: Path, dest: Path, exts: set[str], mode: str = "copy") -> None:
    """Copy or move documents from *root* into ``dest/<category>/<year>/``.

    Every file below *root* whose lower-cased suffix is in *exts* is placed at
    ``dest/<category>/<year>/<year>_<sanitized stem><suffix>`` where

    * ``category`` is the sanitized, lower-cased name of the first directory
      below *root* on the file's path, or ``"root"`` for files that sit
      directly in *root*;
    * ``year`` is whatever ``detect_year`` finds in the relative path
      components and the file stem.

    Args:
        root: Directory tree to scan.
        dest: Output directory. In ``"copy"`` mode an existing *dest* is
            deleted first so no stale files remain.
        exts: Lower-case suffixes (with leading dot) to include.
        mode: ``"copy"`` (default) or ``"move"``.

    Raises:
        ValueError: If *mode* is not ``"copy"`` or ``"move"``, or if refreshing
            *dest* in copy mode would delete *root* itself.
    """
    # Previously any non-"copy" value silently moved files; reject typos instead.
    if mode not in ("copy", "move"):
        raise ValueError(f"Unsupported mode {mode!r}; expected 'copy' or 'move'")

    root_abs = root.resolve()
    dest_abs = dest.resolve()

    if mode == "copy" and dest.exists():
        # Wiping dest must never take the source tree with it.
        if dest_abs == root_abs or dest_abs in root_abs.parents:
            raise ValueError(
                f"Refusing to delete {dest_abs}: it is or contains the source root {root_abs}"
            )
        # refresh destination to avoid stale files
        shutil.rmtree(dest)
    dest.mkdir(parents=True, exist_ok=True)

    # When dest lives strictly inside root, remember where, so that already
    # organized output is not picked up as input again.
    nested_dest = ()
    if root_abs in dest_abs.parents:
        nested_dest = dest_abs.relative_to(root_abs).parts

    # Snapshot (and sort, for reproducible results) the candidates before any
    # file is written or moved, so the tree is not modified while being walked.
    sources = sorted(
        entry
        for entry in root.rglob("*")
        if entry.is_file() and entry.suffix.lower() in exts
    )

    for path in sources:
        rel = path.relative_to(root)
        if nested_dest and rel.parts[: len(nested_dest)] == nested_dest:
            continue
        # rel.parts always contains at least the file name, so a file directly
        # in root is recognised by having exactly one component.
        top = rel.parts[0] if len(rel.parts) > 1 else "root"
        year = detect_year(rel.parts + (path.stem,))
        category = sanitize(top.lower())
        folder = dest / category / year
        folder.mkdir(parents=True, exist_ok=True)
        # Distinct sources can map to the same name (same stem in different
        # sub-folders, or names differing only in sanitized characters);
        # pick a free name instead of overwriting the earlier file.
        target = _unique_target(
            folder, f"{year}_{sanitize(path.stem)}", path.suffix.lower()
        )
        if mode == "copy":
            shutil.copy2(path, target)
        else:
            shutil.move(path, target)


def main() -> None:
    """Parse command-line options and organize the document tree.

    Options:
        --root: Directory to scan (default ``dropbox``).
        --output: Directory that receives the organized library
            (default ``docs/ep_library``).
        --mode: ``copy`` (default) or ``move``.

    Raises:
        SystemExit: If the root path does not exist or is not a directory.
    """
    parser = argparse.ArgumentParser(description="Organize Dropbox research docs")
    parser.add_argument("--root", default="dropbox", help="Dropbox root path")
    parser.add_argument("--output", default="docs/ep_library", help="Output folder")
    parser.add_argument("--mode", choices=["copy", "move"], default="copy")
    args = parser.parse_args()

    root = Path(args.root).resolve()
    if not root.exists():
        raise SystemExit(f"Root directory {root} not found")
    # A regular file passes exists() but has nothing to scan; fail loudly
    # rather than report success after organizing nothing.
    if not root.is_dir():
        raise SystemExit(f"Root path {root} is not a directory")
    output = Path(args.output).resolve()
    organize(root, output, DEFAULT_EXTENSIONS, args.mode)
    print(f"Organized documents into {output}")


# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()
