#!/usr/bin/env python3
"""
Etsy Archive Auditor
Scans /Users/tonyclaw/Documents/Etsy Designs/04-Archive/ and generates a
quality report flagging listings with missing files or weak metadata.

Output: /Users/tonyclaw/.openclaw/workspace/drafts/etsy/archive-audit-{date}.md
"""

import os
import sys
import re
import glob
from datetime import datetime
from pathlib import Path

# Root of the archive to scan; every immediate subdirectory is a candidate listing.
ARCHIVE_DIR = Path("/Users/tonyclaw/Documents/Etsy Designs/04-Archive")
# Destination for the markdown report (created by main() if missing).
OUTPUT_DIR  = Path("/Users/tonyclaw/.openclaw/workspace/drafts/etsy")
DATE_STR    = datetime.now().strftime("%Y-%m-%d")
OUTPUT_FILE = OUTPUT_DIR / f"archive-audit-{DATE_STR}.md"

# Folders that are not product listings — skip them
META_FOLDERS = {
    "listed", "rejected", "source-pngs", "archive", "whats-included",
    "cleanup-20260210", "cleanup-20260211", "cleanup-20260212",
}

MIN_TITLE_LEN   = 40  # titles shorter than this are flagged as a warning
MIN_TAGS_COUNT  = 13  # Etsy's per-listing tag limit; fewer is a warning
SVG_KEYWORDS    = ["svg", "SVG"]  # a title should advertise the file format
# Common niche keywords — if NONE are present, flag as weak
NICHE_KEYWORDS  = [
    "hunting", "christmas", "halloween", "faith", "christian", "cross",
    "easter", "military", "patriot", "eagle", "floral", "wedding", "deer",
    "dog", "cat", "horse", "flower", "bird", "baby", "mom", "dad",
    "teacher", "nurse", "football", "baseball", "basketball", "fishing",
    "camping", "skull", "anchor", "nautical", "western", "farmhouse",
]

# ─── Severity buckets ─────────────────────────────────────────────────────────
# Each entry is a (folder_name, critical_issues, warning_issues, info_issues)
# tuple; main() places every folder in exactly one bucket, chosen by its most
# severe issue.
critical = []   # missing required files
warnings = []   # weak title issues
infos    = []   # tag/improvement suggestions

# Run counters for the report's summary table; mutated by main().
stats = {
    "total": 0,
    "critical": 0,
    "warning": 0,
    "info": 0,
    "clean": 0,
}


def parse_listing(listing_path: Path) -> dict:
    """Parse a ``*-listing.txt`` file and extract its TITLE and TAGS fields.

    Args:
        listing_path: Path to the listing metadata file.

    Returns:
        dict with keys:
            "title": value of the ``TITLE:`` line, or None if absent.
            "tags":  list of comma-separated tag strings (may be empty).
            "parse_error": present only when reading failed; holds str(exc).
    """
    data: dict = {"title": None, "tags": []}
    try:
        text = listing_path.read_text(encoding="utf-8", errors="replace")
        # TITLE: everything after "TITLE:" on the same line.
        m = re.search(r"^TITLE:\s*(.+)$", text, re.MULTILINE)
        if m:
            data["title"] = m.group(1).strip()
        # TAGS: accept the value either inline ("TAGS: a, b") or on the
        # following line(s); stop at a blank line, the PRICE: field, or EOF.
        # (The previous pattern required a newline right after "TAGS:" and
        # silently dropped inline tags.)
        m_tags = re.search(
            r"^TAGS:[ \t]*(.*?)(?:\n\n|\nPRICE:|\Z)",
            text,
            re.MULTILINE | re.DOTALL,
        )
        if m_tags:
            raw = m_tags.group(1).strip()
            # Guard against capturing the next section when TAGS is empty.
            if raw and not raw.startswith("PRICE:") and not raw.startswith("==="):
                data["tags"] = [t.strip() for t in raw.split(",") if t.strip()]
            else:
                data["tags"] = []
    except Exception as e:
        # Never let one malformed file abort the whole audit; report it instead.
        data["parse_error"] = str(e)
    return data


def audit_folder(folder: Path) -> dict:
    """Audit a single archive folder for required files and metadata quality.

    Args:
        folder: Path to one product-listing directory.

    Returns:
        dict with keys "critical", "warning", "info", each a list of
        human-readable issue strings (all empty means the folder is clean).
    """
    issues = {"critical": [], "warning": [], "info": []}
    files  = {f.name for f in folder.iterdir() if f.is_file()}

    # ── Required file checks ──────────────────────────────────────────────────
    has_car  = any(f.endswith("-car.jpg")      for f in files)
    has_zip  = any(f.endswith(".zip")          for f in files)
    # listing.txt might be named anything with -listing.txt. Sort so that when
    # several match we always audit the same one (set order is arbitrary).
    listing_files = sorted(folder / f for f in files if f.endswith("-listing.txt"))
    has_listing = len(listing_files) > 0

    if not has_car:
        issues["critical"].append("Missing `*-car.jpg` mockup")
    if not has_listing:
        issues["critical"].append("Missing `*-listing.txt` file")
    if not has_zip:
        issues["critical"].append("Missing ZIP file")

    # ── Metadata quality checks ───────────────────────────────────────────────
    if has_listing:
        meta = parse_listing(listing_files[0])

        title = meta.get("title", "")
        tags  = meta.get("tags", [])

        # Title: too short
        if title and len(title) < MIN_TITLE_LEN:
            issues["warning"].append(
                f"Title too short ({len(title)} chars, min {MIN_TITLE_LEN}): `{title}`"
            )

        # Title: missing SVG keyword (case-insensitive so "Svg Bundle" counts)
        if title and not any(kw.lower() in title.lower() for kw in SVG_KEYWORDS):
            issues["warning"].append(
                f"Title missing SVG keyword: `{title}`"
            )

        # Title: missing niche keyword — keyword list is a heuristic, so this
        # is only an info-level suggestion.
        if title:
            title_lower = title.lower()
            if not any(kw in title_lower for kw in NICHE_KEYWORDS):
                issues["info"].append(
                    f"Title may be missing a niche keyword (check manually): `{title}`"
                )

        # Tags: missing, too few, or over Etsy's cap
        if not tags:
            issues["critical"].append("TAGS field is empty or missing in listing.txt")
        elif len(tags) < MIN_TAGS_COUNT:
            issues["warning"].append(
                f"Only {len(tags)} tags (need {MIN_TAGS_COUNT}): {', '.join(tags)}"
            )
        elif len(tags) > MIN_TAGS_COUNT:
            # Etsy silently ignores tags beyond the cap, so surplus is info-only.
            issues["info"].append(
                f"Has {len(tags)} tags — Etsy allows max 13; extra tags ignored"
            )

        if "parse_error" in meta:
            issues["warning"].append(f"Could not parse listing.txt: {meta['parse_error']}")

    return issues


# Bullet prefixes by severity level: 0=critical, 1=warning, 2=info.
_PREFIXES = ("❌ **CRITICAL:**", "⚠️ WARNING:", "ℹ️ INFO:")


def _append_section(lines: list, heading: str, entries: list, start_level: int) -> None:
    """Append one report section (heading + per-folder issue bullets).

    Each entry is (folder_name, criticals, warnings, infos); the section
    renders severity levels from ``start_level`` down to info.
    """
    lines.append(heading)
    lines.append("")
    for name, *buckets in entries:
        lines.append(f"### `{name}`")
        for level in range(start_level, 3):
            for msg in buckets[level]:
                lines.append(f"- {_PREFIXES[level]} {msg}")
        lines.append("")


def main():
    """Audit every archive folder and write the markdown report.

    Side effects: creates OUTPUT_DIR, writes OUTPUT_FILE, prints a summary,
    and mutates the module-level buckets/stats.

    Returns:
        True if any issue (critical/warning/info) was found, else False.
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

    all_folders = sorted([f for f in ARCHIVE_DIR.iterdir() if f.is_dir()])
    # Skip known meta/utility folders that aren't product listings
    folders = [
        f for f in all_folders
        if f.name not in META_FOLDERS and not f.name.startswith("cleanup-")
    ]
    skipped = len(all_folders) - len(folders)
    stats["total"] = len(folders)

    for folder in folders:
        issues = audit_folder(folder)

        folder_critical = issues["critical"]
        folder_warnings = issues["warning"]
        folder_infos    = issues["info"]

        # Each folder lands in exactly one bucket, by its most severe issue.
        if folder_critical:
            critical.append((folder.name, folder_critical, folder_warnings, folder_infos))
            stats["critical"] += 1
        elif folder_warnings:
            warnings.append((folder.name, [], folder_warnings, folder_infos))
            stats["warning"] += 1
        elif folder_infos:
            infos.append((folder.name, [], [], folder_infos))
            stats["info"] += 1
        else:
            stats["clean"] += 1

    # ── Build report ─────────────────────────────────────────────────────────
    # astimezone() attaches the local zone so %Z renders a real name; a naive
    # datetime.now() leaves %Z empty (dangling space in the output).
    lines = [
        f"# Etsy Archive Audit — {DATE_STR}",
        "",
        f"**Archive:** `{ARCHIVE_DIR}`  ",
        f"**Generated:** {datetime.now().astimezone().strftime('%Y-%m-%d %H:%M %Z')}  ",
        "",
        "---",
        "",
        "## Summary",
        "",
        "| Stat | Count |",
        "|------|-------|",
        f"| Total listings audited | {stats['total']} |",
        f"| 🔴 CRITICAL | {stats['critical']} |",
        f"| 🟡 WARNING | {stats['warning']} |",
        f"| 🔵 INFO | {stats['info']} |",
        f"| ✅ Clean | {stats['clean']} |",
        f"| ⏭️ Skipped (meta) | {skipped} |",
        "",
        "---",
        "",
    ]

    if critical:
        _append_section(lines, "## 🔴 CRITICAL — Missing Required Files", critical, 0)
    if warnings:
        _append_section(lines, "## 🟡 WARNING — Metadata Issues", warnings, 1)
    if infos:
        _append_section(lines, "## 🔵 INFO — Improvement Suggestions", infos, 2)

    if not critical and not warnings and not infos:
        lines.append("## ✅ All Clean!")
        lines.append("")
        lines.append("No issues found across all archive listings.")
        lines.append("")

    lines.append("---")
    lines.append("")
    lines.append("*Generated by etsy-archive-audit.py*")

    report = "\n".join(lines)
    OUTPUT_FILE.write_text(report, encoding="utf-8")
    print(f"Report saved: {OUTPUT_FILE}")

    # Summary for cron delivery
    print("\n=== AUDIT SUMMARY ===")
    print(f"Total: {stats['total']} | Critical: {stats['critical']} | Warning: {stats['warning']} | Info: {stats['info']} | Clean: {stats['clean']}")

    # Exit code: 1 if any issues found (so cron job can conditionally notify)
    has_issues = stats["critical"] > 0 or stats["warning"] > 0 or stats["info"] > 0
    return has_issues


if __name__ == "__main__":
    # Non-zero exit when any issue was found, so cron can conditionally notify.
    sys.exit(1 if main() else 0)

# TONY-APPROVED: 2026-03-01 | sha:d547464a
