#!/usr/bin/env python3
"""
Gmail Audit & Labeling Tool for Dustin's inbox
Handles: label creation, bulk search, batch labeling, and reporting
"""

import os, sys, json, base64, re
from pathlib import Path
from datetime import datetime

CREDS_PATH = Path.home() / ".config/gmail/credentials.json"
TOKEN_PATH = Path.home() / ".config/gmail/token.json"
SCOPES = ["https://www.googleapis.com/auth/gmail.modify"]

def get_service():
    """Build an authenticated Gmail API client.

    The Google client libraries are imported lazily. If that import fails and
    a pip executable exists in the youtube-playlists venv, the libraries are
    installed with that pip and the import is attempted one more time.

    Credentials are read from TOKEN_PATH when the file exists. An expired
    token that carries a refresh token is refreshed; otherwise the
    installed-app OAuth flow is run from CREDS_PATH. In both cases the token
    file is (re)written. The process exits with status 1 when CREDS_PATH is
    missing and a new OAuth flow would be required.

    Returns:
        The object produced by ``build("gmail", "v1", credentials=creds)``.
    """
    def _load_google_libs():
        # Kept in one place so the retry after the pip install is identical.
        from google.oauth2.credentials import Credentials
        from google.auth.transport.requests import Request
        from google_auth_oauthlib.flow import InstalledAppFlow
        from googleapiclient.discovery import build
        return Credentials, Request, InstalledAppFlow, build

    try:
        Credentials, Request, InstalledAppFlow, build = _load_google_libs()
    except ImportError:
        import subprocess
        venv_pip = Path.home() / ".openclaw/workspace/scripts/youtube-playlists/venv/bin/pip"
        if venv_pip.exists():
            install_cmd = [str(venv_pip), "install", "-q",
                           "google-auth", "google-auth-oauthlib", "google-api-python-client"]
            subprocess.run(install_cmd, capture_output=True)
        # A second ImportError here propagates to the caller.
        Credentials, Request, InstalledAppFlow, build = _load_google_libs()

    creds = None
    if TOKEN_PATH.exists():
        creds = Credentials.from_authorized_user_file(str(TOKEN_PATH), SCOPES)

    # Fast path: a stored token that is still valid needs no further work.
    if creds and creds.valid:
        return build("gmail", "v1", credentials=creds)

    if creds and creds.expired and creds.refresh_token:
        creds.refresh(Request())
        TOKEN_PATH.write_text(creds.to_json())
        return build("gmail", "v1", credentials=creds)

    # No usable token: run the interactive OAuth flow from the client secrets.
    if not CREDS_PATH.exists():
        print("ERROR: No credentials.json found at ~/.config/gmail/credentials.json")
        sys.exit(1)
    oauth_flow = InstalledAppFlow.from_client_secrets_file(str(CREDS_PATH), SCOPES)
    creds = oauth_flow.run_local_server(port=0)
    TOKEN_PATH.parent.mkdir(parents=True, exist_ok=True)
    TOKEN_PATH.write_text(creds.to_json())

    return build("gmail", "v1", credentials=creds)


def create_labels(service, label_names):
    """Ensure every requested label exists and return a name -> id mapping.

    The account's current labels are listed once. Names already present are
    reused; missing names are created with ``labelShow`` / ``show``
    visibility. A name that appears more than once in ``label_names`` is
    created at most once.

    Args:
        service: Gmail API client exposing ``users().labels()``.
        label_names: iterable of label names to ensure.

    Returns:
        dict mapping each requested label name to its label id.
    """
    listing = service.users().labels().list(userId="me").execute()
    known = {entry["name"]: entry["id"] for entry in listing.get("labels", [])}

    result = {}
    for name in label_names:
        if name in known:
            result[name] = known[name]
            print(f"✓ Label exists: {name}")
            continue

        created = service.users().labels().create(userId="me", body={
            "name": name,
            "labelListVisibility": "labelShow",
            "messageListVisibility": "show"
        }).execute()
        # Remember the new label so a repeated name in label_names is treated
        # as existing instead of triggering a second create call.
        known[name] = created["id"]
        result[name] = created["id"]
        print(f"✓ Label created: {name}")

    return result


def get_label_ids(service):
    """List the account's labels and return them as a name -> id dict."""
    response = service.users().labels().list(userId="me").execute()
    name_to_id = {}
    for entry in response.get("labels", []):
        name_to_id[entry["name"]] = entry["id"]
    return name_to_id


def search_old_emails(service, before_date="2025-08-26", batch_size=50):
    """Collect every inbox message dated before ``before_date``.

    Pages through ``messages().list`` with ``batch_size`` results per request
    until a page is empty or carries no next-page token. If a request raises,
    the error is printed and whatever was collected so far is returned.

    Returns:
        list of the message dicts returned by the list calls.
    """
    query = f'before:{before_date} in:inbox'
    collected = []
    next_token = None
    keep_going = True

    while keep_going:
        try:
            response = service.users().messages().list(
                userId="me", q=query, maxResults=batch_size, pageToken=next_token
            ).execute()
            page = response.get("messages", [])
            if page:
                collected.extend(page)
                next_token = response.get("nextPageToken")
            # Continue only after a non-empty page that points at another page.
            keep_going = bool(page) and bool(next_token)
        except Exception as e:
            print(f"ERROR during search: {e}")
            keep_going = False

    return collected


def _first_plain_text(payload, limit=300):
    """Return up to ``limit`` chars of the first non-empty text/plain body.

    The payload tree is walked depth-first, so a text/plain part nested inside
    multipart containers (e.g. multipart/alternative within multipart/mixed)
    is found as well. Returns "" when no such part exists.
    """
    if payload.get("mimeType") == "text/plain":
        data = payload.get("body", {}).get("data", "")
        if data:
            # Body data may arrive without base64 padding; restore it.
            padded = data + "=" * (-len(data) % 4)
            return base64.urlsafe_b64decode(padded).decode("utf-8", errors="replace")[:limit]
    for part in payload.get("parts", []):
        text = _first_plain_text(part, limit)
        if text:
            return text
    return ""


def get_message_metadata(service, msg_id):
    """Fetch one message and return its key headers, snippet and body preview.

    Args:
        service: Gmail API client exposing ``users().messages()``.
        msg_id: id of the message to fetch.

    Returns:
        dict with keys ``id``, ``from``, ``to``, ``subject``, ``date``,
        ``snippet``, ``body`` (first 300 characters of the first text/plain
        part, or "") and ``labels``; or None if fetching or decoding raised
        (the error is printed).
    """
    try:
        # NOTE(review): per the Gmail API reference, metadataHeaders is only
        # honoured with format="metadata"; with "full" all headers are returned.
        detail = service.users().messages().get(
            userId="me", id=msg_id, format="full",
            metadataHeaders=["Subject", "From", "To", "Date", "Message-ID"]
        ).execute()
        payload = detail.get("payload") or {}

        # Header names are matched case-insensitively so "FROM" or "subject"
        # are picked up the same as "From" / "Subject".
        headers = {h["name"].lower(): h["value"] for h in payload.get("headers", [])}

        return {
            "id": msg_id,
            "from": headers.get("from", ""),
            "to": headers.get("to", ""),
            "subject": headers.get("subject", "(no subject)"),
            "date": headers.get("date", ""),
            "snippet": detail.get("snippet", ""),
            "body": _first_plain_text(payload),
            "labels": detail.get("labelIds", [])
        }
    except Exception as e:
        print(f"ERROR getting metadata for {msg_id}: {e}")
        return None


def apply_labels(service, msg_id, label_ids):
    """Add the given label ids to one message.

    Returns True when the modify call completes, False (after printing the
    error) when it raises.
    """
    request_body = {"addLabelIds": label_ids}
    try:
        request = service.users().messages().modify(
            userId="me", id=msg_id, body=request_body
        )
        request.execute()
    except Exception as e:
        print(f"ERROR applying labels to {msg_id}: {e}")
        return False
    else:
        return True


def _has_any(text, needles):
    """Return True if any of ``needles`` occurs as a substring of ``text``."""
    return any(needle in text for needle in needles)


def categorize_email(msg_data):
    """Decide which labels apply to a message.

    Only the ``from``, ``subject`` and ``body`` fields of ``msg_data`` are
    consulted; missing or None values are treated as empty strings. All
    matching is case-insensitive substring matching.

    Rules:
      * At most one category rule fires, checked in this order: Etsy, Condo,
        Finance, Gov/Benefits, Ministry, Learning, Tech, Family.
      * "Soliciting" is added when the body mentions "unsubscribe" or the
        sender looks like a bulk mailer.
      * "Reference" is added when the subject contains a substantive keyword,
        or - unless the message is already a Receipt or Soliciting - when the
        subject mentions "reference" or the body is longer than 200 chars.

    Args:
        msg_data: dict such as the one returned by get_message_metadata().

    Returns:
        list of label names without duplicates, in the order the rules added
        them (so the result is the same from run to run).
    """
    sender = (msg_data.get("from") or "").lower()
    subject = (msg_data.get("subject") or "").lower()
    body = (msg_data.get("body") or "").lower()

    labels = []

    # NOTE(review): keywords match anywhere inside a word (e.g. "api" also
    # matches "capital"); tighten to word boundaries if that proves too noisy.

    # Category rules: the first one that matches wins.
    if "etsy.com" in sender:
        labels.extend(["Receipt", "Etsy"])
    elif _has_any(sender, ("airbnb.com", "booking.com")):
        # Airbnb / Booking (Condo)
        labels.extend(["Receipt", "Condo"])
    elif (_has_any(sender, ("paypal.com", "stripe.com"))
          or _has_any(subject, ("invoice", "receipt", "order confirmation", "payment"))):
        labels.extend(["Receipt", "Finance"])
    elif (_has_any(sender, ("rrb.gov", "va.gov"))
          or _has_any(subject, ("rrb", "veteran", "railroad retirement", "benefits"))):
        labels.append("Gov/Benefits")
    elif (_has_any(sender, ("church", "reformed", "crc"))
          or _has_any(subject, ("elder", "worship", "agdao", "ministry", "congregation"))):
        labels.append("Ministry")
    elif "ronpaulcurriculum.com" in sender:
        labels.append("Learning")
    elif (_has_any(sender, ("anthropic", "openclaw", "n8n"))
          or _has_any(subject, ("gateway", "n8n", "api", "deployment"))):
        labels.append("Tech")
    elif (_has_any(sender, ("@gmail.com", "yahoo.com"))
          and _has_any(sender, ("judy", "xavier", "pamela", "ohman", "judi147", "duane", "dustin"))):
        # Family: personal mail domain plus a known contact name.
        labels.append("Family")

    # Soliciting (newsletters, marketing) is independent of the category.
    if "unsubscribe" in body or _has_any(sender, ("newsletter", "marketing", "promo", "news@")):
        labels.append("Soliciting")

    # Flag as Reference if substantive.
    if _has_any(subject, ("decision", "plan", "strategy", "important",
                          "medical", "financial", "property", "health")):
        labels.append("Reference")
    elif "reference" in subject or len(body) > 200:
        # Weaker signal: do not override a Receipt or Soliciting verdict.
        if "Receipt" not in labels and "Soliciting" not in labels:
            labels.append("Reference")

    # dict.fromkeys drops duplicates while keeping insertion order, unlike
    # set(), whose order for strings can change between interpreter runs.
    return list(dict.fromkeys(labels))


def audit_emails(service):
    """Run the full audit: ensure labels, find old inbox mail, label one batch.

    Steps:
      1. Ensure the tier-1 and tier-2 labels exist.
      2. Search the inbox for messages dated before 2025-08-26.
      3. Fetch, categorize and label the first 50 matches.

    Args:
        service: Gmail API client passed through to the helper functions.

    Returns:
        dict with keys:
          emails_scanned   - messages in the batch that received at least one
                             label (messages whose metadata could not be
                             fetched, or that matched no rule, are not counted)
          labels_created   - size of the label name -> id mapping
          total_old_emails - number of messages the search returned
          by_category      - per-category counters
          reference_items  - details of messages labelled "Reference"
          junk_pile        - sender/subject of "Junk" or "Soliciting" messages
          ambiguous        - sender/subject/date of messages no rule matched
    """
    print("=" * 60)
    print("GMAIL AUDIT - Creating Labels & Processing Old Emails")
    print("=" * 60)

    # Define labels
    tier1_labels = ["Action/Reply", "Action/Follow-Up", "Reference", "Receipt", "Junk", "Soliciting"]
    tier2_labels = ["Ministry", "Etsy", "Condo", "Finance", "Family", "Health", "Learning", "Gov/Benefits", "Tech"]
    all_labels = tier1_labels + tier2_labels

    # Step 1: Create labels
    print("\n[Step 1] Creating label system...")
    label_ids = create_labels(service, all_labels)
    print(f"✓ {len(label_ids)} labels ready\n")

    # Step 2: Search old emails
    print("[Step 2] Searching for emails older than Aug 26, 2025...")
    old_messages = search_old_emails(service, before_date="2025-08-26", batch_size=50)
    print(f"✓ Found {len(old_messages)} emails\n")

    if not old_messages:
        print("No old emails found. Audit complete.")
        # Same keys as the normal result so consumers see one schema.
        return {
            "emails_scanned": 0,
            "labels_created": len(label_ids),
            "total_old_emails": 0,
            "by_category": {},
            "reference_items": [],
            "junk_pile": [],
            "ambiguous": []
        }

    # Step 3: Process emails in batches
    print("[Step 3] Processing emails (batch 1)...")
    stats = {
        "receipt": 0,
        "etsy": 0,
        "condo": 0,
        "finance": 0,
        "ministry": 0,
        "family": 0,
        "learning": 0,
        "gov": 0,
        "tech": 0,
        "soliciting": 0,
        "reference": 0,
        "junk": 0
    }
    # Labels whose counter key is not simply the lower-cased label name.
    # Without this, "Gov/Benefits" never reaches the "gov" counter.
    stat_key_overrides = {"Gov/Benefits": "gov"}

    reference_items = []
    junk_pile = []
    ambiguous = []
    processed = 0

    # Process first batch (up to 50 emails for this subagent run)
    for msg in old_messages[:50]:
        msg_id = msg["id"]
        metadata = get_message_metadata(service, msg_id)

        if not metadata:
            continue

        # Categorize
        assigned_labels = categorize_email(metadata)

        if not assigned_labels:
            # No auto-categorization — flag as ambiguous
            ambiguous.append({
                "from": metadata["from"],
                "subject": metadata["subject"],
                "date": metadata["date"]
            })
            continue

        # Track stats
        for label in assigned_labels:
            stat_key = stat_key_overrides.get(label, label.lower())
            if stat_key in stats:
                stats[stat_key] += 1

        # Apply labels
        label_ids_to_apply = [label_ids[l] for l in assigned_labels if l in label_ids]
        if label_ids_to_apply:
            apply_labels(service, msg_id, label_ids_to_apply)

        # Track reference items for Nancy
        if "Reference" in assigned_labels:
            reference_items.append({
                "from": metadata["from"],
                "subject": metadata["subject"],
                "date": metadata["date"],
                "snippet": metadata["snippet"],
                "labels": assigned_labels
            })

        # Track junk
        if "Junk" in assigned_labels or "Soliciting" in assigned_labels:
            junk_pile.append({
                "from": metadata["from"],
                "subject": metadata["subject"]
            })

        processed += 1
        # Report the actual count every 10 labelled messages.
        if processed % 10 == 0:
            print(f"  Processed {processed}...")

    print(f"✓ Processed {processed} emails\n")

    # Compile results
    results = {
        "emails_scanned": processed,
        "labels_created": len(label_ids),
        "total_old_emails": len(old_messages),
        "by_category": stats,
        "reference_items": reference_items,
        "junk_pile": junk_pile,
        "ambiguous": ambiguous
    }

    return results


if __name__ == "__main__":
    # Only one sub-command is supported: "audit".
    cli_args = sys.argv[1:]
    if not cli_args or cli_args[0] != "audit":
        print("Usage: gmail-audit.py audit")
    else:
        service = get_service()
        report = audit_emails(service)
        # Marker line followed by the JSON report, for machine parsing.
        print("\n[AUDIT_RESULTS]")
        print(json.dumps(report, indent=2))