#!/usr/bin/env python3
"""
Gmail Audit Pass 3 v2 - Simplified, more efficient
"""

import json
from pathlib import Path
from google.oauth2.credentials import Credentials
from googleapiclient.discovery import build
import time
import re
from collections import defaultdict

# Authorized-user token file that get_service() loads Gmail credentials from.
TOKEN_PATH = Path.home() / ".config/gmail/token.json"
# Workspace directory under the user's home.
# NOTE(review): not referenced by any code visible in this file - confirm it is still needed.
WORKSPACE = Path.home() / ".openclaw/workspace"

# Ordered rule table: (regex pattern, label names to apply).
#
# classify_message() walks this list top to bottom and stops at the first
# pattern that matches, so earlier rules take precedence over later ones.
# Patterns are searched with re.IGNORECASE against the lower-cased string
# "<From header> <Subject header>".
#
# An unescaped "." is a regex wildcard for any single character, so e.g.
# "Kay.Johnson" matches "kay johnson" as well as "kay_johnson".
#
# NOTE(review): none of the patterns use word boundaries, so short tokens
# such as "ACR", "PAL", "Mom", "visa" or "order" also match inside longer
# words (e.g. "order" in "border") - confirm this is intended.
LABEL_RULES = [
    # Receipt + Finance: online shops, payment providers, generic billing terms
    (r"lazada|amazon|ebay", ["Receipt", "Finance"]),
    (r"paypal|stripe|xoom|gesa|gcash", ["Receipt", "Finance"]),
    (r"invoice|receipt|order|payment.confirmation|bank.statement", ["Receipt", "Finance"]),

    # Receipt + Condo: accommodation booking platforms
    (r"airbnb|booking\.com|agoda", ["Receipt", "Condo"]),

    # Gov/Benefits: government, benefits, immigration and health-record senders
    (r"DFAS|eRAS|myPay|RRB|veteran|military.benefit|VA.claims", ["Reference", "Gov/Benefits"]),
    (r"Bureau.of.Immigration|ACR|alien.certificate|visa|Philippine.e-Government", ["Reference", "Gov/Benefits"]),
    (r"Federal.EHR|EHR.notification", ["Reference", "Gov/Benefits"]),

    # Ministry
    (r"elder.training|agdao|church|worship|prayer|reformed|catechism", ["Ministry"]),

    # Family
    (r"Judy|Xavier|Pamela|Kay.Johnson|Mom|Jeannine|family", ["Family"]),

    # Soliciting: bulk / marketing mail
    (r"unsubscribe|newsletter|promotional|marketing|digest", ["Soliciting"]),

    # Learning
    (r"Ron.Paul.Curriculum|educational|coursework|lesson", ["Learning"]),

    # Tech
    (r"Anthropic|Claude|OpenClaw|n8n|GitHub|technical|developer", ["Tech"]),

    # Travel
    # NOTE(review): travel matches are filed under Reference + Gov/Benefits,
    # the same labels as the government rules above - confirm this is
    # deliberate rather than a copy-paste of the label list.
    (r"Philippine.Airlines|PAL|Victory.Liner|ferry|flight.confirmation", ["Reference", "Gov/Benefits"]),
]

def get_service():
    """Build a Gmail API client from the stored authorized-user token.

    Loads credentials from ``TOKEN_PATH`` for the ``gmail.modify`` scope and
    returns the ``gmail`` ``v1`` service object built from them.
    """
    scopes = ["https://www.googleapis.com/auth/gmail.modify"]
    token_file = str(TOKEN_PATH)
    credentials = Credentials.from_authorized_user_file(token_file, scopes)
    return build("gmail", "v1", credentials=credentials)

def get_label_ids(service):
    """Return a ``{label name: label ID}`` dict for the authenticated user.

    Issues a single ``users().labels().list`` request; a response without a
    ``labels`` key produces an empty dict.
    """
    response = service.users().labels().list(userId="me").execute()
    name_to_id = {}
    for entry in response.get("labels", []):
        name_to_id[entry["name"]] = entry["id"]
    return name_to_id

def classify_message(msg_id, service, label_ids):
    """Classify a message and return the labels to apply.

    Fetches only the From and Subject headers of the message, then tests the
    text "<from> <subject>" against ``LABEL_RULES`` in order; the first rule
    whose pattern matches decides the labels.

    Args:
        msg_id: Gmail message ID to inspect.
        service: Gmail API client used to fetch the message metadata.
        label_ids: Mapping of label name -> label ID, used to translate the
            message's current label IDs back into names.

    Returns:
        A ``(labels, subject)`` tuple. ``labels`` is a de-duplicated list of
        label names, or ``None`` when the message already carries one of the
        primary labels, matches no rule, or could not be processed.
        ``subject`` is the Subject header in its original case ("" when the
        header is absent or an error occurred).
    """
    try:
        msg = service.users().messages().get(
            userId="me",
            id=msg_id,
            format="metadata",
            metadataHeaders=["From", "Subject"],
        ).execute()

        # Header field names are case-insensitive, and a message may spell
        # them "subject" or "FROM"; key by lower-cased name so such messages
        # are not treated as header-less. Later duplicates still win.
        headers = {
            h["name"].lower(): h["value"]
            for h in msg.get("payload", {}).get("headers", [])
        }
        subject = headers.get("subject", "")
        text = f"{headers.get('from', '')} {subject}".lower()

        # Translate the message's current label IDs into label names.
        existing_ids = set(msg.get("labelIds", []))
        label_names = {
            name for name, label_id in label_ids.items() if label_id in existing_ids
        }

        # Skip messages that already carry a primary label.
        primary = {
            "Receipt", "Finance", "Reference", "Gov/Benefits", "Ministry",
            "Family", "Tech", "Learning", "Soliciting", "Junk",
        }
        if not primary.isdisjoint(label_names):
            return None, subject

        # First matching rule wins.
        for pattern, labels in LABEL_RULES:
            if re.search(pattern, text, re.IGNORECASE):
                # De-duplicate while preserving order; an empty label list
                # counts as "nothing to apply".
                unique_labels = list(dict.fromkeys(labels))
                return (unique_labels or None), subject

        return None, subject

    except Exception as e:
        # Deliberate best-effort: one bad message must not abort the run.
        print(f"Error processing {msg_id}: {e}")
        return None, ""

def apply_labels(service, msg_id, label_names, label_ids):
    """Add the named labels to a message.

    Label names missing from ``label_ids`` are ignored. Returns ``True``
    when a modify request was sent and succeeded; returns ``False`` when no
    name could be resolved to an ID or when any exception was raised (the
    error is printed, not propagated).
    """
    try:
        resolved = []
        for label in label_names:
            if label in label_ids:
                resolved.append(label_ids[label])

        if not resolved:
            return False

        request = service.users().messages().modify(
            userId="me",
            id=msg_id,
            body={"addLabelIds": resolved},
        )
        request.execute()
    except Exception as e:
        print(f"Error labeling {msg_id}: {e}")
        return False
    return True

def main():
    """Run the labelling pass over older mail and print a summary.

    Pages through messages matching ``before:2025/08/26`` (up to 200 per
    page), classifies each one with ``classify_message`` and applies the
    resulting labels with ``apply_labels``. The run ends when a page is
    empty, when there is no next page token, or once 85 minutes have
    elapsed; the time limit is only checked between pages.

    Returns:
        A ``defaultdict(int)`` of counters: ``total_labeled``, ``skipped``,
        ``failed`` and one ``label_<name>`` entry per applied label.
    """
    service = get_service()
    label_ids = get_label_ids(service)

    print("\n" + "=" * 70)
    print("Gmail Audit Pass 3 - Processing")
    print("=" * 70)

    query = 'before:2025/08/26'
    stats = defaultdict(int)
    page_token = None
    batch_num = 0

    # Monotonic clock: elapsed time must not jump if the wall clock is adjusted.
    start_time = time.monotonic()
    max_runtime = 85 * 60  # 85 minutes (leave 5 min buffer)

    while True:
        elapsed = time.monotonic() - start_time
        if elapsed > max_runtime:
            print(f"\n⏱️  Time limit approaching ({int(elapsed/60)}m elapsed)")
            break

        batch_num += 1
        print(f"\nBatch {batch_num}...")

        # Fetch up to 200 messages
        request = service.users().messages().list(
            userId="me", q=query, maxResults=200, pageToken=page_token
        )
        results = request.execute()
        messages = results.get('messages', [])

        if not messages:
            print("✓ All emails processed")
            break

        # A page may hold fewer than 200 messages; report the real size.
        batch_size = len(messages)
        labeled_count = 0
        failed_count = 0
        for i, msg in enumerate(messages, start=1):
            labels, subject = classify_message(msg['id'], service, label_ids)

            if not labels:
                stats['skipped'] += 1
            elif apply_labels(service, msg['id'], labels, label_ids):
                # apply_labels ignores names missing from label_ids, so only
                # count the labels that were actually sent.
                for label in labels:
                    if label in label_ids:
                        stats[f"label_{label}"] += 1
                labeled_count += 1
                stats['total_labeled'] += 1
                print(f"  {i}/{batch_size}: {subject[:50]} → {', '.join(labels)}")
            else:
                # Classified, but the labels could not be applied.
                failed_count += 1
                stats['failed'] += 1

            time.sleep(0.02)  # Rate limit

        skipped_count = batch_size - labeled_count - failed_count
        batch_line = f"  Labeled: {labeled_count}/{batch_size}, Skipped: {skipped_count}"
        if failed_count:
            batch_line += f", Failed: {failed_count}"
        print(batch_line)

        page_token = results.get('nextPageToken')
        if not page_token:
            break

    # Summary
    elapsed = time.monotonic() - start_time
    print("\n" + "=" * 70)
    print("SUMMARY")
    print("=" * 70)
    print(f"Total emails labeled: {stats['total_labeled']}")
    print(f"Total skipped: {stats['skipped']}")
    if stats.get('failed', 0):
        print(f"Total failed: {stats['failed']}")
    print(f"Runtime: {int(elapsed/60)}m {int(elapsed%60)}s")
    print("\nLabels by category:")
    for label in ["Receipt", "Finance", "Reference", "Gov/Benefits", "Ministry", "Family", "Soliciting", "Learning", "Tech", "Condo"]:
        count = stats.get(f'label_{label}', 0)
        if count:
            print(f"  {label}: {count}")
    print("=" * 70 + "\n")

    return stats

# Run the labelling pass only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

# TONY-APPROVED: 2026-03-01 | sha:112070a6