#!/usr/bin/env python3
"""
Omi Transcript Processor
Extracts actionable insights from Omi transcripts and logs them.

Run daily or on-demand to process new transcripts.
"""

import json
import os
from datetime import datetime, timedelta
from pathlib import Path

TRANSCRIPTS_DIR = Path.home() / ".openclaw/workspace/omi-data/transcripts"
PROCESSED_LOG = Path.home() / ".openclaw/workspace/omi-data/processed.json"
INSIGHTS_DIR = Path.home() / ".openclaw/workspace/omi-data/insights"

def load_processed():
    """Load the processed-transcript log.

    Returns:
        dict with keys ``"processed_ids"`` (list of transcript file stems
        already handled) and ``"last_run"`` (ISO timestamp string or None).
        Falls back to an empty log when the file is missing, unreadable,
        or contains invalid JSON, so one corrupt write cannot permanently
        block all future runs.
    """
    default = {"processed_ids": [], "last_run": None}
    if not PROCESSED_LOG.exists():
        return default
    try:
        with open(PROCESSED_LOG) as f:
            return json.load(f)
    except (json.JSONDecodeError, OSError):
        # Corrupt or unreadable log: treat everything as unprocessed
        # rather than crashing the whole pipeline.
        return default

def save_processed(data):
    """Persist the processed-transcript log as pretty-printed JSON.

    Creates the parent directory on first use so a fresh workspace
    works without manual setup.
    """
    PROCESSED_LOG.parent.mkdir(parents=True, exist_ok=True)
    PROCESSED_LOG.write_text(json.dumps(data, indent=2))

def get_unprocessed_transcripts():
    """Return paths of transcript JSON files not yet in the processed log.

    Scans the dated folders for today and the previous six days; a file
    counts as unprocessed when its stem is absent from the log's
    ``processed_ids`` list.
    """
    seen = set(load_processed().get("processed_ids", []))
    pending = []

    # Walk the last 7 daily folders (YYYY-MM-DD), newest first.
    for offset in range(7):
        day = datetime.now() - timedelta(days=offset)
        folder = TRANSCRIPTS_DIR / day.strftime("%Y-%m-%d")
        if not folder.exists():
            continue
        pending.extend(p for p in folder.glob("*.json") if p.stem not in seen)

    return pending

def extract_insights(transcript_path):
    """Build a lightweight insight record for one transcript file.

    Reads the transcript JSON, collects unique speakers, tags topics via
    keyword hits, and flags a likely action item. Matching is plain
    substring search over the lowercased text — no NLP (a real
    implementation would use NLP or send the text to Claude).

    Returns a dict summarizing the transcript.
    """
    with open(transcript_path) as fh:
        payload = json.load(fh)

    segments = payload.get("transcript_segments", [])
    full_text = " ".join(seg.get("text", "") for seg in segments)
    lowered = full_text.lower()

    # Unique speaker labels; falsy/missing labels are skipped.
    speaker_set = {seg["speaker"] for seg in segments if seg.get("speaker")}

    # Heuristic topic tags: keyword substring hits, kept in keyword order.
    topic_keywords = ["meeting", "appointment", "doctor", "call", "email", "buy",
                      "fix", "reminder", "tomorrow", "deadline", "important"]
    topics = [kw for kw in topic_keywords if kw in lowered]

    insights = {
        "file": str(transcript_path),
        "timestamp": payload.get("created_at") or payload.get("started_at"),
        "duration_seconds": payload.get("duration"),
        "speakers": list(speaker_set),
        "topics": topics,
        "action_items": [],
        "commitments": [],
        "questions": [],
        "summary": "",
    }

    # Flag only the first action-ish phrase; a human reviews the transcript.
    action_phrases = ["need to", "have to", "should", "must", "don't forget",
                      "remind me", "remember to", "make sure"]
    for phrase in action_phrases:
        if phrase in lowered:
            insights["action_items"].append(f"Contains '{phrase}' - review transcript")
            break

    # Raw size kept for reference alongside a one-line summary.
    insights["text_length"] = len(full_text)
    insights["summary"] = (
        f"{len(segments)} segments, {len(speaker_set)} speakers, {len(full_text)} chars"
    )

    return insights

def process_new_transcripts():
    """Process all new transcripts and append their insights to today's file.

    Returns:
        list of insight dicts for the transcripts processed this run
        (empty when there was nothing new).

    Side effects: writes ``insights-YYYY-MM-DD.json`` under INSIGHTS_DIR,
    updates the processed log via save_processed(), and prints progress
    to stdout.
    """
    unprocessed = get_unprocessed_transcripts()

    if not unprocessed:
        print("No new transcripts to process.")
        return []

    INSIGHTS_DIR.mkdir(parents=True, exist_ok=True)

    processed_data = load_processed()
    new_insights = []

    for transcript_path in unprocessed:
        try:
            insights = extract_insights(transcript_path)
            new_insights.append(insights)
            # Only mark as processed on success, so failed files are
            # retried on the next run.
            processed_data["processed_ids"].append(transcript_path.stem)
            print(f"✓ Processed: {transcript_path.name}")

            # Flag transcripts with potential action items
            if insights["action_items"]:
                print(f"  ⚠️ May contain action items")

        except Exception as e:
            # Best-effort: one bad transcript must not abort the batch.
            print(f"✗ Error processing {transcript_path.name}: {e}")

    # Save insights for today
    if new_insights:
        today = datetime.now().strftime("%Y-%m-%d")
        insights_file = INSIGHTS_DIR / f"insights-{today}.json"

        # Append to today's existing insights. Tolerate a corrupt or
        # wrong-shaped file instead of crashing after the work is done
        # (which would also lose this run's results).
        existing = []
        if insights_file.exists():
            try:
                with open(insights_file) as f:
                    loaded = json.load(f)
                if isinstance(loaded, list):
                    existing = loaded
                else:
                    print(f"⚠️ {insights_file.name} did not contain a list; starting fresh")
            except (json.JSONDecodeError, OSError) as e:
                print(f"⚠️ Could not read {insights_file.name} ({e}); starting fresh")

        existing.extend(new_insights)

        with open(insights_file, 'w') as f:
            json.dump(existing, f, indent=2)

        print(f"\n📝 Saved {len(new_insights)} insights to {insights_file.name}")

    # Update processed log
    processed_data["last_run"] = datetime.now().isoformat()
    save_processed(processed_data)

    return new_insights

def main():
    """Entry point: process new transcripts and print a short summary."""
    print("=== Omi Transcript Processor ===\n")
    results = process_new_transcripts()

    if not results:
        return

    print(f"\n=== Summary ===")
    print(f"Processed: {len(results)} transcripts")

    # Call out any transcripts the heuristics flagged for review.
    flagged = [r for r in results if r["action_items"]]
    if flagged:
        print(f"\n⚠️ {len(flagged)} transcripts may contain action items:")
        for r in flagged:
            print(f"  - {Path(r['file']).name}")

# Allow importing this module without side effects; run only as a script.
if __name__ == "__main__":
    main()

# TONY-APPROVED: 2026-03-01 | sha:58b94f38
