#!/usr/bin/env python3
"""
Omi Direct BLE Pipeline - Full Orchestration

Coordinates:
1. BLE audio sync
2. Opus decoding
3. Whisper transcription
4. Transcript processing
5. Integration with morning briefing

Usage:
    python3 omi_pipeline.py --sync 60           # Sync for 60 seconds
    python3 omi_pipeline.py --full               # Full pipeline: sync → decode → transcribe → process
    python3 omi_pipeline.py --transcribe-only    # Skip BLE, just transcribe pending audio
    python3 omi_pipeline.py --process-only       # Skip everything, just process transcripts
"""

import asyncio
import json
import logging
import sys
import subprocess
from datetime import datetime
from pathlib import Path
from typing import Optional, List

# Import pipeline components
sys.path.insert(0, str(Path(__file__).parent))
from omi_ble_client import OmiBLEClient, OMI_DEVICE_ADDRESS
from opus_decoder import AudioDecoder
from whisper_pipeline import WhisperPipeline
from transcript_processor import TranscriptProcessor

# Console logging for every pipeline module: INFO and above, timestamped.
logging.basicConfig(
    format='%(asctime)s [%(levelname)s] %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# On-disk layout, rooted in the current user's home directory.
WORKSPACE = Path.home().joinpath(".openclaw/workspace/projects/active/omi-direct-pipeline")
DATA_DIR = WORKSPACE.joinpath("omi-data")
AUDIO_DIR = DATA_DIR.joinpath("audio")              # raw captures (omi_audio_*.raw)
DECODED_DIR = DATA_DIR.joinpath("decoded")          # WAV files produced by decoding
TRANSCRIPTS_DIR = DATA_DIR.joinpath("transcripts")  # output directory handed to Whisper
INSIGHTS_DIR = DATA_DIR.joinpath("insights")        # not referenced in this module
LOGS_DIR = WORKSPACE.joinpath("logs")

# JSON snapshot of the current run, rewritten after each status change.
STATUS_FILE = LOGS_DIR.joinpath("pipeline-status.json")


class OmiPipeline:
    """Full Omi BLE pipeline orchestrator.

    Runs the four pipeline stages (BLE sync, audio decoding, Whisper
    transcription, transcript processing) and tracks their progress in
    ``self.status``, which is written to ``STATUS_FILE`` after every state
    change.

    Every ``step_*`` method returns a truthy value when the stage succeeded
    or had nothing to do and a falsy value when it failed. Exceptions raised
    inside a stage are caught, logged and appended to ``status["errors"]``
    instead of propagating.
    """

    # Stage names in execution order; these are the keys of status["steps"].
    _STEPS = ("ble_sync", "decoding", "transcription", "processing")

    # Suffix given to decoded files. The decoding and transcription steps
    # must agree on it, so it is defined in exactly one place.
    _DECODED_SUFFIX = "_decoded.wav"

    def __init__(self):
        """Create a fresh status record with every stage marked pending."""
        self.status = {
            "started_at": datetime.now().isoformat(),
            "steps": {
                name: {"status": "pending", "files": []}
                for name in self._STEPS
            },
            "errors": []
        }

    def save_status(self):
        """Write ``self.status`` to ``STATUS_FILE`` as JSON (best effort).

        Status tracking is auxiliary, so a failure to persist it is logged as
        a warning rather than raised. This keeps the ``step_*`` methods from
        raising out of their own error handlers.
        """
        try:
            # Serialise before opening the file: a non-serialisable value must
            # not leave a truncated status file behind.
            payload = json.dumps(self.status, indent=2)
            LOGS_DIR.mkdir(parents=True, exist_ok=True)
            with open(STATUS_FILE, 'w') as f:
                f.write(payload)
        except (OSError, TypeError, ValueError) as e:
            logger.warning(f"Could not save pipeline status: {e}")

    def _set_step(self, step: str, state: str, files: Optional[list] = None) -> None:
        """Update one stage's status (and optionally its files) and persist it."""
        entry = self.status["steps"][step]
        entry["status"] = state
        if files is not None:
            entry["files"] = files
        self.save_status()

    def _record_exception(self, step: str, label: str, exc: Exception) -> bool:
        """Log *exc*, mark *step* as errored, persist, and return ``False``."""
        logger.error(f"❌ {label} error: {exc}")
        self.status["errors"].append(str(exc))
        self._set_step(step, "error")
        return False

    async def step_ble_sync(self, duration_seconds: int = 60) -> bool:
        """Step 1: Sync audio from the Omi pendant via BLE.

        Args:
            duration_seconds: How long to keep syncing, passed to
                ``OmiBLEClient.connect_and_sync``.

        Returns:
            ``True`` if the client reported success, ``False`` if it reported
            failure or raised.
        """
        logger.info("\n=== STEP 1: BLE Audio Sync ===")
        self._set_step("ble_sync", "running")

        try:
            client = OmiBLEClient(OMI_DEVICE_ADDRESS)
            success = await client.connect_and_sync(duration_seconds)
        except Exception as e:
            return self._record_exception("ble_sync", "BLE sync", e)

        if success:
            self._set_step("ble_sync", "success")
            logger.info("✅ BLE sync completed")
        else:
            # Record the error before saving so it reaches the status file.
            self.status["errors"].append("BLE sync failed")
            self._set_step("ble_sync", "failed")
            logger.error("❌ BLE sync failed")
        return bool(success)

    def step_decoding(self, codec: str = "opus") -> bool:
        """Step 2: Decode raw audio captures to WAV.

        Decodes every ``omi_audio_*.raw`` file in ``AUDIO_DIR`` into
        ``DECODED_DIR``, skipping files whose output already exists.
        Per-file failures are best effort: they are recorded in
        ``status["errors"]`` and the stage status becomes ``"partial"``
        instead of ``"success"``, but the step still returns ``True`` so later
        stages can work with what is available.

        Args:
            codec: Codec name handed to ``AudioDecoder``.

        Returns:
            ``True`` unless an unexpected exception occurred.
        """
        logger.info("\n=== STEP 2: Audio Decoding (Opus → WAV) ===")
        self._set_step("decoding", "running")

        try:
            AUDIO_DIR.mkdir(parents=True, exist_ok=True)
            DECODED_DIR.mkdir(parents=True, exist_ok=True)

            raw_files = sorted(AUDIO_DIR.glob("omi_audio_*.raw"))
            if not raw_files:
                logger.warning("No raw audio files to decode")
                self._set_step("decoding", "skipped")
                return True

            logger.info(f"Found {len(raw_files)} raw audio files")

            decoder = AudioDecoder(codec=codec)
            decoded_files = []
            failed = 0

            for raw_file in raw_files:
                output_file = DECODED_DIR / f"{raw_file.stem}{self._DECODED_SUFFIX}"

                # Decoding is idempotent: reuse output from an earlier run.
                if output_file.exists():
                    logger.info(f"Already decoded: {raw_file.name}")
                    decoded_files.append(output_file)
                    continue

                result = decoder.decode(raw_file, output_file)
                if result:
                    decoded_files.append(result)
                else:
                    failed += 1
                    self.status["errors"].append(f"Failed to decode {raw_file.name}")

            # Do not claim plain success when some inputs could not be decoded.
            self._set_step(
                "decoding",
                "partial" if failed else "success",
                [str(f) for f in decoded_files],
            )
            logger.info(f"✅ Decoded {len(decoded_files)} files")
            if failed:
                logger.warning(f"{failed} raw audio files could not be decoded")
            return True

        except Exception as e:
            return self._record_exception("decoding", "Decoding", e)

    def step_transcription(self, model: str = "base") -> bool:
        """Step 3: Transcribe decoded audio with Whisper.

        Args:
            model: Whisper model name handed to ``WhisperPipeline``.

        Returns:
            ``True`` if transcription ran or there was nothing to transcribe,
            ``False`` if an exception occurred.
        """
        logger.info("\n=== STEP 3: Whisper Transcription ===")
        self._set_step("transcription", "running")

        try:
            DECODED_DIR.mkdir(parents=True, exist_ok=True)
            TRANSCRIPTS_DIR.mkdir(parents=True, exist_ok=True)

            # Use the same pattern for the "anything to do?" check and for the
            # actual run, so unrelated WAV files cannot trigger an empty run.
            pattern = f"*{self._DECODED_SUFFIX}"
            decoded_files = list(DECODED_DIR.glob(pattern))

            if not decoded_files:
                logger.warning("No decoded audio files to transcribe")
                self._set_step("transcription", "skipped")
                return True

            logger.info(f"Found {len(decoded_files)} decoded audio files")

            pipeline = WhisperPipeline(model=model)
            transcripts = pipeline.process_directory(
                DECODED_DIR,
                TRANSCRIPTS_DIR,
                pattern=pattern,
                delete_after_transcript=False  # Keep audio for debugging
            )

            self._set_step("transcription", "success", [str(t) for t in transcripts])
            logger.info(f"✅ Transcribed {len(transcripts)} files")
            return True

        except Exception as e:
            return self._record_exception("transcription", "Transcription", e)

    def step_processing(self, mode: str = "local") -> bool:
        """Step 4: Process transcripts and extract insights.

        Args:
            mode: Processing mode handed to
                ``TranscriptProcessor.process_transcripts``.

        Returns:
            ``True`` on success, ``False`` if an exception occurred.
        """
        logger.info("\n=== STEP 4: Transcript Processing ===")
        self._set_step("processing", "running")

        try:
            processor = TranscriptProcessor()
            results = processor.process_transcripts(mode=mode, days=7)
            insights = results["total_insights"]

            # NOTE: for this stage "files" holds the insight records
            # themselves rather than file paths.
            self._set_step("processing", "success", insights)
            logger.info(f"✅ Processed {len(insights)} transcripts")

            # Report insights summary
            if insights:
                with_actions = [i for i in insights if i.get("action_items")]
                logger.info(f"   {len(with_actions)} transcripts contain action items")

            return True

        except Exception as e:
            return self._record_exception("processing", "Processing", e)

    async def run_full_pipeline(self, ble_duration: int = 60,
                                codec: str = "opus", model: str = "base") -> bool:
        """Run the complete pipeline: sync → decode → transcribe → process.

        The run stops at the first stage that fails.

        Args:
            ble_duration: BLE sync duration in seconds.
            codec: Audio codec for the decoding stage.
            model: Whisper model for the transcription stage.

        Returns:
            ``True`` if every stage succeeded, ``False`` otherwise.
        """
        logger.info("🚀 Starting Omi Direct BLE Pipeline...\n")

        # Step 1: BLE Sync
        if not await self.step_ble_sync(ble_duration):
            logger.error("Pipeline aborted: BLE sync failed")
            return False

        # Step 2: Decoding
        if not self.step_decoding(codec=codec):
            logger.error("Pipeline aborted: Decoding failed")
            return False

        # Step 3: Transcription
        if not self.step_transcription(model=model):
            logger.error("Pipeline aborted: Transcription failed")
            return False

        # Step 4: Processing
        if not self.step_processing():
            logger.error("Pipeline aborted: Processing failed")
            return False

        self.status["completed_at"] = datetime.now().isoformat()
        self.save_status()

        logger.info("\n" + "="*50)
        logger.info("✅ Pipeline completed successfully!")
        logger.info("="*50)

        return True

    async def run_transcribe_only(self, codec: str = "opus", model: str = "base") -> bool:
        """Decode, transcribe and process existing audio without a BLE sync.

        Args:
            codec: Audio codec for the decoding stage.
            model: Whisper model for the transcription stage.

        Returns:
            ``True`` if every stage succeeded, ``False`` otherwise.
        """
        logger.info("📝 Running transcription-only mode...\n")

        # Decode if needed
        if not self.step_decoding(codec=codec):
            return False

        # Transcribe
        if not self.step_transcription(model=model):
            return False

        # Process
        if not self.step_processing():
            return False

        return True

    def run_process_only(self) -> bool:
        """Run only the processing stage on existing transcripts."""
        logger.info("🔍 Running processing-only mode...\n")
        return self.step_processing()


async def main():
    """Parse command-line arguments and run the selected pipeline mode.

    Exits with status 0 when the selected mode succeeded and 1 when it
    failed, was interrupted, or raised. With no mode flag, prints the help
    text and returns.
    """
    import argparse

    parser = argparse.ArgumentParser(description="Omi Direct BLE Pipeline Orchestration")
    parser.add_argument("--sync", type=int, metavar="SECONDS",
                        help="BLE sync duration (default: 60)")
    parser.add_argument("--full", action="store_true",
                        help="Run complete pipeline: sync → decode → transcribe → process")
    parser.add_argument("--transcribe-only", action="store_true",
                        help="Skip BLE, transcribe pending audio")
    parser.add_argument("--process-only", action="store_true",
                        help="Skip everything, just process transcripts")
    parser.add_argument("--model", default="base",
                        help="Whisper model (tiny, base, small, medium, large)")
    parser.add_argument("--codec", default="opus",
                        help="Audio codec (opus, pcm, mulaw)")

    args = parser.parse_args()

    pipeline = OmiPipeline()

    try:
        if args.full:
            sync_duration = args.sync or 60
            # Forward --codec/--model so the stages do not fall back to
            # their built-in defaults.
            success = await pipeline.run_full_pipeline(
                sync_duration, codec=args.codec, model=args.model
            )
        elif args.transcribe_only:
            success = await pipeline.run_transcribe_only(
                codec=args.codec, model=args.model
            )
        elif args.process_only:
            success = pipeline.run_process_only()
        elif args.sync:
            success = await pipeline.step_ble_sync(args.sync)
        else:
            parser.print_help()
            return

        sys.exit(0 if success else 1)

    except KeyboardInterrupt:
        logger.info("Pipeline interrupted by user")
        sys.exit(1)
    except Exception as e:
        logger.error(f"Pipeline error: {e}")
        sys.exit(1)


if __name__ == "__main__":
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        # Since Python 3.11, asyncio.run() turns the first Ctrl-C into a
        # cancellation of the main task and then raises KeyboardInterrupt
        # from asyncio.run() itself, so a handler inside the coroutine does
        # not see it. Exit cleanly here instead of printing a traceback.
        logger.info("Pipeline interrupted by user")
        sys.exit(1)
