#!/usr/bin/env python3
"""
YouTube Smart Playlist Manager
Manages Dustin's curated YouTube playlists with automatic refresh.

QUOTA USAGE ESTIMATES (per run):
  - Channel-based playlists: 0 units (RSS) + 50 (create) + 50*N (insert)
  - Search-based playlists: 100*Q (search.list) + 50 (create) + 50*N (insert)
  - Hybrid playlists: 0 (RSS channels) + 100*Q (search) + 50 (create) + 50*N (insert)

Daily quota: 10,000 units (shared across all YouTube API operations)
RSS feeds cost ZERO quota units and return 15 most recent videos per channel.
"""

import os
import json
import pickle
import urllib.request
import xml.etree.ElementTree as ET
from datetime import datetime, timedelta
from pathlib import Path
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError

# Filesystem locations — everything lives under the user's OpenClaw workspace.
WORKSPACE = Path.home() / ".openclaw" / "workspace"
CREDENTIALS_FILE = WORKSPACE / "secrets" / "youtube-oauth-credentials.json"  # OAuth client secrets JSON
TOKEN_FILE = WORKSPACE / "secrets" / "youtube-token.pickle"  # cached OAuth token, written after first auth
CONFIG_FILE = WORKSPACE / "youtube-channels.json"  # playlist definitions consumed by load_config()

# YouTube API scopes — full 'youtube' scope is needed to create/delete playlists.
SCOPES = ['https://www.googleapis.com/auth/youtube']


def get_authenticated_service():
    """Build an authenticated YouTube Data API v3 client.

    Loads cached credentials from TOKEN_FILE, silently refreshes them when
    expired, or falls back to the interactive OAuth browser flow. Any new
    or refreshed credentials are written back to TOKEN_FILE.

    Returns:
        The googleapiclient service object for 'youtube' v3.

    Raises:
        FileNotFoundError: when the OAuth flow is needed but
            CREDENTIALS_FILE does not exist.
    """
    creds = None

    # Reuse the cached token from a previous run, if any.
    if TOKEN_FILE.exists():
        with open(TOKEN_FILE, 'rb') as cached:
            creds = pickle.load(cached)

    if not creds or not creds.valid:
        # Try a silent refresh first; otherwise run the browser OAuth flow.
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            if not CREDENTIALS_FILE.exists():
                raise FileNotFoundError(f"Credentials file not found: {CREDENTIALS_FILE}")
            flow = InstalledAppFlow.from_client_secrets_file(str(CREDENTIALS_FILE), SCOPES)
            creds = flow.run_local_server(port=8080)

        # Cache the (possibly new) credentials for the next run.
        TOKEN_FILE.parent.mkdir(parents=True, exist_ok=True)
        with open(TOKEN_FILE, 'wb') as cached:
            pickle.dump(creds, cached)

    return build('youtube', 'v3', credentials=creds)


def load_config():
    """Read CONFIG_FILE and return the playlist configuration as a dict."""
    return json.loads(CONFIG_FILE.read_text())


def get_date_suffix():
    """Return today's date as a six-digit YYMMDD string."""
    return f"{datetime.now():%y%m%d}"


def parse_duration(duration_str):
    """Parse an ISO 8601 duration (YouTube contentDetails.duration) to minutes.

    Handles both the common 'PT#H#M#S' form and durations with a day
    component ('P#DT#H#M#S'), which YouTube emits for streams/videos longer
    than 24 hours (the old PT-only pattern returned 0 for those).

    Args:
        duration_str: ISO 8601 duration string, e.g. 'PT1H30M15S'.

    Returns:
        float: total duration in minutes; 0 if the string is unparseable.
    """
    import re
    match = re.match(
        r'P(?:(\d+)D)?(?:T(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?)?',
        duration_str
    )
    if not match:
        return 0
    days = int(match.group(1) or 0)
    hours = int(match.group(2) or 0)
    minutes = int(match.group(3) or 0)
    seconds = int(match.group(4) or 0)
    return days * 1440 + hours * 60 + minutes + seconds / 60


def filter_by_duration(videos, min_minutes=None, max_minutes=None):
    """
    Filter videos by duration in minutes.

    Note: RSS-based videos carry duration_minutes=0 (the RSS feed has no
    duration data). Such videos always pass through unfiltered; the caller
    is expected to handle duration filtering for them separately if needed.

    Args:
        videos: list of video dicts (may contain 'duration_minutes').
        min_minutes: drop videos shorter than this (ignored if falsy).
        max_minutes: drop videos longer than this (ignored if falsy).

    Returns:
        New list of the videos that pass the duration bounds.
    """
    def _keep(video):
        mins = video.get('duration_minutes', 0)
        # No duration data (RSS-based): pass through untouched.
        if mins == 0:
            return True
        if min_minutes and mins < min_minutes:
            return False
        return not (max_minutes and mins > max_minutes)

    return [video for video in videos if _keep(video)]


def get_channel_videos_via_rss(channel_id, channel_name, days_back=7, max_results=50):
    """
    Get recent videos from a channel via YouTube RSS feed (NO API QUOTA COST).

    RSS feeds return the 15 most recent videos per channel.

    Args:
        channel_id: YouTube channel ID used to build the feed URL.
        channel_name: Human-readable name stored on each returned video.
        days_back: Drop videos published more than this many days ago.
        max_results: Hard cap on the number of videos returned.

    Returns:
        list of dicts with 'video_id', 'title', 'channel', 'published',
        'duration_minutes'. duration_minutes is always 0 here — the RSS feed
        carries no duration data (caller must fetch via API if needed).
        On any fetch/parse error a warning is printed and [] is returned.
    """
    videos = []
    rss_url = f"https://www.youtube.com/feeds/videos.xml?channel_id={channel_id}"

    try:
        # Fetch the feed with a bounded timeout so one dead channel can't hang the run.
        with urllib.request.urlopen(rss_url, timeout=10) as response:
            xml_data = response.read()

        root = ET.fromstring(xml_data)

        # Atom namespaces used by YouTube's feed entries.
        ns = {
            'atom': 'http://www.w3.org/2005/Atom',
            'media': 'http://search.yahoo.com/mrss/',
            'yt': 'http://www.youtube.com/xml/schemas/2015'
        }

        # Naive UTC cutoff; publish timestamps below are reduced to naive UTC too.
        cutoff_date = datetime.utcnow() - timedelta(days=days_back)

        for entry in root.findall('atom:entry', ns):
            # Entries without a video ID or publish date are unusable — skip.
            video_id_elem = entry.find('yt:videoId', ns)
            if video_id_elem is None:
                continue
            video_id = video_id_elem.text

            title_elem = entry.find('atom:title', ns)
            title = title_elem.text if title_elem is not None else 'Unknown'

            published_elem = entry.find('atom:published', ns)
            if published_elem is None:
                continue
            published_str = published_elem.text

            # Normalize the ISO 8601 timestamp to a naive datetime by dropping
            # a trailing 'Z' or a '+HH:MM' offset (feed timestamps are UTC).
            try:
                if published_str.endswith('Z'):
                    published_dt = datetime.fromisoformat(published_str[:-1])
                else:
                    published_dt = datetime.fromisoformat(published_str.replace('Z', '+00:00').split('+')[0])
            except ValueError:
                continue

            # Enforce the recency window.
            if published_dt < cutoff_date:
                continue

            videos.append({
                'video_id': video_id,
                'title': title,
                'channel': channel_name,
                'published': published_str,
                'duration_minutes': 0  # Not available from RSS; caller can fetch via API if needed
            })

        # Enforce the caller's cap.
        videos = videos[:max_results]

    # URLError is an OSError subclass; catching OSError avoids depending on
    # urllib.error being implicitly imported by urllib.request (only
    # urllib.request is imported at the top of this file).
    except OSError as e:
        print(f"Error fetching RSS feed for channel {channel_id}: {e}")
    except ET.ParseError as e:
        print(f"Error parsing RSS feed for channel {channel_id}: {e}")
    except Exception as e:
        print(f"Unexpected error fetching videos for channel {channel_id}: {e}")

    return videos


def get_channel_videos(youtube, channel_id, days_back=7, max_results=50):
    """
    DEPRECATED: Use get_channel_videos_via_rss() instead.
    Kept for backwards compatibility but RSS version should be preferred.

    Original API-based function (costs 100 quota units per call).

    Args:
        youtube: Authenticated YouTube API service object.
        channel_id: Channel whose recent uploads to list.
        days_back: Only include videos published within this many days.
        max_results: Cap on results (API limit of 50 applies regardless).

    Returns:
        list of video dicts including real 'duration_minutes' values.
    """
    results = []
    published_after = (datetime.utcnow() - timedelta(days=days_back)).isoformat() + 'Z'

    try:
        # search.list costs 100 quota units per call.
        search_response = youtube.search().list(
            part='snippet',
            channelId=channel_id,
            order='date',
            publishedAfter=published_after,
            maxResults=min(max_results, 50),
            type='video'
        ).execute()

        ids = [entry['id']['videoId'] for entry in search_response.get('items', [])]

        if ids:
            # Second call fetches contentDetails so we can compute durations.
            details_response = youtube.videos().list(
                part='contentDetails,snippet',
                id=','.join(ids)
            ).execute()

            for entry in details_response.get('items', []):
                snippet = entry['snippet']
                results.append({
                    'video_id': entry['id'],
                    'title': snippet['title'],
                    'channel': snippet['channelTitle'],
                    'published': snippet['publishedAt'],
                    'duration_minutes': parse_duration(entry['contentDetails']['duration'])
                })

    except HttpError as e:
        print(f"Error fetching videos for channel {channel_id}: {e}")

    return results


def search_videos(youtube, query, days_back=4, max_results=25):
    """
    Search YouTube for videos (USES API, COSTS ~100 QUOTA UNITS PER QUERY).

    Cannot use RSS feeds for keyword searches, so this remains API-based.
    Used by hybrid and search-type playlists (e.g., 'projects', 'openclaw').

    Args:
        youtube: Authenticated YouTube API service object.
        query: Free-text search query.
        days_back: Only include videos published within this many days.
        max_results: Cap on results (API limit of 50 applies regardless).

    Returns:
        list of video dicts including real 'duration_minutes' values.
    """
    found = []
    published_after = (datetime.utcnow() - timedelta(days=days_back)).isoformat() + 'Z'

    try:
        search_response = youtube.search().list(
            part='snippet',
            q=query,
            order='relevance',
            publishedAfter=published_after,
            maxResults=min(max_results, 50),
            type='video'
        ).execute()

        ids = [entry['id']['videoId'] for entry in search_response.get('items', [])]

        if ids:
            # Follow-up call for contentDetails so durations can be computed.
            details_response = youtube.videos().list(
                part='contentDetails,snippet',
                id=','.join(ids)
            ).execute()

            for entry in details_response.get('items', []):
                snippet = entry['snippet']
                found.append({
                    'video_id': entry['id'],
                    'title': snippet['title'],
                    'channel': snippet['channelTitle'],
                    'published': snippet['publishedAt'],
                    'duration_minutes': parse_duration(entry['contentDetails']['duration'])
                })

    except HttpError as e:
        print(f"Error searching for '{query}': {e}")

    return found


def find_existing_playlist(youtube, name_prefix):
    """Find the user's first playlist whose title starts with name_prefix.

    Returns:
        (playlist_id, title) of the first match, or (None, None) when no
        playlist matches or the API call fails.
    """
    try:
        response = youtube.playlists().list(
            part='snippet',
            mine=True,
            maxResults=50
        ).execute()

        for playlist in response.get('items', []):
            title = playlist['snippet']['title']
            if title.startswith(name_prefix):
                return playlist['id'], title

    except HttpError as e:
        print(f"Error finding playlist: {e}")

    return None, None


def create_playlist(youtube, title, description="Auto-managed by Tony"):
    """Create a new private playlist; return its ID, or None on failure."""
    body = {
        'snippet': {
            'title': title,
            'description': description
        },
        'status': {
            'privacyStatus': 'private'
        }
    }
    try:
        created = youtube.playlists().insert(part='snippet,status', body=body).execute()
    except HttpError as e:
        print(f"Error creating playlist '{title}': {e}")
        return None
    return created['id']


def delete_playlist(youtube, playlist_id):
    """Delete a playlist; return True on success, False on API error."""
    try:
        youtube.playlists().delete(id=playlist_id).execute()
    except HttpError as e:
        print(f"Error deleting playlist: {e}")
        return False
    return True


def add_video_to_playlist(youtube, playlist_id, video_id):
    """Append one video to a playlist; return True iff it was added.

    Missing videos and duplicates are reported but treated as non-fatal.
    """
    body = {
        'snippet': {
            'playlistId': playlist_id,
            'resourceId': {
                'kind': 'youtube#video',
                'videoId': video_id
            }
        }
    }
    try:
        youtube.playlistItems().insert(part='snippet', body=body).execute()
        return True
    except HttpError as e:
        message = str(e)
        # Classify the common benign failures so the run keeps going.
        if 'Video not found' in message or 'videoNotFound' in message:
            print(f"  Video {video_id} not found, skipping")
        elif 'duplicate' in message.lower():
            print(f"  Video {video_id} already in playlist")
        else:
            print(f"  Error adding video {video_id}: {e}")
        return False


def dedupe_videos(videos):
    """Remove near-duplicate videos by normalized title (first occurrence wins).

    Titles are lowercased, common "full episode" style markers are removed,
    and only the first 50 characters are compared, so re-uploads with minor
    suffix variations collapse to one entry.

    Args:
        videos: list of video dicts, each with a 'title' key.

    Returns:
        New list with title-duplicates removed, original order preserved.
    """
    seen_titles = set()
    unique = []

    for video in videos:
        # Simple normalization for deduplication
        normalized = video['title'].lower().strip()
        # Remove common "full episode" markers.
        for suffix in ['| full episode', '(full)', '[full]', '- full']:
            normalized = normalized.replace(suffix, '')
        # Strip again: removing a marker can leave stray spaces, which
        # previously made "X" and "X | Full Episode" compare as different.
        normalized = normalized.strip()[:50]  # Compare first 50 chars

        if normalized not in seen_titles:
            seen_titles.add(normalized)
            unique.append(video)

    return unique


def update_playlist(youtube, config, playlist_key, dry_run=False):
    """Update a single playlist based on config.

    Gathers candidate videos according to the playlist's 'type' ('search',
    'hybrid', or the channel-based default), applies duration filtering and
    title deduplication, then — unless dry_run — deletes the previous dated
    playlist and creates a fresh one populated with the surviving videos.

    Args:
        youtube: Authenticated YouTube API service object.
        config: Full configuration dict; keys starting with '_' hold
            global rules (e.g. '_durationRules') rather than playlists.
        playlist_key: Key in config identifying the playlist to refresh.
        dry_run: If True, only print what would be added; no API writes.
    """
    playlist_config = config.get(playlist_key)
    # Ignore unknown keys and '_'-prefixed meta entries.
    if not playlist_config or playlist_key.startswith('_'):
        return
    
    print(f"\n{'='*50}")
    print(f"Processing: {playlist_config['name']}")
    print(f"{'='*50}")
    
    playlist_type = playlist_config.get('type', 'channels')
    duration_filter = playlist_config.get('durationFilter', {})
    # Per-playlist minimum wins; else the global rule; else 2 minutes.
    min_minutes = duration_filter.get('minMinutes', config.get('_durationRules', {}).get('globalMinMinutes', 2))
    max_minutes = duration_filter.get('maxMinutes')
    max_videos = playlist_config.get('maxVideos', 30)
    
    videos = []
    
    # Collect videos based on playlist type
    if playlist_type == 'search':
        # Search-based playlist (API-based, costs quota)
        query = playlist_config.get('searchQuery', playlist_key)
        period_str = playlist_config.get('searchPeriod', '4d')
        # Period strings look like '4d'; only day units are supported here.
        days = int(period_str.replace('d', ''))
        
        print(f"[API] Searching for: {query} (last {days} days)")
        videos = search_videos(youtube, query, days_back=days, max_results=max_videos)
    
    elif playlist_type == 'hybrid':
        # Hybrid: channels (RSS) + search (API)
        channels = playlist_config.get('channels', [])
        search_config = playlist_config.get('search', {})
        
        # Get videos from channels via RSS (NO QUOTA COST)
        for channel in channels:
            print(f"[RSS] Fetching from: {channel['name']}")
            channel_videos = get_channel_videos_via_rss(channel['channelId'], channel['name'], days_back=7)
            videos.extend(channel_videos)
        
        # Add search results via API (COSTS QUOTA)
        for query in search_config.get('queries', []):
            print(f"[API] Searching: {query}")
            period_str = search_config.get('searchPeriod', '7d')
            days = int(period_str.replace('d', ''))
            search_videos_list = search_videos(youtube, query, days_back=days, 
                                               max_results=search_config.get('maxSearchResults', 15))
            videos.extend(search_videos_list)
    
    else:
        # Channel-based playlist (RSS-based, NO QUOTA COST)
        channels = playlist_config.get('channels', [])
        for channel in channels:
            print(f"[RSS] Fetching from: {channel['name']}")
            channel_videos = get_channel_videos_via_rss(channel['channelId'], channel['name'], days_back=7)
            videos.extend(channel_videos)
    
    print(f"Total videos found: {len(videos)}")
    
    # Apply filters (RSS videos carry duration 0 and pass through; see filter_by_duration)
    videos = filter_by_duration(videos, min_minutes=min_minutes, max_minutes=max_minutes)
    print(f"After duration filter ({min_minutes}-{max_minutes or '∞'} min): {len(videos)}")
    
    # Deduplicate
    videos = dedupe_videos(videos)
    print(f"After deduplication: {len(videos)}")
    
    # Sort by publish date (oldest first); ISO 8601 strings sort chronologically.
    videos.sort(key=lambda x: x['published'], reverse=False)
    
    # Limit to max videos
    videos = videos[:max_videos]
    print(f"Final count (max {max_videos}): {len(videos)}")
    
    if dry_run:
        print("\n[DRY RUN] Would add these videos:")
        for v in videos[:10]:
            print(f"  - {v['title'][:60]}... ({v['duration_minutes']:.0f}min)")
        if len(videos) > 10:
            print(f"  ... and {len(videos) - 10} more")
        return
    
    # Create new playlist: "01 YYMMDD Name" format (date between number and name)
    date_suffix = get_date_suffix()
    name = playlist_config['name']
    name_parts = name.split(' ', 1)
    if len(name_parts) > 1 and name_parts[0].isdigit():
        # "01 News Channels" → "01 260224 News Channels"
        new_title = f"{name_parts[0]} {date_suffix} {name_parts[1]}"
        search_prefix = f"{name_parts[0]} "
    else:
        new_title = f"{date_suffix} {name}"
        # NOTE(review): this fallback matches ANY playlist title starting with
        # the current two-digit year, so it could select an unrelated playlist
        # for deletion below — confirm this is acceptable for unnumbered names.
        search_prefix = date_suffix[:2]  # fallback: match by year prefix

    # Find and delete old playlist (only if its title differs from the new one)
    old_id, old_title = find_existing_playlist(youtube, search_prefix)
    if old_id and old_title != new_title:
        print(f"Deleting old playlist: {old_title}")
        delete_playlist(youtube, old_id)
    
    # Create new playlist
    print(f"Creating playlist: {new_title}")
    playlist_id = create_playlist(youtube, new_title)
    
    if not playlist_id:
        print("Failed to create playlist!")
        return
    
    # Add videos one by one; add_video_to_playlist tolerates missing/duplicate videos.
    added = 0
    for video in videos:
        if add_video_to_playlist(youtube, playlist_id, video['video_id']):
            added += 1
    
    print(f"Added {added}/{len(videos)} videos to playlist")


def main():
    """Main entry point.

    Parses CLI flags, authenticates against the YouTube API, then updates
    the selected playlists (a single key, a predefined group, or all).
    With no selection flag, prints usage plus the configured playlists.
    """
    import argparse
    parser = argparse.ArgumentParser(description='YouTube Smart Playlist Manager')
    parser.add_argument('--playlist', '-p', help='Update specific playlist (key from config)')
    parser.add_argument('--group', '-g', choices=['1', '2'], help='Update playlist group (1=News/Ministry, 2=Discovery/Projects)')
    parser.add_argument('--all', '-a', action='store_true', help='Update all playlists')
    parser.add_argument('--dry-run', '-n', action='store_true', help='Show what would be done without making changes')
    parser.add_argument('--auth', action='store_true', help='Just authenticate and exit')
    args = parser.parse_args()
    
    # Define groups for quota management (see module docstring for per-run
    # quota estimates against the 10,000-unit daily budget).
    GROUPS = {
        '1': ['news', 'extra_news', 'ai_news', 'ministry'],  # Odd days
        '2': ['openclaw', 'projects', 'peptides']             # Even days
    }
    
    print("YouTube Smart Playlist Manager")
    print("=" * 50)
    
    # Authenticate (may open a browser for the OAuth flow on first run).
    print("Authenticating...")
    youtube = get_authenticated_service()
    print("✓ Authenticated successfully")
    
    if args.auth:
        print("Auth-only mode, exiting.")
        return
    
    # Load config; '_'-prefixed keys are meta entries, not playlists.
    config = load_config()
    print(f"✓ Loaded config with {len([k for k in config if not k.startswith('_')])} playlists")
    
    # Determine which playlists to update
    if args.playlist:
        playlists = [args.playlist]
    elif args.group:
        playlists = GROUPS.get(args.group, [])
        print(f"Running group {args.group}: {', '.join(playlists)}")
    elif args.all:
        playlists = [k for k in config.keys() if not k.startswith('_')]
    else:
        # No selection flag: print usage and the available playlist keys, then exit.
        print("\nUsage:")
        print("  --playlist KEY  Update a specific playlist")
        print("  --group 1|2     Update playlist group (1=odd days, 2=even days)")
        print("  --all           Update all playlists")
        print("  --dry-run       Preview without making changes")
        print("  --auth          Just authenticate")
        print("\nGroups:")
        print("  Group 1 (odd days):  news, extra_news, ai_news, ministry")
        print("  Group 2 (even days): openclaw, projects, peptides")
        print("\nAvailable playlists:")
        for key in config:
            if not key.startswith('_'):
                print(f"  - {key}: {config[key].get('name', key)}")
        return
    
    # Update playlists
    for playlist_key in playlists:
        if playlist_key in config:
            update_playlist(youtube, config, playlist_key, dry_run=args.dry_run)
        else:
            print(f"Unknown playlist: {playlist_key}")
    
    print("\n" + "=" * 50)
    print("Done!")


# Run the CLI only when executed as a script, not on import.
if __name__ == '__main__':
    main()
