#!/usr/bin/env python3
"""
YouTube RSS Poller — Extract trends from YouTube channel RSS feeds.
Loads channel list from youtube-channels.json.
"""

import json
import xml.etree.ElementTree as ET
from typing import Dict, List, Optional

import requests

YOUTUBE_CHANNELS_FILE = "/Users/tonyclaw/.openclaw/workspace/youtube-channels.json"


def load_youtube_channels(path: Optional[str] = None) -> List[str]:
    """
    Load YouTube channel IDs from the channel config JSON.

    The file is a mapping of category name -> {"channels": [{"channelId": ...},
    ...]}; categories or channel entries missing the expected keys are skipped.

    Args:
        path: Optional path to the config file; defaults to
            YOUTUBE_CHANNELS_FILE when omitted.

    Returns:
        Flat list of channel IDs, or [] if the file can't be read or parsed.
    """
    try:
        with open(path or YOUTUBE_CHANNELS_FILE, "r") as f:
            data = json.load(f)

        # Flatten the nested structure, tolerating malformed entries.
        # (Category names themselves are irrelevant here — only the values.)
        return [
            channel["channelId"]
            for category_data in data.values()
            if isinstance(category_data, dict) and "channels" in category_data
            for channel in category_data["channels"]
            if isinstance(channel, dict) and "channelId" in channel
        ]
    except Exception as e:
        # Best-effort: a missing/corrupt config degrades to an empty list
        # rather than crashing the poller.
        print(f"[WARN] Failed to load YouTube channels: {e}")
        return []


def fetch_channel_feed(channel_id: str) -> List[Dict]:
    """
    Fetch the RSS/Atom feed for a YouTube channel.

    Args:
        channel_id: YouTube channel ID (the "UC..." identifier).

    Returns:
        List of dicts with "title" and "url" for the latest 3 videos,
        or an empty list on any network or parse failure.
    """
    try:
        rss_url = f"https://www.youtube.com/feeds/videos.xml?channel_id={channel_id}"
        response = requests.get(rss_url, timeout=10)
        response.raise_for_status()

        root = ET.fromstring(response.content)

        # YouTube's video feed is an Atom document; every element we read
        # lives in the Atom namespace. (The previously declared yt/media
        # namespaces were never used and carried a wrong yt URI.)
        ns = {"atom": "http://www.w3.org/2005/Atom"}

        videos = []
        for entry in root.findall("atom:entry", ns):
            title_elem = entry.find("atom:title", ns)
            link_elem = entry.find("atom:link", ns)

            if title_elem is not None and link_elem is not None:
                videos.append({
                    "title": title_elem.text or "",
                    "url": link_elem.get("href", "")
                })

        return videos[:3]  # Latest 3 videos only
    except Exception as e:
        # Best-effort: one bad channel or a network hiccup must not
        # abort polling the remaining channels.
        print(f"[WARN] Failed to fetch YouTube feed for {channel_id}: {e}")
        return []


def get_youtube_signals() -> List[Dict]:
    """
    Collect trending signals from all tracked YouTube RSS feeds.

    Each signal carries the source name, an assigned niche, the video
    title, its URL, and a placeholder engagement count.
    """
    # Niche rotation: channels are spread round-robin across these niches
    # to diversify the signal mix (simplified position-based heuristic).
    niche_cycle = [
        "USMC / Military",
        "Military Family",
        "Reformed Christian",
        "Patriotic",
        "Print on Demand",
        "AI Services / Small Business",
    ]

    signals: List[Dict] = []
    for position, cid in enumerate(load_youtube_channels()):
        assigned_niche = niche_cycle[position % len(niche_cycle)]
        signals.extend(
            {
                "source": "youtube",
                "niche": assigned_niche,
                "title": vid["title"],
                "url": vid["url"],
                "engagement": 0,  # YouTube RSS doesn't expose view counts
            }
            for vid in fetch_channel_feed(cid)
        )

    return signals


if __name__ == "__main__":
    # Smoke test: poll every tracked feed and print a small sample.
    found = get_youtube_signals()
    print(f"Found {len(found)} YouTube signals")
    for entry in found[:5]:
        print(f"  [{entry['niche']}] {entry['title']}")
