#!/usr/bin/env python3
"""
Pinterest Automation - Etsy Inventory Sync
Uses Playwright to scrape Etsy shop (bypasses bot detection)
"""

import json
import sys
import re
import time
from datetime import datetime
from pathlib import Path
from typing import Optional
import logging

# Add parent to path for config
sys.path.insert(0, str(Path(__file__).parent))
from config import *

# Setup logging: INFO and above goes to both a log file under LOG_DIR
# (from config) and the console.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler(LOG_DIR / "etsy-sync.log"),
        logging.StreamHandler()
    ]
)
# Module-level logger used by every function below.
logger = logging.getLogger(__name__)


def load_existing_listings() -> dict:
    """Load the saved listings document, or build a blank skeleton if absent.

    Returns the parsed contents of LISTINGS_JSON when the file exists;
    otherwise a fresh dict with the shop name, empty listings, and the
    configured Pinterest boards.
    """
    if not LISTINGS_JSON.exists():
        # First run: no data file yet — start from an empty inventory document.
        return {
            "shop": ETSY_SHOP_NAME,
            "scrapedAt": None,
            "totalListings": 0,
            "listings": [],
            "pinterestBoards": PINTEREST_BOARDS
        }
    with open(LISTINGS_JSON, 'r') as f:
        return json.load(f)


def save_listings(data: dict):
    """Persist the listings document to LISTINGS_JSON as pretty-printed JSON.

    Ensures DATA_DIR exists (including any missing parent directories)
    before writing, then logs how many listings were saved.
    """
    # parents=True: a bare mkdir(exist_ok=True) raises FileNotFoundError when
    # DATA_DIR's parent is missing (e.g. fresh checkout); exist_ok keeps
    # repeated runs idempotent.
    DATA_DIR.mkdir(parents=True, exist_ok=True)
    # Explicit encoding avoids locale-dependent output across machines.
    with open(LISTINGS_JSON, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=2)
    logger.info(f"Saved {len(data.get('listings', []))} listings to {LISTINGS_JSON}")

def scrape_etsy_shop_playwright() -> list[dict]:
    """
    Scrape the configured Etsy shop using Playwright (bypasses bot detection).

    Walks the shop's paginated listing pages (hard cap: 10 pages), extracting
    one dict per listing with keys: id, title, url, price, category. Stops
    early on a page-load failure, an apparent captcha/block page, or a page
    with no parseable listings. Returns a deduplicated list (possibly empty).
    """
    # Imported lazily so the rest of this module can be used without
    # Playwright installed.
    from playwright.sync_api import sync_playwright
    
    listings = []
    
    with sync_playwright() as p:
        # Launch browser with stealth-like settings
        # NOTE: Etsy is aggressively blocking scrapers. Use add_listing.py for manual adds
        # until Etsy API is approved (reminder: Feb 16)
        browser = p.chromium.launch(
            headless=True,
            args=[
                '--disable-blink-features=AutomationControlled',
                '--no-sandbox',
                '--disable-setuid-sandbox',
                '--disable-dev-shm-usage'
            ]
        )
        
        # Present a realistic desktop fingerprint: viewport, Chrome UA,
        # locale, and timezone.
        context = browser.new_context(
            viewport={'width': 1920, 'height': 1080},
            user_agent='Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
            locale='en-US',
            timezone_id='America/Los_Angeles'
        )
        
        # Hide the navigator.webdriver automation flag before any page
        # script runs.
        context.add_init_script("""
            Object.defineProperty(navigator, 'webdriver', {
                get: () => undefined
            });
        """)
        
        page = context.new_page()
        
        page_num = 1
        while True:
            url = f"{ETSY_SHOP_URL}?page={page_num}"
            logger.info(f"Fetching page {page_num}: {url}")
            
            try:
                page.goto(url, wait_until='networkidle', timeout=30000)
                time.sleep(2)  # Let page settle
            except Exception as e:
                logger.error(f"Failed to load page {page_num}: {e}")
                break
            
            # Check for bot detection / captcha. Require two co-occurring
            # words so an incidental mention of either one alone doesn't
            # abort the scrape.
            content_lower = page.content().lower()
            if 'captcha' in content_lower and 'verify' in content_lower:
                logger.warning("Captcha detected - stopping")
                break
            if 'blocked' in content_lower and 'access' in content_lower:
                logger.warning("Access blocked - stopping")
                break
            
            # Find listing links using multiple strategies
            page_listings = []
            
            # Strategy 1: Look for listing cards with data-listing-id
            listing_elements = page.query_selector_all('[data-listing-id]')
            
            # Strategy 2: fall back to anchors whose href contains /listing/
            if not listing_elements:
                listing_elements = page.query_selector_all('a[href*="/listing/"]')
            
            if not listing_elements:
                logger.info(f"No listings found on page {page_num}, stopping")
                break
            
            # Per-page dedup; cross-page dedup happens after the loop.
            seen_ids = set()
            for elem in listing_elements:
                try:
                    # Get listing ID from the data attribute, else parse it
                    # out of the href (e.g. /listing/123456789/...).
                    listing_id = elem.get_attribute('data-listing-id')
                    if not listing_id:
                        href = elem.get_attribute('href') or ''
                        match = re.search(r'/listing/(\d+)/', href)
                        if match:
                            listing_id = match.group(1)
                    
                    if not listing_id or listing_id in seen_ids:
                        continue
                    seen_ids.add(listing_id)
                    
                    # Get title - try multiple approaches
                    title = ""
                    title_elem = elem.query_selector('h3') or elem.query_selector('[class*="title"]')
                    if title_elem:
                        title = title_elem.inner_text().strip()
                    else:
                        # Try aria-label or title attribute
                        title = elem.get_attribute('aria-label') or elem.get_attribute('title') or ""
                    
                    # Get URL: prefer a nested /listing/ anchor over the
                    # element's own href when one exists.
                    href = elem.get_attribute('href') or ''
                    link_elem = elem.query_selector('a[href*="/listing/"]')
                    if link_elem:
                        href = link_elem.get_attribute('href') or href
                    
                    if href.startswith('/'):
                        href = f"https://www.etsy.com{href}"
                    href = href.split('?')[0]  # Remove query params
                    
                    # Get price (first element whose class mentions price/Price)
                    price = ""
                    price_elem = elem.query_selector('[class*="price"]') or elem.query_selector('[class*="Price"]')
                    if price_elem:
                        price = price_elem.inner_text().strip()
                    
                    # Categorize by title keywords (see categorize_listing)
                    category = categorize_listing(title)
                    
                    page_listings.append({
                        "id": str(listing_id),
                        "title": title,
                        "url": href,
                        "price": price,
                        "category": category
                    })
                    
                except Exception as e:
                    # One malformed card shouldn't kill the whole page scrape.
                    logger.warning(f"Failed to parse listing element: {e}")
                    continue
            
            if not page_listings:
                logger.info(f"No parseable listings on page {page_num}, stopping")
                break
            
            listings.extend(page_listings)
            logger.info(f"Found {len(page_listings)} listings on page {page_num}")
            
            # Check for next page; stop when no pagination control is found.
            next_btn = page.query_selector('a[aria-label="Next page"]') or page.query_selector('[class*="pagination"] a:last-child')
            if not next_btn:
                break
            
            page_num += 1
            if page_num > 10:  # Safety limit
                break
            
            time.sleep(1)  # Be nice to Etsy
        
        browser.close()
    
    # Deduplicate across pages, preserving first-seen order.
    seen_ids = set()
    unique_listings = []
    for listing in listings:
        if listing['id'] not in seen_ids:
            seen_ids.add(listing['id'])
            unique_listings.append(listing)
    
    return unique_listings


def categorize_listing(title: str) -> str:
    """Map a listing title to one of the shop's board categories.

    Rules are checked in priority order and the first keyword hit wins
    (case-insensitive substring match). Titles matching no rule fall back
    to "Easter SVGs", the default for this SVG shop.
    """
    rules = (
        ("Military", ('usmc', 'marine', 'military', 'veteran', 'semper')),
        ("Patriotic", ('patriotic', 'usa', 'american', '4th of july', 'flag', 'eagle', '1776', '250')),
        ("Reformed SVGs", ('reformed', 'calvin', 'luther', 'huguenot', 'burning bush', 'soli', 'tulip')),
        ("Nature/Animals/Insects", ('deer', 'hunting', 'wildlife', 'nature', 'outdoor', 'animal', 'butterfly', 'buck')),
        ("Easter SVGs", ('easter', 'christian', 'faith', 'cross', 'bible', 'church', 'jesus', 'risen', 'tomb', 'lily', 'resurrection')),
    )

    haystack = title.lower()
    for category, keywords in rules:
        if any(kw in haystack for kw in keywords):
            return category

    # No keyword matched — default category for an SVG shop.
    return "Easter SVGs"


def merge_listings(existing: list[dict], scraped: list[dict]) -> list[dict]:
    """Combine freshly scraped listings with previously saved ones.

    Scraped entries take precedence, but Pinterest tracking fields from the
    saved copy (posted/scheduled images, timestamps, description, priority)
    are carried over onto them. Saved listings absent from the scrape are
    appended with a `delisted` flag and timestamp rather than dropped.
    """
    previous_by_id = {item['id']: item for item in existing}
    current_ids = {item['id'] for item in scraped}

    result = []

    for item in scraped:
        prior = previous_by_id.get(item['id'])
        if prior is not None:
            # Preserve accumulated posting/scheduling state.
            item['posted_images'] = prior.get('posted_images', [])
            item['scheduled_images'] = prior.get('scheduled_images', [])
            item['last_posted'] = prior.get('last_posted')
            item['last_scheduled'] = prior.get('last_scheduled')
            item['pinterest_description'] = prior.get('pinterest_description')
            item['priority'] = prior.get('priority', False)
        result.append(item)

    for item in existing:
        if item['id'] in current_ids:
            continue
        # Still on file but gone from the shop: flag it, keep the record.
        item['delisted'] = True
        item['delisted_at'] = datetime.now().isoformat()
        result.append(item)

    return result


def main():
    """Run one full Etsy -> listings.json sync cycle."""
    logger.info("=" * 50)
    logger.info("Etsy Inventory Sync - Starting (Playwright mode)")

    # Previously saved state (or an empty skeleton on first run).
    data = load_existing_listings()
    previous = data.get('listings', [])

    # Current shop contents via Playwright.
    fresh = scrape_etsy_shop_playwright()
    if not fresh:
        # An empty scrape usually means Etsy blocked us — don't clobber
        # good saved data with nothing.
        logger.warning("No listings scraped. Keeping existing data.")
        print("WARNING: No listings scraped - Etsy may be blocking or shop empty")
        return

    combined = merge_listings(previous, fresh)

    # Refresh the document; delisted entries don't count as active.
    data['listings'] = combined
    data['totalListings'] = len([item for item in combined if not item.get('delisted')])
    data['scrapedAt'] = datetime.now().isoformat()

    save_listings(data)

    active_count = data['totalListings']
    logger.info(f"Sync complete: {active_count} active listings")
    logger.info("=" * 50)

    print(f"SUCCESS: {active_count} active listings synced")


if __name__ == "__main__":
    main()