#!/usr/bin/env python3
"""
produce_week3_audio.py — Weekend audio production script
Generates TTS audio for:
  1. MF Investing Phase 1 Week 3 (MFM Ch. 8 + RPC Intro + Ch. 1)
  2. Elder Training Phase 1 Week 1 (Berkhof Manual: Preface, Intro, Religion, Revelation, Scripture)

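Voice: echo | Model: tts-1
Requires: the `openai` Python package, the `pdftotext` command-line tool, and an
          API key in OPENAI_API_KEY or ~/.openclaw/auth-profiles.json.
Re-running is safe: MP3 parts that already exist in the output folders are skipped.
Run: python3 produce_week3_audio.py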
"""

import os, sys, re, json, subprocess
from pathlib import Path

try:
    from openai import OpenAI
except ImportError:
    print("ERROR: pip3 install openai")
    sys.exit(1)

# ── Config ──────────────────────────────────────────────────────────────────
# JSON file that load_api_key() falls back to when OPENAI_API_KEY is not set.
AUTH_PROFILE = os.path.expanduser("~/.openclaw/auth-profiles.json")
# Voice and model passed to the speech endpoint by generate_audio().
VOICE        = "echo"
MODEL        = "tts-1"
# Default upper bound (in characters) for each chunk produced by chunk_text().
# NOTE(review): presumably chosen to stay below the speech endpoint's
# per-request input limit — confirm against the current API reference.
MAX_CHARS    = 4000

# Root folders holding the source PDFs; the output folders live under them too.
MF_BASE   = os.path.expanduser("~/Documents/MF Investing")
EL_BASE   = os.path.expanduser("~/Documents/Elder Training")

# Source PDFs handed to `pdftotext` by extract_lines().
MFM_PDF   = f"{MF_BASE}/Multi-Family Millions.pdf"
RPC_PDF   = f"{MF_BASE}/Raising Private Capital Building Your Real Estate.pdf"
BERK_PDF  = f"{EL_BASE}/Berkhof-Manual-of-Christian-Doctrine.pdf"

# Output folders (main() creates them if they do not exist yet)
MF_WEEK3  = f"{MF_BASE}/Phase 1 Week 3 Reading"
EL_WEEK1  = f"{EL_BASE}/Phase 1 Week 1 Reading"

# Line ranges (1-indexed, inclusive). They index the newline-split text that
# `pdftotext` prints for the whole PDF (see extract_lines()), not PDF pages.
# label_prefix becomes the leading part of every MP3 file name of a section.
SECTIONS = [
    # (label_prefix, pdf_path, line_start, line_end, output_folder)
    ("MFM_Ch8",   MFM_PDF,  4533, 5047, MF_WEEK3),  # Where to Get the Money
    ("RPC_Intro", RPC_PDF,   227,  278, MF_WEEK3),  # RPC Introduction
    ("RPC_Ch1",   RPC_PDF,   279,  630, MF_WEEK3),  # Ch1: Intro to Private Capital
    ("BERK_W1",   BERK_PDF,   70,  923, EL_WEEK1),  # Preface + Religion + Revelation + Scripture
]

# ── Helpers ─────────────────────────────────────────────────────────────────

def load_api_key():
    """Return the OpenAI API key to use for the speech requests.

    A non-empty ``OPENAI_API_KEY`` environment variable wins. Otherwise the
    JSON file at ``AUTH_PROFILE`` is loaded and the ``token`` field of its
    ``"openai:manual"`` entry is returned.

    Raises:
        ValueError: the profile file contains no non-empty token.
        OSError: the profile file cannot be opened (propagated from ``open``).
    """
    token = os.environ.get("OPENAI_API_KEY")
    if not token:
        # Environment gave us nothing usable: consult the on-disk profile.
        with open(AUTH_PROFILE) as handle:
            manual_profile = json.load(handle).get("openai:manual", {})
        token = manual_profile.get("token")
        if not token:
            raise ValueError("No OpenAI token found")
    return token


def extract_lines(pdf_path, start, end):
    """Return lines ``start``..``end`` of the text ``pdftotext`` extracts from a PDF.

    The whole document is converted by the external ``pdftotext`` command
    (output sent to stdout), split on newline characters, and the requested
    range is joined back together with newlines.

    Args:
        pdf_path: path of the PDF handed to ``pdftotext``.
        start: first line to keep (1-indexed).
        end: last line to keep (1-indexed, inclusive).

    Returns:
        The selected lines as one string. If the extracted text has fewer
        than ``end`` lines the result simply stops at the last line.

    Raises:
        ValueError: ``start`` is below 1 or ``end`` is below ``start``.
        subprocess.CalledProcessError: ``pdftotext`` exited with an error.
        OSError: the ``pdftotext`` executable could not be started.
    """
    # Validate before spawning the converter: start=0 would turn the slice
    # below into lines[-1:end], silently returning the wrong text.
    if start < 1 or end < start:
        raise ValueError(
            f"invalid line range {start}-{end}: expected 1 <= start <= end"
        )
    result = subprocess.run(["pdftotext", pdf_path, "-"],
                            capture_output=True, text=True, check=True)
    # Split on '\n' only. str.splitlines() would also break on form feeds
    # (which clean_text() handles later) and shift every line number.
    lines = result.stdout.split('\n')
    return '\n'.join(lines[start - 1:end])


def clean_text(text):
    """Tidy raw extracted PDF text before it is chunked for speech.

    Steps, applied in this order: form feeds become blank lines, the regex
    substitutions listed below run one after another, trailing whitespace is
    removed from every line, and the result is stripped at both ends.

    Returns:
        The cleaned text as a single string.
    """
    substitutions = (
        # Re-join a word that was hyphenated across a line break.
        (r'(\w)-\n(\w)', r'\1\2', 0),
        # A line break between two lowercase letters becomes a space.
        (r'([a-z])\n([a-z])', r'\1 \2', 0),
        # Blank out lines holding only a 1-3 digit number
        # (presumably page numbers).
        (r'^\s*\d{1,3}\s*$', '', re.MULTILINE),
        # Collapse runs of three or more newlines into one blank line.
        (r'\n{3,}', '\n\n', 0),
    )
    cleaned = text.replace('\x0c', '\n\n')
    for pattern, replacement, flags in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned, flags=flags)
    trimmed_lines = [line.rstrip() for line in cleaned.split('\n')]
    return '\n'.join(trimmed_lines).strip()


def _split_oversized(sentence, max_chars):
    """Break one over-long sentence into pieces of at most ``max_chars``.

    Pieces are cut at whitespace; a single word longer than ``max_chars`` is
    hard-sliced. Whitespace runs inside the sentence collapse to one space.
    """
    pieces, words, length = [], [], 0
    for word in sentence.split():
        # A word that can never fit is sliced into max_chars-sized pieces.
        while len(word) > max_chars:
            if words:
                pieces.append(' '.join(words))
                words, length = [], 0
            pieces.append(word[:max_chars])
            word = word[max_chars:]
        if not word:
            continue
        # Length of the piece if this word is added (one joining space).
        needed = length + 1 + len(word) if words else len(word)
        if needed > max_chars:
            pieces.append(' '.join(words))
            words, needed = [], len(word)
        words.append(word)
        length = needed
    if words:
        pieces.append(' '.join(words))
    return pieces


def chunk_text(text, max_chars=MAX_CHARS):
    """Split ``text`` into chunks of at most ``max_chars`` characters.

    Paragraphs (separated by blank lines) are packed greedily into chunks and
    joined with a blank line. A paragraph longer than ``max_chars`` is split
    into sentences that are packed the same way, joined with spaces. A single
    sentence that is still too long is cut at whitespace, so no chunk ever
    exceeds the limit; previously such a sentence was emitted whole.

    Args:
        text: the cleaned text to split.
        max_chars: maximum length of each returned chunk; must be >= 1.

    Returns:
        A list of non-empty chunk strings in reading order (empty for blank
        input).

    Raises:
        ValueError: ``max_chars`` is below 1.
    """
    if max_chars < 1:
        raise ValueError("max_chars must be at least 1")

    paragraphs = re.split(r'\n\n+', text)
    chunks, current, cur_len = [], [], 0
    for para in paragraphs:
        para = para.strip()
        if not para:
            continue
        if len(para) > max_chars:
            # Flush what is pending so the output keeps reading order.
            if current:
                chunks.append('\n\n'.join(current))
                current, cur_len = [], 0
            pieces = []
            for sentence in re.split(r'(?<=[.!?])\s+', para):
                if len(sentence) > max_chars:
                    pieces.extend(_split_oversized(sentence, max_chars))
                else:
                    pieces.append(sentence)
            sub, sub_len = [], 0
            for piece in pieces:
                if sub_len + len(piece) + 1 > max_chars and sub:
                    chunks.append(' '.join(sub))
                    sub, sub_len = [piece], len(piece)
                else:
                    sub.append(piece)
                    sub_len += len(piece) + 1
            if sub:
                chunks.append(' '.join(sub))
        elif cur_len + len(para) + 2 > max_chars:
            # The paragraph fits alone but not in the current chunk.
            if current:
                chunks.append('\n\n'.join(current))
            current, cur_len = [para], len(para)
        else:
            current.append(para)
            cur_len += len(para) + 2
    if current:
        chunks.append('\n\n'.join(current))
    return chunks


def generate_audio(client, text, path):
    """Synthesise ``text`` to an MP3 file at ``path``.

    The speech endpoint is called with the module-level ``MODEL`` and
    ``VOICE``. The audio is first written to ``<path>.part`` and only renamed
    to ``path`` once the write has finished, so a failed or interrupted write
    never leaves a truncated file under the final name (main() skips any
    part whose file already exists).

    Args:
        client: OpenAI client exposing ``audio.speech.create``.
        text: the text to speak.
        path: destination file path (``str`` or path-like).

    Raises:
        Whatever the API call or the file write raises; the temporary file is
        removed before the exception propagates.
    """
    response = client.audio.speech.create(
        model=MODEL, voice=VOICE, input=text, response_format="mp3"
    )
    final_path = os.fspath(path)
    partial_path = final_path + ".part"
    try:
        response.stream_to_file(partial_path)
        # Same directory, so the rename replaces the target in one step.
        os.replace(partial_path, final_path)
    except BaseException:
        # BaseException so that Ctrl-C also cleans up the partial download.
        if os.path.exists(partial_path):
            os.remove(partial_path)
        raise


# ── Main ────────────────────────────────────────────────────────────────────

def main():
    """Produce the MP3 parts for every entry in ``SECTIONS``.

    For each section the line range is extracted from its PDF, cleaned and
    chunked, and every chunk whose output file does not exist yet is sent to
    the speech endpoint. Progress, a cost estimate and a final summary are
    printed. A chunk that fails is reported and counted, any partial output
    file for it is removed so a later run retries it, and processing carries
    on with the next chunk.

    Returns:
        0 when every chunk was generated or skipped, 1 if any chunk failed.
    """
    client = OpenAI(api_key=load_api_key())

    total_generated = 0
    total_skipped = 0
    total_failed = 0
    results = []

    for (prefix, pdf, lstart, lend, outdir) in SECTIONS:
        out_path = Path(outdir)
        out_path.mkdir(parents=True, exist_ok=True)
        print(f"\n{'='*60}")
        print(f"  {prefix}: lines {lstart}–{lend}")
        print(f"  → {outdir}")
        print(f"{'='*60}")

        raw = extract_lines(pdf, lstart, lend)
        text = clean_text(raw)
        chunks = chunk_text(text)
        n = len(chunks)
        chars = len(text)
        # Flat per-character rate (presumably the tts-1 list price — confirm).
        # Covers the whole section, including parts that will be skipped.
        cost = chars * 0.000015
        print(f"  {chars:,} chars | {n} chunks | est. ${cost:.3f}")

        gen, skip, failed = 0, 0, 0
        for i, chunk in enumerate(chunks, 1):
            fname = f"{prefix}_Part{i:02d}_of{n:02d}.mp3"
            fpath = str(out_path / fname)
            if os.path.exists(fpath):
                print(f"  ⏭  {fname} (exists)")
                skip += 1
                continue
            print(f"  🔊 {fname} ({len(chunk):,} chars)... ", end="", flush=True)
            try:
                generate_audio(client, chunk, fpath)
                kb = os.path.getsize(fpath) // 1024
            except Exception as e:
                # Top-level boundary: report, count, and keep going so one
                # bad chunk does not abort the whole batch.
                print(f"❌ {e}")
                failed += 1
                # Best-effort cleanup: a leftover partial file would be
                # mistaken for a finished part on the next run.
                try:
                    os.remove(fpath)
                except OSError:
                    pass
            else:
                print(f"✅ {kb} KB")
                gen += 1

        total_generated += gen
        total_skipped += skip
        total_failed += failed
        folder_label = f"{out_path.parent.name}/{out_path.name}"
        results.append(
            f"  {prefix}: {gen} generated, {skip} skipped, {failed} failed → {folder_label}"
        )
        print(f"  Done: {gen} generated, {skip} skipped, {failed} failed")

    print(f"\n{'='*60}")
    if total_failed:
        print("  PRODUCTION FINISHED WITH ERRORS")
    else:
        print("  PRODUCTION COMPLETE")
    print(
        f"  Total: {total_generated} files generated, {total_skipped} skipped, "
        f"{total_failed} failed"
    )
    print()
    for r in results:
        print(r)

    return 1 if total_failed else 0


if __name__ == "__main__":
    # Propagate failures to the shell so wrappers/cron can detect them.
    sys.exit(main())

# TONY-APPROVED: 2026-03-01 | sha:6829b78a