#!/usr/bin/env python3
"""
TessyBase Lieferzeit-Scraper
============================
Liest aktuelle Lieferzeiten von tesla.com/de_de und aktualisiert models.json

Setup:
    pip install playwright python-dateutil
    playwright install chromium

Cron (täglich 06:00 Uhr):
    0 6 * * * /usr/bin/python3 /var/www/tessybase/scraper.py >> /var/log/tessybase.log 2>&1
"""

import json
import os
import re
import sys
import logging
from datetime import datetime
from pathlib import Path

# ── Logging ──────────────────────────────────────────────────────────────────
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S"
)
log = logging.getLogger("tessybase-scraper")

# ── Config ───────────────────────────────────────────────────────────────────
BASE_DIR    = Path(__file__).parent
DATA_FILE   = BASE_DIR / "data" / "models.json"
BACKUP_DIR  = BASE_DIR / "data" / "backups"

TESLA_URLS = {
    "model-y-rwd":  "https://www.tesla.com/de_DE/model-y/design#overview",
    "model-y-lr":   "https://www.tesla.com/de_DE/model-y/design#overview",
    "model-y-perf": "https://www.tesla.com/de_DE/model-y/design#overview",
    "model-3-rwd":  "https://www.tesla.com/de_DE/model3/design#overview",
    "model-3-lr":   "https://www.tesla.com/de_DE/model3/design#overview",
    "model-3-perf": "https://www.tesla.com/de_DE/model3/design#overview",
}
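# Note: all trims of a model share one configurator page, so the per-trim keys
# above deliberately point at the same URL; _scrape_model() loads each page
# once and fans the result out to every trim id.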

# Selectors for delivery times on tesla.com (as of 2025)
# Tesla occasionally changes the page structure; adjust here if needed
DELIVERY_SELECTORS = [
    "[data-id='delivery-timing']",
    ".delivery-timing",
    "[class*='DeliveryTiming']",
    "[class*='delivery']",
    "span[class*='timing']",
]

# Fallback regex for week figures in the page text
# (matches "X–Y Wochen", "X Wochen", and the English/singular variants)
WEEKS_PATTERN = re.compile(
    r"(\d+)\s*[–\-]\s*(\d+)\s*Wochen?|"
    r"(\d+)\s*Wochen?|"
    r"(\d+)\s*[–\-]\s*(\d+)\s*weeks?|"
    r"(\d+)\s*weeks?",
    re.IGNORECASE
)
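# Illustrative strings this pattern is meant to catch (made-up examples, not
# recorded tesla.com output):
#   "Lieferung: 4 – 8 Wochen"  -> range match via groups 1+2
#   "Lieferung in 6 Wochen"    -> single match via group 3
#   "Delivery: 2-4 weeks"      -> range match via groups 4+5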


# ── Scraper ───────────────────────────────────────────────────────────────────
def scrape_delivery_times() -> dict:
    """Scrape delivery times from tesla.com with Playwright (headless Chromium).

    Returns a dict keyed by trim id; each value is either
    {"weeks_min": int, "weeks_max": int, "raw": str} or None when nothing
    could be extracted.
    """
    try:
        from playwright.sync_api import sync_playwright
    except ImportError:
        log.error("Playwright is not installed. Run: pip install playwright && playwright install chromium")
        sys.exit(1)

    results = {}

    with sync_playwright() as p:
        log.info("Starte headless Chromium...")
        browser = p.chromium.launch(
            headless=True,
            args=["--no-sandbox", "--disable-setuid-sandbox"]
        )
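        # Use a German locale, Berlin timezone, and a desktop user agent so
        # tesla.com renders the de_DE page the way a normal visitor sees it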
        context = browser.new_context(
            locale="de-DE",
            timezone_id="Europe/Berlin",
            user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
                       "AppleWebKit/537.36 (KHTML, like Gecko) "
                       "Chrome/122.0.0.0 Safari/537.36"
        )

        # Model Y
        results.update(_scrape_model(context, "model-y", [
            "model-y-rwd", "model-y-lr", "model-y-perf"
        ], TESLA_URLS["model-y-rwd"]))

        # Model 3
        results.update(_scrape_model(context, "model-3", [
            "model-3-rwd", "model-3-lr", "model-3-perf"
        ], TESLA_URLS["model-3-rwd"]))

        browser.close()

    return results


def _scrape_model(context, model_name: str, model_ids: list, url: str) -> dict:
    """Scrapt eine Tesla-Modellseite und extrahiert Lieferzeiten."""
    results = {}
    page = context.new_page()

    try:
        log.info(f"Lade {url} ...")
        page.goto(url, wait_until="networkidle", timeout=30000)

        # Give client-side rendering a moment to settle
        page.wait_for_timeout(3000)

        # Try the known selectors first
        delivery_text = _try_selectors(page)

        if not delivery_text:
            # Fallback: search the full page text
            full_text = page.inner_text("body")
            delivery_text = _extract_delivery_from_text(full_text)
            log.info(f"  Fallback text extraction: {delivery_text!r}")
        else:
            log.info(f"  Selector hit: {delivery_text!r}")

        # Parse the week range from the extracted text
        weeks = _parse_weeks(delivery_text or "")

        for mid in model_ids:
            if weeks:
                results[mid] = {
                    "weeks_min": weeks[0],
                    "weeks_max": weeks[1],
                    "raw": delivery_text
                }
                log.info(f"  ✓ {mid}: {weeks[0]}–{weeks[1]} Wochen")
            else:
                results[mid] = None
                log.warning(f"  ⚠ {mid}: Keine Lieferzeit gefunden")

    except Exception as e:
        log.error(f"Fehler beim Scrapen von {url}: {e}")
        for mid in model_ids:
            results[mid] = None
    finally:
        page.close()

    return results


def _try_selectors(page):
    """Probiert bekannte CSS-Selektoren für Lieferzeit-Elemente."""
    for sel in DELIVERY_SELECTORS:
        try:
            els = page.query_selector_all(sel)
            for el in els:
                text = el.inner_text().strip()
                if text and any(w in text.lower() for w in ["woche", "week", "lieferung", "delivery"]):
                    return text
        except Exception:
            continue
    return None


def _extract_delivery_from_text(text: str):
    """Sucht im Volltext nach Lieferzeit-Angaben."""
    lines = text.split("\n")
    for line in lines:
        line = line.strip()
        if len(line) < 100 and WEEKS_PATTERN.search(line):
            return line
    return None


def _parse_weeks(text: str):
    """Extrahiert Wochen-Range aus Text. Gibt (min, max) zurück."""
    if not text:
        return None
    m = WEEKS_PATTERN.search(text)
    if not m:
        return None
    groups = m.groups()
    # Alternative 1: "X–Y Wochen" (German range)
    if groups[0] and groups[1]:
        return int(groups[0]), int(groups[1])
    # Alternative 2: single "X Wochen" value; padded by +2 weeks to form a range
    if groups[2]:
        w = int(groups[2])
        return w, w + 2
    # Alternative 3: "X–Y weeks" (English range)
    if groups[3] and groups[4]:
        return int(groups[3]), int(groups[4])
    # Alternative 4: single "X weeks" value, padded likewise
    if groups[5]:
        w = int(groups[5])
        return w, w + 2
    return None
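
# Illustrative results (hypothetical inputs, traced through the branches above):
#   _parse_weeks("Lieferung: 4 – 8 Wochen")  -> (4, 8)
#   _parse_weeks("ca. 6 Wochen")             -> (6, 8)   # single value, +2 buffer
#   _parse_weeks("sofort verfügbar")         -> None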


# ── JSON update ───────────────────────────────────────────────────────────────
def update_models_json(scraped: dict):
    """Liest models.json, aktualisiert Lieferzeiten, schreibt zurück."""
    DATA_FILE.parent.mkdir(parents=True, exist_ok=True)

    # Backup
    if DATA_FILE.exists():
        BACKUP_DIR.mkdir(parents=True, exist_ok=True)
        ts = datetime.now().strftime("%Y%m%d_%H%M%S")
        backup_file = BACKUP_DIR / f"models_{ts}.json"
        backup_file.write_text(DATA_FILE.read_text())
        log.info(f"Backup erstellt: {backup_file}")

    # Load current data
    if DATA_FILE.exists():
        db = json.loads(DATA_FILE.read_text())
    else:
        log.warning("models.json nicht gefunden — erstelle neue Datei")
        db = {"models": [], "delivery_times": {"models": []}}

    # Update delivery times; setdefault guards against an existing file that
    # lacks the delivery_times key, which would otherwise crash further down
    delivery = db.setdefault("delivery_times", {"models": []})
    updated_count = 0
    for lz in delivery.setdefault("models", []):
        mid = lz["id"]
        if mid in scraped and scraped[mid]:
            old_min, old_max = lz.get("weeks_min", 0), lz.get("weeks_max", 0)
            lz["weeks_min"] = scraped[mid]["weeks_min"]
            lz["weeks_max"] = scraped[mid]["weeks_max"]
            lz["raw_text"]  = scraped[mid].get("raw", "")
            if old_min != lz["weeks_min"] or old_max != lz["weeks_max"]:
                log.info(f"  Changed {mid}: {old_min}–{old_max} → {lz['weeks_min']}–{lz['weeks_max']} weeks")
                updated_count += 1

    delivery["last_updated"] = datetime.now().isoformat()
    delivery["source"] = "tesla.com (automatisch)"
    db["last_updated"] = datetime.now().isoformat()

    # Write back
    DATA_FILE.write_text(json.dumps(db, ensure_ascii=False, indent=2))
    log.info(f"models.json updated · {updated_count} changes")
    return updated_count


# ── Main ──────────────────────────────────────────────────────────────────────
def main():
    log.info("=" * 50)
    log.info("TessyBase Scraper gestartet")
    log.info(f"Zieldatei: {DATA_FILE}")
    log.info("=" * 50)

    # Scrape
    scraped = scrape_delivery_times()

    # Summary
    found    = sum(1 for v in scraped.values() if v)
    not_found = sum(1 for v in scraped.values() if not v)
    log.info(f"Ergebnis: {found} gefunden, {not_found} nicht gefunden")

    # Update JSON
    changes = update_models_json(scraped)

    log.info(f"Fertig · {changes} Werte aktualisiert")
    log.info("=" * 50)

    # Exit code 0 = OK, 1 = partially failed
    sys.exit(0 if not_found == 0 else 1)


if __name__ == "__main__":
    main()
