#!/usr/bin/env python3
"""
Validate ffplayout schedule JSON files.
Exempts weather and commercials from duration checks.
Uses tolerance of 0.2 seconds for minor mismatches.
"""

import json
import os
import sys
from pathlib import Path
from collections import defaultdict

# Prefix (followed by "/") that is stripped from schedule 'source' paths to get
# the relative path used as key in the durations file.
MEDIA_ROOT = "/var/lib/ffplayout/tv-media"
# Durations file used when none is given on the command line.
DEFAULT_DURATIONS = os.path.join(MEDIA_ROOT, "durations.txt")
# Substrings of the MEDIA_ROOT-relative path. A source whose relative path
# contains any of them is exempt from the duration checks (see is_exempt).
EXEMPT_PATHS = [
    "weather/weather_report.mp4",
    "Shuffle-for-tv/Commercials/",
    "Shuffle-for-tv/NETV Originals/Bumpers/",
    "Shuffle-for-tv/saturday/",
]
# Largest difference between two time values that is still treated as equal.
TOLERANCE = 0.2  # seconds

def is_exempt(source):
    """Tell whether *source* is exempt from the duration checks.

    The MEDIA_ROOT prefix (plus "/") is removed from the path, and the result
    is True as soon as any EXEMPT_PATHS entry occurs as a substring of what
    remains; otherwise False.
    """
    relative = source.replace(MEDIA_ROOT + "/", "")
    return any(marker in relative for marker in EXEMPT_PATHS)

def load_durations(path):
    """Parse a durations file into a ``{relative_path: seconds}`` dict.

    Each useful line has the form ``<seconds>|<relative path>``. Blank lines,
    lines starting with ``===``, lines without a ``|`` separator and lines
    whose first field is not a number are skipped.

    Args:
        path: Location of the durations file; it is read as UTF-8.

    Returns:
        dict mapping the text after the first ``|`` to the duration as a
        float. If the same path occurs more than once, the last line wins.

    Raises:
        OSError: if the file cannot be opened.
        UnicodeDecodeError: if the file is not valid UTF-8.
    """
    durations = {}
    with open(path, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith('==='):
                continue
            # Split on the first '|' only, so file names that themselves
            # contain '|' are kept instead of being silently dropped.
            dur, sep, rel = line.partition('|')
            if not sep:
                continue
            try:
                durations[rel] = float(dur)
            except ValueError:
                # First field is not a duration (e.g. a header row): skip it
                # like any other malformed line rather than abort the run.
                continue
    return durations

# Per-entry keys that must hold numbers; a missing key defaults to 0.
_TIME_KEYS = ("in", "out", "duration")


def _load_programs(sched_path):
    """Read a schedule file and return ``(programs, errors)``.

    ``programs`` is the top-level list, or the "program" list of a top-level
    object (empty when that key is absent). Unreadable files, invalid JSON and
    unexpected shapes yield an empty program list plus one error message.
    """
    try:
        with open(sched_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except (OSError, ValueError) as exc:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueErrors.
        return [], [f"Cannot read schedule: {exc}"]
    if isinstance(data, list):
        programs = data
    elif isinstance(data, dict):
        programs = data.get("program", [])
    else:
        return [], [f"Unexpected top-level JSON type: {type(data).__name__}"]
    if not isinstance(programs, list):
        return [], [f"'program' is not a list (got {type(programs).__name__})"]
    return programs, []


def _group_segments(programs, errors):
    """Group entries by source as ``(index, in, out, duration)`` tuples.

    Entries that cannot be analysed (not an object, missing or non-string
    source, non-numeric time fields) are reported in *errors* and left out.
    """
    seg_groups = defaultdict(list)
    for idx, prog in enumerate(programs):
        if not isinstance(prog, dict):
            errors.append(f"Entry {idx}: not a JSON object")
            continue
        src = prog.get("source")
        if not src:
            errors.append(f"Entry {idx}: missing 'source'")
            continue
        if not isinstance(src, str):
            errors.append(f"Entry {idx}: 'source' is not a string")
            continue
        fields = [prog.get(key, 0) for key in _TIME_KEYS]
        bad = [
            repr(key)
            for key, value in zip(_TIME_KEYS, fields)
            if not isinstance(value, (int, float))
        ]
        if bad:
            # e.g. JSON null or a quoted number; arithmetic on it would raise.
            errors.append(f"Entry {idx}: non-numeric value for {', '.join(bad)}")
            continue
        seg_groups[src].append((idx, *fields))
    return seg_groups


def _check_split_continuity(rel, segs, durations):
    """Return errors for a source that is played as several segments.

    Ordered by 'in', the first segment must start at 0, the last must reach
    the known total (when *rel* is in *durations*), and no segment may start
    more than TOLERANCE after the previous one ended.
    """
    errors = []
    segs_sorted = sorted(segs, key=lambda seg: seg[1])  # by 'in'
    first_in = segs_sorted[0][1]
    if first_in > TOLERANCE:
        errors.append(f"{rel}: first segment 'in' = {first_in}, should be 0")
    last_out = segs_sorted[-1][2]
    # Without a known total the default makes this comparison always false.
    if last_out < durations.get(rel, last_out) - TOLERANCE:
        errors.append(f"{rel}: last segment ends at {last_out}, total is {durations.get(rel, 'unknown')}")
    for prev, curr in zip(segs_sorted, segs_sorted[1:]):
        prev_out = prev[2]
        curr_in = curr[1]
        if curr_in > prev_out + TOLERANCE:
            errors.append(f"{rel}: gap between segment {prev[0]} (out={prev_out}) and {curr[0]} (in={curr_in})")
    return errors


def validate_schedule(sched_path, durations):
    """Validate one schedule JSON file and return a list of error messages.

    Args:
        sched_path: Path of the schedule file. It may hold either a list of
            entries or an object with a "program" list.
        durations: Mapping of MEDIA_ROOT-relative path to expected duration
            in seconds, as returned by load_durations.

    Returns:
        A list of human-readable error strings; empty when nothing was found.
        Problems with the file itself (unreadable, invalid JSON, unexpected
        structure) are returned as errors instead of being raised.

    Checks per source file: it exists on disk; unless exempt, it is listed in
    *durations* and every entry's 'duration' matches within TOLERANCE; when
    played in several segments they are contiguous; when played once and not
    exempt, 'out' reaches the expected duration.
    """
    programs, errors = _load_programs(sched_path)
    seg_groups = _group_segments(programs, errors)

    for src, segs in seg_groups.items():
        rel = src.replace(MEDIA_ROOT + "/", "")
        exempt = is_exempt(src)

        if not os.path.exists(src):
            errors.append(f"File not found: {src}")
            continue

        known = rel in durations
        if not exempt:
            if known:
                expected = durations[rel]
                for idx, _, _, dur in segs:
                    if abs(dur - expected) > TOLERANCE:
                        errors.append(f"{rel} (entry {idx}): duration {dur} != expected {expected}")
            else:
                errors.append(f"{rel} not found in durations.txt (and not exempt)")

        if len(segs) > 1:
            # Continuity is checked for exempt sources as well.
            errors.extend(_check_split_continuity(rel, segs, durations))
        elif not exempt and known:
            # Single segment: 'out' must reach the end of the file. A wrong
            # 'duration' was already reported above, so it is not repeated.
            # NOTE(review): 'in' is not checked here, so a single segment that
            # starts mid-file passes — confirm this is intended.
            _, _, out, dur = segs[0]
            expected = durations[rel]
            if abs(out - expected) > TOLERANCE:
                errors.append(f"{rel}: single segment out={out}, duration={dur}, expected {expected}")

    return errors

def main():
    """Command-line entry point: validate every schedule below a directory.

    Takes the playlist directory as first argument and, optionally, a
    durations file as second (DEFAULT_DURATIONS otherwise). Prints the
    findings per schedule file and exits with status 1 on bad usage, on a
    missing directory or durations file, or when any error was found.
    """
    argv = sys.argv
    if len(argv) < 2:
        print("Usage: python validate_schedules.py /path/to/playlist/dir [durations_file]")
        sys.exit(1)

    playlist_dir = argv[1]
    dur_file = DEFAULT_DURATIONS if len(argv) <= 2 else argv[2]

    if not os.path.isdir(playlist_dir):
        print(f"Directory not found: {playlist_dir}")
        sys.exit(1)
    if not os.path.exists(dur_file):
        print(f"Durations file not found: {dur_file}")
        sys.exit(1)

    durations = load_durations(dur_file)
    print(f"Loaded {len(durations)} entries from {dur_file}")

    # Every *.json below the directory, at any depth, counts as a schedule.
    found = list(Path(playlist_dir).rglob("*.json"))
    if not found:
        print(f"No JSON files found in {playlist_dir}")
        return

    error_count = 0
    for schedule_path in found:
        print(f"\nChecking {schedule_path.relative_to(playlist_dir)} ...")
        problems = validate_schedule(str(schedule_path), durations)
        if not problems:
            print("  OK")
            continue
        error_count += len(problems)
        for problem in problems:
            print(f"  {problem}")

    if error_count:
        print(f"\nFound {error_count} error(s).")
        sys.exit(1)
    print("\nAll schedules are valid!")


if __name__ == "__main__":
    main()
