#!/usr/bin/env python3
"""
tvwish EPG Scraper - Selenium Version (for JS-rendered content)

Scrapes program schedules from tvwish.com and generates XMLTV EPG format
for all configured channels.
"""

import re
import signal
import sys
import time
import xml.etree.ElementTree as ET
from datetime import datetime, timedelta
from pathlib import Path
from xml.dom.minidom import parseString

import pytz
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

# Global exit flag. main() checks it between channels; nothing in this file
# sets it, so it only has an effect if another module flips it.
stop_flag = False

# Default config
DEFAULT_PROGRAM_DURATION = timedelta(minutes=30)
INDIA_TZ = pytz.timezone('Asia/Kolkata')

# Fixed delay after loading a page before its source is read.
PAGE_LOAD_WAIT_SECONDS = 5

# Manual fixes: site_ids whose URL slug cannot be derived from the display name.
URL_NAME_OVERRIDES = {
    "20": "and-pictures",
    "8": "and-pictures-hd",
    "1377": "and-xplor-hd",
}


def _handle_sigint(sig, frame):
    """Exit with status 0 on Ctrl+C after printing a short notice."""
    print("\nInterrupted.")
    sys.exit(0)


# Signal handler for graceful Ctrl+C
signal.signal(signal.SIGINT, _handle_sigint)


def get_channel_mapping():
    """Load the channel list from ``tempest_config/tvwish.config.xml``.

    The config file is looked up next to this script. Every ``<channel>``
    element contributes one entry keyed by its ``site_id`` attribute.

    Returns:
        dict: ``site_id`` -> ``{'xmltv_id', 'display_name', 'url_name'}``.

    Raises:
        FileNotFoundError: if the config file does not exist.
    """
    config_path = Path(__file__).parent / "tempest_config" / "tvwish.config.xml"
    print(f"[INFO] Loading config from {config_path}")
    if not config_path.exists():
        raise FileNotFoundError(f"Config file not found: {config_path}")

    tree = ET.parse(config_path)
    channel_map = {}
    for channel in tree.findall(".//channel"):
        site_id = channel.get("site_id")
        xmltv_id = channel.get("xmltv_id")
        # Both ids are needed later: site_id goes into the URL and xmltv_id
        # becomes an XML attribute value (None cannot be serialized).
        if not site_id or not xmltv_id:
            print("[WARN] Skipping channel entry without site_id/xmltv_id")
            continue

        # Fall back to the xmltv_id when the element text is missing or blank.
        display_name = (channel.text or "").strip() or xmltv_id

        url_name = URL_NAME_OVERRIDES.get(site_id)
        if url_name is None:
            url_name = display_name.lower().replace(' ', '-')
            url_name = re.sub(r'[^a-z0-9-]', '', url_name)

        channel_map[site_id] = {
            'xmltv_id': xmltv_id,
            'display_name': display_name,
            'url_name': url_name,
        }
    return channel_map


def setup_driver():
    """Create a headless Chrome WebDriver with a 1920x1080 window."""
    options = Options()
    options.add_argument('--headless')
    options.add_argument('--disable-gpu')
    options.add_argument('--no-sandbox')
    options.add_argument('--window-size=1920,1080')
    return webdriver.Chrome(options=options)


def parse_schedule(channel_url):
    """Scrape one channel schedule page and return its programs.

    Args:
        channel_url: URL of the channel's schedule page.

    Returns:
        list: dicts with ``title``, ``description``, ``genre``, ``start`` and
        ``end`` keys, sorted by ``start``. ``start``/``end`` are
        timezone-aware datetimes in ``INDIA_TZ``. Each program ends when the
        next one starts; the last one gets ``DEFAULT_PROGRAM_DURATION``.
    """
    driver = setup_driver()
    try:
        print(f"[INFO] Opening URL: {channel_url}")
        driver.get(channel_url)
        time.sleep(PAGE_LOAD_WAIT_SECONDS)
        page_source = driver.page_source
    finally:
        # Always release the browser, even when loading the page fails.
        driver.quit()

    soup = BeautifulSoup(page_source, 'html.parser')
    items = soup.select("div.card.schedule-item")
    print(f"[INFO] Found {len(items)} schedule items")

    # Listed times carry no date, so all of them are anchored to today's date
    # in INDIA_TZ.
    current_date = datetime.now(INDIA_TZ).strftime("%d %b %Y")

    programs = []
    for item in items:
        try:
            time_str = item.select_one("div.card-header h3").get_text(strip=True)
            title = item.select_one("h4.text-warning").get_text(strip=True)
            desc_el = item.select_one("p.my-2")
            description = desc_el.get_text(strip=True) if desc_el else ""
            genre_tags = item.select("a.badge.bg-info")
            genre = ", ".join(g.get_text(strip=True) for g in genre_tags)
            dt = datetime.strptime(
                f"{current_date} {time_str}", "%d %b %Y %I:%M %p"
            )
        except (AttributeError, ValueError) as e:
            # AttributeError: a required element is missing (select_one -> None).
            # ValueError: the time text does not match the expected format.
            print(f"[WARN] Skipping item: {e}")
            continue

        programs.append({
            "title": title,
            "description": description,
            "genre": genre,
            "start": INDIA_TZ.localize(dt),
        })

    programs.sort(key=lambda p: p["start"])
    for current, following in zip(programs, programs[1:]):
        current["end"] = following["start"]
    if programs:
        programs[-1]["end"] = programs[-1]["start"] + DEFAULT_PROGRAM_DURATION
    return programs


def generate_epg(channels_data):
    """Write the XMLTV file ``tempest_config/epg/tvwish_epg.xml``.

    Args:
        channels_data: dict mapping a channel key to a dict with
            ``xmltv_id``, ``display_name`` and ``programs`` (as returned by
            ``parse_schedule``). Channels without programs are omitted.
    """
    print("[INFO] Generating XMLTV EPG...")
    output_dir = Path(__file__).parent / "tempest_config" / "epg"
    output_file = output_dir / "tvwish_epg.xml"
    output_dir.mkdir(parents=True, exist_ok=True)

    root = ET.Element("tv", {"generator-info-name": "tvwish EPG Generator"})
    populated = [info for info in channels_data.values() if info['programs']]

    # The XMLTV DTD requires every <channel> to precede the first <programme>,
    # so the document is built in two passes.
    for ch_info in populated:
        ch_elem = ET.SubElement(root, "channel", {"id": ch_info['xmltv_id']})
        ET.SubElement(ch_elem, "display-name").text = ch_info['display_name']

    for ch_info in populated:
        for prog in ch_info['programs']:
            start = prog["start"].astimezone(pytz.utc).strftime("%Y%m%d%H%M%S +0000")
            end = prog["end"].astimezone(pytz.utc).strftime("%Y%m%d%H%M%S +0000")
            pr = ET.SubElement(root, "programme", {
                "start": start,
                "stop": end,
                "channel": ch_info['xmltv_id'],
            })
            ET.SubElement(pr, "title", {"lang": "en"}).text = prog["title"]
            # DTD child order inside <programme>: title, desc, then category.
            if prog["description"]:
                ET.SubElement(pr, "desc", {"lang": "en"}).text = prog["description"]
            if prog["genre"]:
                ET.SubElement(pr, "category", {"lang": "en"}).text = prog["genre"]

    xml_str = ET.tostring(root, encoding="utf-8")
    pretty = parseString(xml_str).toprettyxml()
    with open(output_file, "w", encoding="utf-8") as f:
        f.write(pretty)
    print(f"[SUCCESS] EPG written to {output_file.absolute()}")


def main():
    """Scrape today's schedule for every configured channel and write the EPG."""
    print("tvwish EPG Scraper - Selenium Version")
    print("====================================")
    try:
        channel_map = get_channel_mapping()
        all_channels = {}
        for site_id, info in channel_map.items():
            if stop_flag:
                break
            url = (
                f"https://www.tvwish.com/{info['url_name']}"
                f"/Channel/{site_id}/Schedule/Today"
            )
            print(f"[INFO] Scraping: {info['display_name']} ({site_id})")
            try:
                programs = parse_schedule(url)
            except Exception as e:
                # One failing channel must not discard the channels that
                # were already scraped; report it and move on.
                print(f"[ERROR] Failed to scrape {info['display_name']}: {e}")
                continue

            if programs:
                all_channels[site_id] = {
                    'xmltv_id': info['xmltv_id'],
                    'display_name': info['display_name'],
                    'programs': programs,
                }
                print(f"[INFO] {len(programs)} programs found for {info['display_name']}")
            else:
                print(f"[WARN] No programs found for {info['display_name']}")

        if all_channels:
            generate_epg(all_channels)
        else:
            print("[ERROR] No programs were extracted for any channel")
    except Exception as e:
        print(f"[FATAL] {e}")


if __name__ == '__main__':
    main()