"""Download an XMLTV EPG, translate one channel's listings to English, and save it.

The script fetches a gzipped XMLTV file, extracts a single channel and its
programmes, translates titles (in batches) and descriptions (one by one) with
Google Translate via ``deep_translator``, and writes a compact XMLTV file
with one element per line.
"""

import gzip
import time
from pathlib import Path

import requests
from deep_translator import GoogleTranslator
from lxml import etree as ET

# Translation setup
TRANSLATOR = GoogleTranslator(source='auto', target='en')
BATCH_SIZE = 10  # Process titles in batches for better performance
BATCH_SEPARATOR = " ||| "  # Joins the titles of one batch into a single request


def _translate_or_keep(text):
    """Translate a single string, returning it unchanged if translation fails.

    Args:
        text: The string to translate.

    Returns:
        The translated string, or ``text`` itself when the translator raises
        or returns an empty result.
    """
    try:
        return TRANSLATOR.translate(text) or text
    except Exception as e:  # best effort: one bad string must not abort the run
        print(f"Translation failed, keeping original text: {e}")
        return text


def translate_batch(texts):
    """Translate a batch of texts with a single translator request.

    The texts are joined with ``BATCH_SEPARATOR``, translated as one string,
    and split back apart on the separator.

    Args:
        texts: Sequence of strings to translate.

    Returns:
        A list with exactly ``len(texts)`` entries. If the request fails, or
        the translated string does not split back into the same number of
        parts, the original texts are returned so callers can rely on
        positional alignment.
    """
    if not texts:
        return []
    try:
        combined = BATCH_SEPARATOR.join(texts)
        translated = TRANSLATOR.translate(combined)
    except Exception as e:
        print(f"Batch translation failed: {e}")
        return list(texts)  # Return originals if batch fails

    if not translated:
        return list(texts)

    # Split on the bare bars and strip, because the translation may change
    # the whitespace around the separator.
    parts = [part.strip() for part in translated.split("|||")]
    if len(parts) != len(texts):
        # The separator was altered or dropped (or a text itself contained
        # it); using the parts would shift every following title.
        print(
            f"Batch translation returned {len(parts)} parts for "
            f"{len(texts)} texts; keeping originals for this batch"
        )
        return list(texts)
    return parts


def translate_titles(titles):
    """Translate titles in batches, then retry unchanged ones individually.

    Args:
        titles: Sequence of title strings.

    Returns:
        A list of the same length as ``titles``; each entry is the
        translation of the title at the same position, or the original
        title when no translation could be obtained.
    """
    titles = list(titles)
    translated_titles = []

    # First pass: batched requests (the translator auto-detects the source
    # language, see TRANSLATOR above).
    for start in range(0, len(titles), BATCH_SIZE):
        if start:
            time.sleep(0.3)  # Short delay between batches
        translated_titles.extend(translate_batch(titles[start:start + BATCH_SIZE]))

    # Second pass: retry any title that came back unchanged, one at a time.
    for i, title in enumerate(titles):
        if not title or not title.strip():
            continue  # nothing to translate
        if translated_titles[i].strip() != title.strip():
            continue  # already translated in the first pass
        translated_titles[i] = _translate_or_keep(title)
        time.sleep(0.1)

    return translated_titles


# Download and parse EPG
print("Downloading EPG data...")
url = "https://epgshare01.online/epgshare01/epg_ripper_IL1.xml.gz"
# The timeout keeps a stalled connection from hanging the script forever;
# the context manager releases the streamed connection once it has been read.
with requests.get(url, stream=True, timeout=(10, 60)) as response:
    response.raise_for_status()
    with gzip.open(response.raw) as f:
        # Drop the default namespace so the plain XPath expressions below match.
        xml_bytes = f.read().replace(b'xmlns="http://xmltv.org/xmltv.dtd"', b'')
root = ET.fromstring(xml_bytes)

target_id = "Bollywood.Movies.il"
channels = root.xpath(f"//channel[@id='{target_id}']")
if not channels:
    raise SystemExit(f"Channel '{target_id}' not found in EPG data")
channel = channels[0]
programs = root.xpath(f"//programme[@channel='{target_id}']")

# Extract all titles first for batch processing, remembering which programme
# each title belongs to so that programmes without a title cannot shift the
# mapping between programmes and translations.
print("Extracting titles for batch translation...")
title_positions = []
all_titles = []
for i, program in enumerate(programs):
    title_elem = program.find("title")
    if title_elem is not None and title_elem.text:
        title_positions.append(i)
        all_titles.append(title_elem.text)

# Translate all titles in optimized batches
print(f"Translating {len(all_titles)} titles...")
translated_titles = translate_titles(all_titles)
title_by_program = dict(zip(title_positions, translated_titles))

# Build new EPG
print("Building translated EPG...")
new_root = ET.Element("tv")
new_root.append(channel)

for i, program in enumerate(programs):
    new_program = ET.SubElement(new_root, "programme")

    # Copy attributes
    for attr, value in program.items():
        new_program.set(attr, value)

    # Add translated title (falls back to the original text when the
    # programme had no translatable title)
    title_elem = program.find("title")
    if title_elem is not None:
        new_title = ET.SubElement(new_program, "title", lang="en")
        new_title.text = title_by_program.get(i, title_elem.text)

    # Add other elements
    for elem in program:
        if not isinstance(elem.tag, str):
            continue  # comments / processing instructions have no tag name
        if elem.tag == "desc":
            new_desc = ET.SubElement(new_program, "desc", lang="en")
            new_desc.text = _translate_or_keep(elem.text) if elem.text else ""
            time.sleep(0.1)
        elif elem.tag != "title":
            new_elem = ET.SubElement(new_program, elem.tag)
            new_elem.text = elem.text if elem.text else ""
            for attr, value in elem.items():
                new_elem.set(attr, value)

# Save compact XML
output_dir = Path(__file__).parent / "tempest_config" / "epg"
output_dir.mkdir(parents=True, exist_ok=True)  # open() below needs the directory
output_file = output_dir / "yes_bollywood.xml"

print("Saving EPG file...")
with open(output_file, "wb") as f:
    # The children are serialized one per line, so the declaration and the
    # <tv> root wrapper must be written by hand for the file to be
    # well-formed XML.
    f.write(b'<?xml version="1.0" encoding="UTF-8"?>\n<tv>\n')
    for child in new_root:
        # with_tail=False drops whitespace carried over from the source tree.
        f.write(ET.tostring(child, encoding='utf-8', with_tail=False) + b'\n')
    f.write(b'</tv>\n')

print("✅ Translation complete! Saved to 'yes_bollywood.xml'")