import os
import cloudscraper
from bs4 import BeautifulSoup
from datetime import datetime, timedelta
from openpyxl import Workbook, load_workbook
from openpyxl.styles import PatternFill, Font
import time
import pytz

# === Setup ===
EXCEL_FILE = 'Historic News Data.xlsx'
START_DATE = datetime(2024, 7, 1)
END_DATE = datetime(2025, 4, 22)

HEADERS = [
    "Date", "Time", "DateTime", "Currency", "Event",
    "Actual", "Forecast", "Difference", "Result"
]

# === Colors ===
FILL_GREEN = PatternFill(start_color="C6EFCE", end_color="C6EFCE", fill_type="solid")
FILL_RED = PatternFill(start_color="FFC7CE", end_color="FFC7CE", fill_type="solid")
HEADER_FONT = Font(bold=True, size=12)
NORMAL_FONT = Font(size=11)

# === Initialize scraper ===
scraper = cloudscraper.create_scraper()

# === Excel Setup ===
def init_workbook():
    if not os.path.exists(EXCEL_FILE):
        wb = Workbook()
        ws = wb.active
        ws.title = "All"
        ws.append(HEADERS)
        for col in range(1, len(HEADERS) + 1):
            ws.cell(row=1, column=col).font = HEADER_FONT
        wb.save(EXCEL_FILE)

def clean_value(val):
    if isinstance(val, str):
        val = val.replace(',', '').replace('K', '000').replace('M', '000000')
        val = val.replace('%', '')
    return float(val) if val else 0.0

def calculate_percentage_diff(actual, forecast):
    try:
        actual_num = clean_value(actual)
        forecast_num = clean_value(forecast)
        if forecast_num == 0:
            return 0.0
        return ((actual_num - forecast_num) / forecast_num) * 100
    except (ValueError, TypeError):
        return None

def get_result_from_classes(row):
    actual_cell = row.find("td", class_="calendar__actual")
    if not actual_cell:
        return "Neutral"
    span = actual_cell.find("span")
    if span and span.has_attr("class"):
        classes = span["class"]
        if "better" in classes:
            return "Better"
        elif "worse" in classes:
            return "Worse"
    row_classes = row.get("class", [])
    if "better" in row_classes:
        return "Better"
    elif "worse" in row_classes:
        return "Worse"
    return "Neutral"

def get_fill_for_result(result):
    if result == "Better":
        return FILL_GREEN
    elif result == "Worse":
        return FILL_RED
    return None

def style_headers(sheet):
    for col in range(1, len(HEADERS) + 1):
        sheet.cell(row=1, column=col).font = HEADER_FONT

def parse_and_save(html, target_date):
    soup = BeautifulSoup(html, 'html.parser')
    rows = soup.find_all("tr", class_="calendar__row")

    parsed_rows = []
    current_time_str = None

    local_zone = pytz.timezone("Asia/Karachi")        # Your system/browser shows times in Pakistan time
    target_zone = pytz.FixedOffset(180)               # IC Markets server time (UTC+3)

    for row in rows:
        time_cell = row.find("td", class_="calendar__time")
        time_text = time_cell.text.strip() if time_cell else ""
        if time_text and time_text.lower() != "all day":
            current_time_str = time_text

        if not current_time_str:
            continue

        impact_cell = row.find("td", class_="calendar__impact")
        if not impact_cell:
            continue
        impact_icon = impact_cell.find("span")
        if not impact_icon or 'icon--ff-impact-red' not in impact_icon.get("class", []):
            continue

        try:
            naive_dt = datetime.strptime(f"{target_date.strftime('%Y-%m-%d')} {current_time_str}", "%Y-%m-%d %I:%M%p")
            localized_dt = local_zone.localize(naive_dt)
            server_dt = localized_dt.astimezone(target_zone)
        except Exception:
            continue

        currency_tag = row.find("td", class_="calendar__currency")
        event_tag = row.find("td", class_="calendar__event")
        actual_tag = row.find("td", class_="calendar__actual")
        forecast_tag = row.find("td", class_="calendar__forecast")

        if not currency_tag or not event_tag:
            continue

        currency = currency_tag.text.strip()
        event = event_tag.text.strip()
        actual = actual_tag.text.strip() if actual_tag else ""
        forecast = forecast_tag.text.strip() if forecast_tag else ""

        if actual in ["", "-", "—"]:
            continue

        percentage_diff = calculate_percentage_diff(actual, forecast)
        result = get_result_from_classes(row)

        row_data = [
            server_dt.strftime("%Y-%m-%d"),
            server_dt.strftime("%H:%M"),
            server_dt.strftime("%Y-%m-%d %H:%M"),
            currency,
            event,
            actual,
            forecast,
            f"{percentage_diff:.2f}%" if percentage_diff is not None else "",
            result
        ]
        parsed_rows.append((row_data, result))

    if parsed_rows:
        wb = load_workbook(EXCEL_FILE)

        def write_row(sheet, row_data, result):
            ws = wb[sheet]
            row = ws.max_row + 1
            for col, value in enumerate(row_data, start=1):
                cell = ws.cell(row=row, column=col, value=value)
                cell.font = NORMAL_FONT
                if col == 9:
                    fill = get_fill_for_result(result)
                    if fill:
                        cell.fill = fill

        if "All" not in wb.sheetnames:
            ws = wb.create_sheet("All")
            ws.append(HEADERS)
            style_headers(ws)

        for row_data, result in parsed_rows:
            write_row("All", row_data, result)
            currency = row_data[3]
            if currency not in wb.sheetnames:
                currency_ws = wb.create_sheet(currency)
                currency_ws.append(HEADERS)
                style_headers(currency_ws)
            write_row(currency, row_data, result)

        wb.save(EXCEL_FILE)
        print(f"✅ Saved {len(parsed_rows)} items for {target_date.strftime('%b %d')}")
    else:
        print(f"ℹ️ No high-impact items for {target_date.strftime('%b %d')}")

# === Main ===
init_workbook()
current_date = END_DATE
while current_date >= START_DATE:
    date_str = current_date.strftime("%b%d.%Y")
    url = f"https://www.forexfactory.com/calendar?day={date_str}"
    print(f"\n📅 Scraping: {current_date.strftime('%Y-%m-%d')} -> {url}")

    try:
        response = scraper.get(url, timeout=10)
        if response.status_code == 200:
            parse_and_save(response.text, current_date)
        else:
            print(f"❌ HTTP {response.status_code} for {url}")
    except Exception as e:
        print(f"❌ Error fetching {url}: {str(e)}")

    current_date -= timedelta(days=1)
    time.sleep(1)

print("\n✅ All done!")