import sys
import re
from datetime import datetime
from pathlib import Path

from src.infra.document_analysis.loaders.dispatcher import load_document
from src.infra.document_analysis.preprocess.text_cleaner import clean_text


# -------------------------
# REGEX
# -------------------------
# Date-shaped token: two digits, two digits and four digits separated by "/",
# delimited by word boundaries. Only the shape is checked by this pattern.
DATE_RE = re.compile(r"\b\d{2}/\d{2}/\d{4}\b")

# Monetary amount with exactly two decimal digits after "." or ",".
# The integer part is either 1-3 digits followed by optional three-digit
# groups separated by a space or ".", or a plain run of digits. The
# lookarounds reject matches glued to a word character on either side.
AMOUNT_RE = re.compile(
    r"(?<!\w)(\d{1,3}(?:[ .]\d{3})*(?:[.,]\d{2})|\d+(?:[.,]\d{2}))(?!\w)"
)

# Substrings that mark a block as header/footer noise. is_noise_block compares
# them against lowercased text, so every entry must itself be lowercase.
HEADER_BLACKLIST = [
    "processado por computador",
    "www.",
    "dados de cliente",
    "dados da conta",
    "account data",
    "customer data",
    "saldo inicial",
    "saldo final",
]


def normalize_amount(raw: str) -> float:
    """Convert a textual monetary amount into a float.

    Spaces are removed, then the rightmost "." or "," is taken as the decimal
    mark and every earlier "." or "," is treated as a grouping separator.
    This handles "1.234,56", "1 234,56", "1,234.56" and "1.234.56" alike,
    all of which yield 1234.56.

    Args:
        raw: Amount text, e.g. as captured by AMOUNT_RE.

    Returns:
        The parsed value, or 0.0 when the text cannot be parsed as a number.
    """
    compact = raw.replace(" ", "")

    # The decimal mark is whichever separator appears last in the string.
    decimal_pos = max(compact.rfind("."), compact.rfind(","))
    if decimal_pos != -1:
        integer_part = compact[:decimal_pos].replace(".", "").replace(",", "")
        fraction_part = compact[decimal_pos + 1:]
        compact = f"{integer_part}.{fraction_part}"

    try:
        return float(compact)
    except ValueError:
        # Callers treat 0.0 as "no usable amount".
        return 0.0


def is_noise_block(text: str) -> bool:
    """Tell whether ``text`` contains any HEADER_BLACKLIST marker.

    The check is a case-insensitive substring search: ``text`` is lowercased
    and each blacklist entry is looked up inside it.
    """
    lowered = text.lower()
    for marker in HEADER_BLACKLIST:
        if marker in lowered:
            return True
    return False


def _build_date_blocks(text, matches):
    """Slice ``text`` into ``(date, block_text)`` pairs, one per date match.

    Each block runs from the start of its date match to the start of the next
    match, or to the end of ``text`` for the last one. Matches that cannot be
    parsed as a real ``%d/%m/%Y`` date are dropped together with their text.
    """
    blocks = []
    for i, match in enumerate(matches):
        try:
            date = datetime.strptime(match.group(), "%d/%m/%Y")
        except ValueError:
            # Right digit shape, but not a valid calendar date.
            continue

        end = matches[i + 1].start() if i + 1 < len(matches) else len(text)
        blocks.append((date, text[match.start():end].strip()))
    return blocks


def _sum_balance_deltas(balances):
    """Return ``(credits, debits)`` summed over consecutive balance changes.

    ``balances`` is a sequence of ``(date, balance)`` pairs in the order they
    should be compared. Increases add to credits and decreases add their
    absolute value to debits.
    """
    total_credits = 0.0
    total_debits = 0.0
    for (_, previous), (_, current) in zip(balances, balances[1:]):
        delta = current - previous
        if delta > 0:
            total_credits += delta
        elif delta < 0:
            total_debits += abs(delta)
    return total_credits, total_debits


def main():
    """Debug entry point: segment a statement by dates and print its flow.

    Reads the document path from ``sys.argv[1]``, loads and cleans its text,
    splits it into blocks that each start at a date, infers one balance per
    block and prints the credits, debits and net flow between consecutive
    balances.

    Exits with status 1 when no path is given, and with status 0 when fewer
    than two dates or fewer than two usable balances are found.
    """
    if len(sys.argv) < 2:
        print("❌ Caminho do extrato não informado.")
        sys.exit(1)

    file_path = Path(sys.argv[1])

    print("\n📄 Arquivo:", file_path)
    print("🔎 Iniciando leitura...\n")

    raw_text = load_document(str(file_path))
    text = clean_text(raw_text)

    print("🏦 DEBUG – SEGMENTAÇÃO POR DATAS (FASE 2 + 3.1)")
    print("=" * 120)

    # -----------------------------
    # 1) Find dates
    # -----------------------------
    matches = list(DATE_RE.finditer(text))

    if len(matches) < 2:
        print("❌ Menos de duas datas encontradas.")
        sys.exit(0)

    print(f"📅 Datas detectadas: {len(matches)}\n")

    # -----------------------------
    # 2) Build blocks
    # -----------------------------
    blocks = _build_date_blocks(text, matches)

    # -----------------------------
    # 3) Analyse blocks
    # -----------------------------
    balances = []

    for idx, (date, block) in enumerate(blocks, start=1):
        print(f"\n🧾 TRANSAÇÃO {idx}")
        print("-" * 100)
        print(f"📅 Data: {date.strftime('%d/%m/%Y')}")

        if is_noise_block(block):
            print("🚫 BLOCO IGNORADO (cabeçalho/rodapé)")
            continue

        # Parse each captured amount once; non-positive values (including the
        # 0.0 returned for unparseable text) are discarded.
        amounts = [
            value
            for value in map(normalize_amount, AMOUNT_RE.findall(block))
            if value > 0
        ]

        print(f"📄 Texto (300 chars): {block[:300].replace(chr(10), ' ')}...")
        print(f"💰 Valores encontrados: {amounts}")

        if len(amounts) < 2:
            print("⚠️ BLOCO IGNORADO (valores insuficientes)")
            continue

        # Heuristic: the largest amount in the block is taken as the balance.
        # It is always positive here because of the filter above.
        saldo = max(amounts)

        balances.append((date, saldo))
        print(f"✅ Saldo inferido: {saldo:,.2f}")

    # -----------------------------
    # 4) Financial flow
    # -----------------------------
    print("\n" + "=" * 120)
    print("📊 TESTE DE FLUXO – DADOS LIMPOS (FASE 3.1)")
    print("=" * 120)

    if len(balances) < 2:
        print("❌ Dados insuficientes para análise de fluxo.")
        sys.exit(0)

    # Stable sort: balances sharing a date keep their document order.
    balances.sort(key=lambda entry: entry[0])

    total_creditos, total_debitos = _sum_balance_deltas(balances)

    print(f"➕ Total Créditos: {total_creditos:,.2f}")
    print(f"➖ Total Débitos:  {total_debitos:,.2f}")
    print(f"📈 Fluxo Líquido:  {(total_creditos - total_debitos):,.2f}")

    print("\n✅ FIM DO TESTE – FASE 3.1\n")


# Run the debug pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
