import re
from datetime import datetime
from typing import Dict


# Currency code -> lowercase words/symbols that signal that currency in a text.
# Declaration order is significant: detect_currency walks the currencies in
# this order.
CURRENCY_PATTERNS = {
    "MZN": [
        "mt",
        "mzn",
        "metical",
        "meticais",
    ],
    "USD": [
        "usd",
        "$",
        "dolar",
        "dólar",
    ],
    "EUR": [
        "eur",
        "€",
        "euro",
    ],
}


# One number written European-style: an integer part (plain digits, or 1-3
# digits followed by ".ddd" thousands groups) plus an optional ",dd" decimal
# part. The lookarounds stop a match from starting or ending inside a longer
# digit/separator run such as "3.14159", "15.01.2024" or "1,2,3".
_EUROPEAN_NUMBER_RE = re.compile(
    r"(?<![\d.,])"
    r"(\d{1,3}(?:\.\d{3})+|\d+)"
    r"(?:,(\d+))?"
    r"(?![.,]?\d)"
)


def normalize_numbers(text: str) -> str:
    """Rewrite European-formatted numbers in ``text`` to plain decimal form.

    "." thousands separators are dropped and a "," decimal separator becomes
    ".", e.g. ``1.200,50`` -> ``1200.50`` and ``12,5`` -> ``12.5``.

    Only well-formed numbers are touched. Commas used as punctuation
    ("Olá, mundo"), dot-decimal values ("3.14159"), dotted dates and other
    digit/separator runs that do not fit the European layout are left as-is.
    A dot followed by exactly three digits (``1.200``) is always read as a
    thousands separator.

    Args:
        text: Arbitrary input text.

    Returns:
        The text with every recognised number rewritten; everything else is
        unchanged.
    """

    def _rewrite(match: re.Match) -> str:
        # Group 1 is the integer part (possibly with "." groups), group 2 the
        # digits after the decimal comma, or None when there is no comma part.
        integer_part = match.group(1).replace(".", "")
        decimals = match.group(2)
        if decimals is None:
            return integer_part
        return f"{integer_part}.{decimals}"

    return _EUROPEAN_NUMBER_RE.sub(_rewrite, text)


def normalize_dates(text: str) -> str:
    """Rewrite ``DD/MM/YYYY`` dates in ``text`` as ISO ``YYYY-MM-DD``.

    Each candidate is replaced at the exact position where it was matched, so
    look-alike digits embedded in a longer token are never touched. Candidates
    that are not real calendar dates (e.g. ``31/02/2024``) are left unchanged.
    Dates that are already in ISO form need no rewriting and pass through
    untouched.

    Args:
        text: Arbitrary input text.

    Returns:
        The text with every valid ``DD/MM/YYYY`` date converted to ISO format.
    """

    def _to_iso(match: re.Match) -> str:
        raw = match.group(0)
        try:
            parsed = datetime.strptime(raw, "%d/%m/%Y")
        except ValueError:
            # Right shape but not a real date: keep the original text.
            return raw
        # isoformat() always zero-pads the year, unlike strftime("%Y") on
        # some platforms.
        return parsed.date().isoformat()

    return re.sub(r"\b\d{2}/\d{2}/\d{4}\b", _to_iso, text)


def detect_currency(text: str) -> str | None:
    """Return the code of the currency mentioned most often in ``text``.

    Every token listed in ``CURRENCY_PATTERNS`` is searched case-insensitively.
    Alphabetic tokens must not be embedded in a longer word (so "eur" does not
    fire inside "europeu"), but they may touch digits ("1200MT") and may carry
    a plural "s"/"es" ending ("euros", "dólares"). Symbol tokens such as "$"
    match anywhere.

    Args:
        text: Text to inspect.

    Returns:
        The currency code with the highest number of hits, or ``None`` when no
        token is found. On a tie, the currency declared first in
        ``CURRENCY_PATTERNS`` wins.
    """
    best_currency: str | None = None
    best_hits = 0

    for currency, tokens in CURRENCY_PATTERNS.items():
        hits = 0
        for token in tokens:
            escaped = re.escape(token)
            if token.isalpha():
                # [^\W\d_] means "a letter": refuse matches glued to letters
                # on either side, while still allowing digits next to them.
                pattern = rf"(?<![^\W\d_]){escaped}(?:e?s)?(?![^\W\d_])"
            else:
                pattern = escaped
            hits += len(re.findall(pattern, text, flags=re.IGNORECASE))

        # Strictly greater keeps the earliest-declared currency on ties.
        if hits > best_hits:
            best_currency, best_hits = currency, hits

    return best_currency


def normalize_text(text: str) -> Dict[str, str | None]:
    """Normalize ``text`` and report the currency detected in it.

    Numbers are normalized first, then dates; currency detection runs on the
    resulting text.

    Args:
        text: Raw input text.

    Returns:
        A dict with two keys:
        ``"normalized_text"`` - the text after number and date normalization;
        ``"currency"`` - the value returned by ``detect_currency`` for that
        text, which is ``None`` when no currency is found.
    """
    normalized = normalize_dates(normalize_numbers(text))

    return {
        "normalized_text": normalized,
        "currency": detect_currency(normalized),
    }
