# src/infra/document_analysis/analyzers/document_analyst.py

from typing import List

from src.infra.document_analysis.analyzers.bank_statement_analyzer import (
    analyze_bank_statement,
)
from src.infra.document_analysis.analyzers.alvara_analyzer import analyze_alvara
from src.infra.document_analysis.analyzers.certidao_registo_comercial_analyzer import (
    analyze_certidao_registo_comercial,
)
from src.infra.document_analysis.analyzers.nuit_analyzer import analyze_nuit

from src.infra.document_analysis.loaders.dispatcher import load_document
from src.infra.document_analysis.preprocess.text_cleaner import clean_text
from src.infra.document_analysis.detectors.document_type_detector import (
    detect_document_type,
)
from src.infra.document_analysis.extractors.entities_extractor import extract_entities

from src.infra.document_analysis.validators.coherence_validator import (
    validate_coherence,
)
from src.infra.document_analysis.validators.credibility_validator import (
    validate_credibility,
)
from src.infra.document_analysis.validators.cross_document_validator import (
    CrossDocumentValidator
)

from src.infra.document_analysis.scorers.legibility_scorer import score_legibility
from src.infra.document_analysis.scorers.structure_scorer import score_structure

from src.infra.document_analysis.schemas import DocumentAnalysisResult

from src.infra.document_analysis.builders.profile_builder import (
    build_profile_from_alvara,
    build_profile_from_certidao,
    build_profile_from_nuit,
)

from src.infra.document_analysis.reports.company_consistency_report import (
    build_company_consistency_report,
)


class DocumentAnalyst:
    """
    Analyzes legal and financial documents and produces:
    - one DocumentAnalysisResult per document
    - an aggregated CompanyConsistencyReport (the single legal source of truth)
    """

    def __init__(self, ai_adapter=None):
        # Optional externally-injected AI adapter; stored for callers that
        # provide one (not used by the pipeline below).
        self.ai_adapter = ai_adapter

    def analyze_documents(self, file_paths: List[str]) -> List[DocumentAnalysisResult]:
        """Run the full analysis pipeline over a batch of documents.

        Args:
            file_paths: paths of the documents to analyze.

        Returns:
            One DocumentAnalysisResult per input path, in input order.
            After the per-document pass, cross-document consistency data
            is written into every result's ``features``/``alerts``.
        """
        results: List[DocumentAnalysisResult] = []
        company_profiles: list = []

        for path in file_paths:
            results.append(self._analyze_single_document(path, company_profiles))

        self._apply_cross_document_checks(results, company_profiles)
        return results

    # =====================================================
    # Per-document pipeline (steps 1-8)
    # =====================================================
    def _analyze_single_document(self, path, company_profiles):
        """Analyze one file; returns its DocumentAnalysisResult.

        Side effect: any CompanyProfile built from the document is
        appended to ``company_profiles`` (mutated in place).
        """
        alerts: List[str] = []
        features: dict = {}

        # 1) LOAD — any loader failure yields a sentinel "ERRO" result.
        try:
            raw_text = load_document(path)
        except Exception as e:
            return self._error_result(path, e)

        text = clean_text(raw_text)
        if not text:
            alerts.append("Documento sem texto legível")

        # 2) TYPE DETECTION
        doc_type, doc_conf = detect_document_type(text)
        if doc_type == "desconhecido":
            alerts.append("Tipo de documento não reconhecido")

        # 3) ENTITY EXTRACTION
        entities = extract_entities(text)
        if not entities:
            alerts.append("Nenhuma entidade relevante extraída")

        # 4) BASE SCORERS — note structure is scored on the RAW text,
        # the other scorers on the cleaned text.
        leg = score_legibility(text)
        struct = score_structure(raw_text)
        coh = validate_coherence(entities)
        cred = validate_credibility(text, entities, doc_type)

        if leg < 0.4:
            alerts.append("Baixa legibilidade do documento")
        if cred < 0.4:
            alerts.append("Baixa credibilidade do documento")

        # 5) TYPE-SPECIFIC ANALYSIS (best-effort; failures become alerts)
        self._add_type_specific_features(doc_type, text, entities, features, alerts)

        # 6) UNIFIED PROFILE (CompanyProfile; best-effort)
        self._build_company_profile(
            doc_type, features, entities, company_profiles, alerts
        )

        # 7) BASE FEATURES
        features.update(
            {
                "legibility_score": leg,
                "structure_score": struct,
                "coherence_score": coh,
                "credibility_score": cred,
                "document_type_confidence": doc_conf,
                "document_type": doc_type,
            }
        )

        # 8) PER-DOCUMENT RESULT
        return DocumentAnalysisResult(
            document_id=path,
            document_type_detected=doc_type,
            document_type_confidence=doc_conf,
            legibilidade_score=leg,
            estrutura_score=struct,
            coerencia_score=coh,
            credibilidade_score=cred,
            extracted_entities=entities,
            alerts=alerts,
            features=features,
        )

    @staticmethod
    def _error_result(path, exc):
        """Build the sentinel result used when a document cannot be loaded."""
        return DocumentAnalysisResult(
            document_id=path,
            document_type_detected="ERRO",
            document_type_confidence=0.0,
            legibilidade_score=0.0,
            estrutura_score=0.0,
            coerencia_score=0.0,
            credibilidade_score=0.0,
            extracted_entities={},
            alerts=[f"Erro ao processar documento: {str(exc)}"],
            features={"document_error": 1.0},
        )

    @staticmethod
    def _add_type_specific_features(doc_type, text, entities, features, alerts):
        """Step 5: run the analyzer matching ``doc_type`` into ``features``.

        Best-effort: any analyzer failure is downgraded to an alert.
        """
        try:
            if doc_type == "extrato_bancario":
                features["bank_statement_features"] = analyze_bank_statement(text)

            elif doc_type == "alvara_comercial":
                features["alvara_features"] = analyze_alvara(
                    text=text,
                    extracted_entities=entities,
                )

            elif doc_type == "certidao_registo":
                features["certidao_registo_features"] = (
                    analyze_certidao_registo_comercial(text)
                )

            elif doc_type == "nuit_atribuicao":
                features["nuit_features"] = analyze_nuit(text)

        except Exception as ex:
            alerts.append(f"Falha na análise específica do documento: {str(ex)}")

    @staticmethod
    def _build_company_profile(doc_type, features, entities, company_profiles, alerts):
        """Step 6: build a CompanyProfile from the type-specific features.

        On success the profile is stored both in ``features`` and in the
        shared ``company_profiles`` accumulator; failures become an
        informational alert.
        """
        try:
            profile = None

            if doc_type == "alvara_comercial" and "alvara_features" in features:
                profile = build_profile_from_alvara(
                    features["alvara_features"], entities
                )

            elif (
                doc_type == "certidao_registo"
                and "certidao_registo_features" in features
            ):
                profile = build_profile_from_certidao(
                    features["certidao_registo_features"], entities
                )

            elif doc_type == "nuit_atribuicao" and "nuit_features" in features:
                profile = build_profile_from_nuit(
                    features["nuit_features"], entities
                )

            if profile:
                features["company_profile"] = profile
                company_profiles.append(profile)

        except Exception as ex:
            alerts.append(
                f"(Info) Falha ao construir CompanyProfile: {str(ex)}"
            )

    @staticmethod
    def _apply_cross_document_checks(results, company_profiles):
        """Steps 9-10: annotate every result with cross-document data."""
        # 9) GENERIC CROSS-DOCUMENT CONSISTENCY
        global_consistency_score = 1.0  # default when no profiles were built
        if company_profiles:
            validator = CrossDocumentValidator(company_profiles)
            validation_result = validator.validate()
            # Score degrades by 0.2 per reported issue, floored at 0.
            num_issues = len(validation_result.get('issues', []))
            global_consistency_score = max(0.0, 1.0 - (num_issues * 0.2))
            for r in results:
                # NOTE: the same validation dict object is shared by all results.
                r.features["cross_document_validation"] = validation_result

        for r in results:
            r.features["cross_document_consistency"] = global_consistency_score
            if global_consistency_score < 0.6:
                r.alerts.append(
                    "Inconsistência significativa entre documentos submetidos."
                )

        # 10) COMPANY CONSISTENCY (SINGLE LEGAL SOURCE OF TRUTH)
        if company_profiles:
            report = build_company_consistency_report(
                profiles=[p.__dict__ for p in company_profiles]
            )

            for r in results:
                r.features["company_consistency_report"] = report
                if report["status"] != "OK":
                    r.alerts.extend(report.get("alerts", []))
