
    z%oi                         d dl Z d dlZd dlmZ d dlmZ d dlmZ d dlmZ  ej                  d      Z
 ej                  d      Zg dZd	ed
efdZded
efdZd Zedk(  r e        yy)    N)datetime)Path)load_document)
clean_textz\b\d{2}/\d{2}/\d{4}\bzB(?<!\w)(\d{1,3}(?:[ .]\d{3})*(?:[.,]\d{2})|\d+(?:[.,]\d{2}))(?!\w))zprocessado por computadorzwww.zdados de clientezdados da contazaccount datazcustomer datazsaldo inicialzsaldo finalrawreturnc                     | j                  dd      } d| v r'd| v r#| j                  dd      j                  dd      } nd| v r| j                  dd      } 	 t        |       S # t        $ r Y yw xY w)N  .,        )replacefloat
ValueError)r   s    VC:\Users\dkayr\PycharmProjects\Credit_Scoring_plataform\tests\bank_tests\teste_bank.pynormalize_amountr      st    
++c2
C
czcSjkk#r"**34	kk#s#Sz s   
A   	A,+A,textc                 T      j                          t         fdt        D              S )Nc              3   &   K   | ]  }|v  
 y w)N ).0kr   s     r   	<genexpr>z!is_noise_block.<locals>.<genexpr>-   s     3"2QqDy"2s   )loweranyHEADER_BLACKLIST)r   s   `r   is_noise_blockr   +   s     ::<D3"2333    c            	         t        t        j                        dk  r t        d       t        j                  d       t        t        j                  d         } t        d|        t        d       t        t        |             }t        |      }t        d       t        d       t        t        j                  |            }t        |      dk  r t        d       t        j                  d	       t        d
t        |       d       g }t        t        |            D ]  }||   j                         }|dz   t        |      k  r||dz      j                         n
t        |      }	 t        j                  ||   j!                         d      }||| j%                         }	|j'                  ||	f        g }
t)        |d      D ]$  \  }\  }}t        d|        t        d       t        d|j+                  d              t-        |      rt        d       Wt.        j1                  |      }|D cg c]  }t3        |      d	kD  st3        |       }}t        d|d d j5                  t7        d      d       d       t        d|        t        |      dk  rt        d       t9        |      }|d	k  rt        d       |
j'                  ||f       t        d|d       ' t        d       t        d       t        d       t        |
      dk  r t        d       t        j                  d	       |
j;                  d         d!}d!}|
d	   d   }|
dd  D ]*  \  }}||z
  }|d	kD  r||z  }n|d	k  r|t=        |      z  }|}, t        d"|d       t        d#|d       t        d$||z
  d       t        d%       y # t"        $ r Y w xY wc c}w )&N   u&   ❌ Caminho do extrato não informado.   u   
📄 Arquivo:u   🔎 Iniciando leitura...
u5   🏦 DEBUG – SEGMENTAÇÃO POR DATAS (FASE 2 + 3.1)zx========================================================================================================================u$   ❌ Menos de duas datas encontradas.r   u   📅 Datas detectadas: 
z%d/%m/%Y)startu   
🧾 TRANSAÇÃO zd----------------------------------------------------------------------------------------------------u   📅 Data: u(   🚫 BLOCO IGNORADO (cabeçalho/rodapé)u   📄 Texto (300 chars): i,  
   r
   z...u   💰 Valores encontrados: u-   ⚠️ BLOCO IGNORADO (valores insuficientes)u'   ⚠️ BLOCO IGNORADO (saldo inválido)u   ✅ Saldo inferido: z,.2fzy
========================================================================================================================u/   📊 TESTE DE FLUXO – DADOS LIMPOS (FASE 3.1)u/   ❌ Dados insuficientes para análise de fluxo.c                     | d   S )Nr   r   )xs    r   <lambda>zmain.<locals>.<lambda>   s    !r   )keyr   u   ➕ Total Créditos: u   ➖ Total Débitos:  u   📈 Fluxo Líquido:  u   
✅ FIM DO TESTE – FASE 3.1
)lensysargvprintexitr   r   strr   listDATE_REfinditerranger$   r   strptimegroupr   stripappend	enumeratestrftimer   	AMOUNT_REfindallr   r   chrmaxsortabs)	file_pathraw_textr   matchesblocksir$   enddate
block_textbalancesidxblockamounts_rawaamountssaldototal_creditostotal_debitosprev_deltas                         r   mainrT   0   s   
388}q67SXXa[!I	
Y'	
'(S^,HhD	
AB	)
 7##D)*G
7|a45	#CL>
45
 F3w< 
  "()AG(<ga!en""$#d)	$$WQZ%5%5%7DD %_**,
tZ() ! H'a88]dE#C5)*iDMM*5678% <=''.0;W1?OPQ?RUV?V#A&W(t)<)<SWc)J(K3OP*7)45w<!ABGA:;<u&$U4L125 9> 

	
;<	)
8}q?@MMnM%NMA;q>DQRL519e#NQYSZ'M ! 
!.!6
78	!-!5
67	"N]$BD"I
JK	
-.I  		* Xs   ('O
6O
O
	OO__main__)r+   rer   pathlibr   .src.infra.document_analysis.loaders.dispatcherr   3src.infra.document_analysis.preprocess.text_cleanerr   compiler1   r:   r   r/   r   r   boolr   rT   __name__r   r   r   <module>r]      s    
 	   H J "**-
.BJJI		 	# 	% 	4 4 4
j/Z zF r   