o
    ¹é¦h*<  ã                   @   sr   d Z ddlZddlZddlZddlmZmZmZmZ ddl	m	Z	 ddl
Z
e e¡ZG dd„ dƒZdefdd	„ZdS )
z©
Data Formatter Module

This module takes analyzed document data and formats it into the required
output formats: human-readable text format and structured JSON format.
é    N)ÚDictÚAnyÚListÚOptional)Údatetimec                   @   sp  e Zd ZdZdd„ Zdeeeeef f deeef fdd„Zdeeeeef f de	eeef  fdd	„Z
deeeeef f deeef fd
d„Zdeeeeef f de	eeef  fdd„Zdeeeeef f de	eeef  fdd„Zdeeeeef f deeef fdd„Zdeeeeef f defdd„Zdeeeeef f dedeeef fdd„ZdS )ÚDataFormatterz[
    Data formatter that converts analyzed document data into required output formats.
    c                 C   s¤   i dd“dd“dd“dg g g g g g dœ“dg “dg “d	d“d
d“dd“dg “dd“dd“dd“dd“dd“dd“dd“ddddddddœddddg g g dœ¥| _ dS )zInitialize the data formatter.ÚtotalEmployeesÚ ÚtechnicalStaffÚemployeeDetailsSpreadsheetÚfinancialDetails)ÚturnoverDetailsÚ
itrDetailsÚca_certificate_detailsÚbalanceSheetDetailsÚcertificateÚpastExperienceDetailsÚotherDocumentsÚdefaultKeywordsÚcompanyNameÚ
entityTypeÚregisteredAddressÚbranchOfficeAddressÚcompanyWebsiteÚaboutÚofficePhoneNumberÚofficeEmailÚ	officeFaxÚauthorizedPersonToSignDocsÚauthorizedPersonDesignation)ÚstartUpRegistrationNumberÚfile)ÚcinÚpanÚgstÚmsmeÚ
typeOfMsmeÚstartUpRegistrationÚcinFileÚpanFileÚgstFileÚmsmeFileÚdirectorDetailsr   r   N)Úcompany_form_template)Úself© r/   ú9/var/www/html/minaions-tender/ai-engine/data_formatter.pyÚ__init__   st   ÿþýúüôóòñðïîíìëêéèþØzDataFormatter.__init__Úanalysis_resultsÚreturnc              	   C   s   g g g g g dœ}|  ¡ D ]\}}| dd¡}| di ¡}| dd¡}d|v s*d|v rG| d| d	d¡¡| d
| dd¡¡dœ}|d  |¡ qd|v r`|| d| dd¡¡dœ}	|d  |	¡ qd|v shd|v ry|| dd¡dœ}
|d  |
¡ qd|v r| dd¡|dœ}|d  |¡ q|S )zö
        Extract financial information from analyzed documents.
        
        Args:
            analysis_results: Dictionary of analysis results
            
        Returns:
            Dict[str, Any]: Extracted financial information
        )r   r   r   r   r   Údoc_typer	   Úkey_infoÚ	file_namezFinancial:ITRÚIncome_Tax_ReturnÚtotal_incomeÚgross_incomeÚassessment_yearÚyear)ÚitrÚitrYearr   zFinancial:Balance_SheetÚfinancial_year)ÚauditedBalanceSheetÚauditedBalanceSheetYearr   zFinancial:CA_CertificateÚTurnover_CertificateÚdescription)Úca_certificateÚca_certificate_descriptionr   z
Financial:©rB   ÚfileNamer   ©ÚitemsÚgetÚappend)r.   r2   Úfinancial_infoÚ	file_pathÚresultr4   r5   r6   Ú	itr_entryÚbalance_entryÚca_entryÚ
cert_entryr/   r/   r0   Úextract_financial_infoD   s@   ûþþ
þ
þ€z$DataFormatter.extract_financial_infoc           	      C   sÖ   g }|  ¡ D ]b\}}| dd¡}| di ¡}| dd¡}d|v rh| d| dd¡¡| d| d	d¡¡| d
| dd¡¡| d| dd¡¡| d| dd¡¡| d| dd¡¡| d| dd¡¡|dœ}| |¡ q|S )a  
        Extract past experience information from analyzed documents.
        
        Args:
            analysis_results: Dictionary of analysis results
            
        Returns:
            List[Dict[str, Any]]: List of past experience details
        r4   r	   r5   r6   zExperience:ÚcustomerÚclientÚclientLocationÚlocationÚprojectÚproject_titleÚprojectScopeÚscopeÚprojectValueÚvalueÚprojectStartDateÚ
start_dateÚprojectEndDateÚend_date)rS   rU   rW   rY   r[   r]   r_   ÚdocumentrG   )	r.   r2   Úexperience_detailsrL   rM   r4   r5   r6   Úexperience_entryr/   r/   r0   Úextract_experience_infoy   s$   
ø

€z%DataFormatter.extract_experience_infoc              
   C   s  ddddddddddœ	}|  ¡ D ]y\}}| dd¡}| di ¡}| dd¡}d|v r‰d|v r;| dd¡|d	< ||d
< n>d|v rL| dd¡|d< ||d< n-d|v sTd|v re| d| dd¡¡|d< ||d< nd|v ry| d| dd¡¡|d< ||d< | d¡r‰|d s‰| d¡|d< q|S )zõ
        Extract legal and registration information from analyzed documents.
        
        Args:
            analysis_results: Dictionary of analysis results
            
        Returns:
            Dict[str, str]: Legal information
        r	   )	r   r"   r#   r$   r%   r(   r)   r*   r+   r4   r5   r6   zLegal:ÚPANÚ
pan_numberr#   r)   ÚGSTÚ
gst_numberr$   r*   ÚCINÚIncorporationr"   Úregistration_numberr(   ÚMSMEÚmsme_numberr%   r+   Úcompany_namer   )rH   rI   )r.   r2   Ú
legal_inforL   rM   r4   r5   r6   r/   r/   r0   Úextract_legal_info™   s>   ÷


€z DataFormatter.extract_legal_infoc           	      C   sZ   g }|  ¡ D ]$\}}| dd¡}| dd¡}| dd¡}d|v r*||dœ}| |¡ q|S )zô
        Extract certificates information from analyzed documents.
        
        Args:
            analysis_results: Dictionary of analysis results
            
        Returns:
            List[Dict[str, str]]: List of certificates
        r4   r	   r6   rB   zCertificates:rE   rG   )	r.   r2   ÚcertificatesrL   rM   r4   r6   rB   rQ   r/   r/   r0   Úextract_certificates_infoÈ   s   
þ
€z'DataFormatter.extract_certificates_infoc           	      C   sl   g }|  ¡ D ]-\}}| dd¡}| dd¡}| dd¡}d|v s(d|v s(d|v r3|||dœ}| |¡ q|S )	zâ
        Extract other documents information.
        
        Args:
            analysis_results: Dictionary of analysis results
            
        Returns:
            List[Dict[str, str]]: List of other documents
        r4   r	   r6   rB   zOther:z
Technical:zHR:)ÚnamerB   r!   rG   )	r.   r2   Ú
other_docsrL   rM   r4   r6   rB   Ú	doc_entryr/   r/   r0   Úextract_other_documentsâ   s   
ý
€z%DataFormatter.extract_other_documentsc                 C   s|   | j  ¡ }|  |¡}|  |¡}|  |¡}|  |¡}|  |¡}|d  |¡ ||d d< | |¡ ||d< ||d< ||d< |S )a  
        Format analyzed data into JSON structure based on company_form.json template.
        
        Args:
            analysis_results: Dictionary of analysis results
            
        Returns:
            Dict[str, Any]: Formatted JSON data
        r   r   r   r   )r-   ÚcopyrR   rd   rp   rr   rv   Úupdate)r.   r2   Úformatted_datarK   Úexperience_inforo   Úcertificates_infort   r/   r/   r0   Úformat_to_jsonÿ   s   






zDataFormatter.format_to_jsonc              	   C   s  g }|  d¡ |  d¡ |  d¡ i }| ¡ D ] \}}| dd¡}| d¡d }||vr0g ||< ||   |¡ q| ¡ D ]v\}}|  | ¡ › d¡ |  d	¡ |D ][}	|  d
|	 dd¡› ¡ |  d|	 dd¡› ¡ |  d|	 dd¡› ¡ |	 di ¡}
|
rš|  d¡ |
 ¡ D ]\}}|r™|  d|› d|› ¡ qˆ|  d|	 dd¡d›¡ |  d¡ qQ|  d¡ q<|  d¡ |  d	¡ t|ƒ}|dkrÒtdd„ | ¡ D ƒƒ| nd}|  d|› ¡ |  d|d›¡ |  dd | 	¡ ¡› ¡ |  d¡ |  dt
 ¡  d ¡› ¡ d! |¡S )"zÙ
        Format analyzed data into human-readable text format.
        
        Args:
            analysis_results: Dictionary of analysis results
            
        Returns:
            str: Formatted text
        z,COMPANY INFORMATION EXTRACTED FROM DOCUMENTSz<============================================================r	   r4   zOther:Unknownú:r   z DOCUMENTS:z------------------------------z
Document: r6   ÚUnknownzType: zDescription: rB   zNo descriptionr5   zKey Information:z  z: zConfidence: Ú
confidencez.2fzDOCUMENT ANALYSIS SUMMARY:c                 s   s    | ]	}|  d d¡V  qdS )r   r   N)rI   )Ú.0rM   r/   r/   r0   Ú	<genexpr>Q  s   € z/DataFormatter.format_to_text.<locals>.<genexpr>zTotal Documents Analyzed: zAverage Confidence: zCategories Found: z, zAnalysis completed on: z%Y-%m-%d %H:%M:%SÚ
)rJ   rH   rI   ÚsplitÚupperÚlenÚsumÚvaluesÚjoinÚkeysr   ÚnowÚstrftime)r.   r2   Útext_outputÚ
categoriesrL   rM   r4   ÚcategoryÚdocsÚdocr5   Úkeyr\   Ú
total_docsÚavg_confidencer/   r/   r0   Úformat_to_text  sL   





€

&

zDataFormatter.format_to_textÚ
output_dirc           
      C   sd  t j|dd i }|  |¡}t j |d¡}t|ddd}tj||ddd	 W d
  ƒ n1 s0w   Y  ||d< |  |¡}t j |d¡}t|ddd}| 	|¡ W d
  ƒ n1 s\w   Y  ||d< t j |d¡}	t|	ddd}tj||ddd	 W d
  ƒ n1 s‡w   Y  |	|d< t
 d|› ¡ t
 d|› ¡ t
 d|› ¡ t
 d|	› ¡ |S )a   
        Save formatted data to files.
        
        Args:
            analysis_results: Dictionary of analysis results
            output_dir: Directory to save output files
            
        Returns:
            Dict[str, str]: Dictionary mapping output type to file path
        T)Úexist_okzcompany_info.jsonÚwzutf-8)Úencodingé   F)ÚindentÚensure_asciiNÚjsonzcompany_info.txtÚtextzraw_analysis.jsonÚraw_analysiszSaved formatted data to z  - JSON format: z  - Text format: z  - Raw analysis: )ÚosÚmakedirsr|   Úpathrˆ   Úopenrœ   Údumpr”   ÚwriteÚloggerÚinfo)
r.   r2   r•   Úoutput_filesÚ	json_dataÚ	json_pathÚfÚ	text_dataÚ	text_pathÚanalysis_pathr/   r/   r0   Úsave_formatted_data\  s0   
ÿ
ÿÿz!DataFormatter.save_formatted_dataN)Ú__name__Ú
__module__Ú__qualname__Ú__doc__r1   r   Ústrr   rR   r   rd   rp   rr   rv   r|   r”   r®   r/   r/   r/   r0   r      s    *-.5* ./.*"2?r   r3   c                   C   s   t ƒ S )z†
    Factory function to create a DataFormatter instance.
    
    Returns:
        DataFormatter: Initialized formatter instance
    )r   r/   r/   r/   r0   Úcreate_data_formatterˆ  s   r´   )r²   rœ   ÚloggingrŸ   Útypingr   r   r   r   r   ÚreÚ	getLoggerr¯   r¥   r   r´   r/   r/   r/   r0   Ú<module>   s    
  x