o
    !6i                    @   s|  d dl Z d dlZd dlZd dlZd dlmZ d dlmZ d dlmZm	Z	m
Z
mZmZmZ d dlZd dlZd dlmZ d dlZd dlZd dlZd dlZd dlZd dlmZ d dlmZ d dlZd dlZd dlZ d dl!Z"d dl#m$Z$m%Z% d dl&Z'd dl(Z(d dl)Z*d d	l+m,Z, e,  e -d
Z.e -dZ/e -dZ0ej1ej2dd e3e4Z5ddddgddddddgdddddgddddddgddddddgddddddgdddd ddgd!ddd"ddgd#ddd$ddgd%ddd&d'dgd(ddd)d'dgd*ddd+d'dgd,ddd-d.dgd/dd0d1d2dgd3ddd4d5dgd6dd7Z6G d8d9 d9Z7G d:d; d;Z8G d<d= d=Z9e4d>kr<e9 Z:dS dS )?    N)datetime)Path)DictListOptionalAnyUnionTuple)urljoin)ClientError)setup_api_logging)!extract_documents_text_compatiblecreate_document_extractor)load_dotenvANTHROPIC_API_KEY
AWS_REGIONAWS_S3_BUCKETz4%(asctime)s - %(name)s - %(levelname)s - %(message)s)levelformatstandardzLCompany incorporation/registration document containing company name: {value}zBASIC COMPANY DETAILSzLEGAL & REGISTRATION DETAILSzCompany Name Document)categorydescription_templaterelevant_sectionsdisplay_namez*Entity type documentation showing: {value}zEntity Type Documentz2Company website verification document for: {value}zWebsite DocumentzSEmployee details document containing staff information and organizational structurezEMPLOYEE INFORMATIONzEmployee Detailsz0Company Incorporation Certificate - CIN: {value}z!Company Incorporation Certificatez9Permanent Account Number (PAN) Certificate - PAN: {value}zPAN CertificatezDGoods and Services Tax (GST) Registration Certificate - GST: {value}zGST Registration Certificatez<MSME Registration Certificate - Registration Number: {value}zMSME Registration CertificatezDDPIIT Startup Recognition Certificate - Registration Number: {value}zStartup Recognition Certificatez,Income Tax Return for Financial Year: {year}zFINANCIAL INFORMATIONzIncome Tax Returnz/Chartered Accountant Certificate: {description}zCA Certificatez0Audited Balance Sheet for Financial Year: {year}zAudited Balance Sheetz:Director documentation for {name} - PAN: {pan}, DIN: {din}zDIRECTOR INFORMATIONzDirector Documents
experiencezUExperience document for project '{project}' with client '{customer}' - Value: {value}zPAST EXPERIENCEzPast Experience DocumentzCertificate: {description}CERTIFICATIONSzCompany Certificate)companyName
entityTypecompanyWebsiteuploadedEmployeeDetails
cin_number
pan_number
gst_numbermsme_numberstartUpRegistrationNumberfinancial_itrfinancial_ca_certfinancial_balance_sheetdirector_detailspast_experiencecertificatec                   @   s8   e Zd ZdZdZdZdZdZdZdZ	dZ
d	Zd
ZdZdS )Colorz)Utility class for colored terminal outputz[95mz[96mz[36mz[94mz[92mz[93mz[91mz[1mz[4mz[0mN)__name__
__module____qualname____doc__PURPLECYANDARKCYANBLUEGREENYELLOWREDBOLD	UNDERLINEEND r:   r:   ?/var/www/html/minaions-tender/ai-engine/minaions_rfp_service.pyr+      s    r+   c                   @   s  e Zd ZdZdd ZdedefddZdLded	edefd
dZd	edefddZ	dededefddZ
dededefddZdee dee fddZdedefddZdededefddZdedefddZd edefd!d"Zd ededefd#d$Zd	edededefd%d&Zd'edefd(d)Zd*ededefd+d,Zd*ededefd-d.Z	/dMd0ed1ed2eded3edefd4d5ZdNd1eded7edefd8d9Zdedee fd:d;Zded<edefd=d>Zded?edefd@dAZdOd1ededCedefdDdEZdFe j!ddfdGdHZ"dIedefdJdKZ#dS )P	ApiClientz,Handles all API calls to the Node.js servicec                 C   s"   | d| _|| _|dd| _dS )zInitialize API client/application/json)x-internal-api-keyzContent-TypeN)rstripbase_urlapi_keyheaders)selfrA   rB   r:   r:   r;   __init__   s
   zApiClient.__init__	tenant_idreturnc                 C   6   | j  d| }tj|| jd}| | | d S )zGet tenant by ID	/tenants/rC   datarA   requestsgetrC   _check_responsejsonrD   rF   urlresponser:   r:   r;   
get_tenant      
zApiClient.get_tenantN
company_idc                 C   sX   |du r| j  d| d}n| j  d| d| }tj|| jd}| | | d S )z,Get formatted company information for tenantNrI   z/company-infoz/company-info?company_id=rJ   rK   rL   )rD   rF   rV   rR   rS   r:   r:   r;   get_company_info   s   
zApiClient.get_company_infoc                 C   rH   )z4Get raw analysis of company's document from databasez/rawAnalysisById/rJ   rK   rL   )rD   rV   rR   rS   r:   r:   r;   get_company_analysis_data   rU   z#ApiClient.get_company_analysis_data	tender_idc                 C   s2  | j  d| d| }tj|| jd}| | | d }d|v rtdt|d   td|d   t	|d t
rg }|d D ]C}t	|trZd|v rZ|t|d  qEt	|tre|| qEtd	t| d
|  z	|t| W qE   td|  Y qE||d< td|d   |S )zGet tender by ID and tenant	/tenders/?tenant_id=rJ   rK   originalDocumentsz'Original documents in tender response: zDocument IDs: _idzUnexpected document ID format:  - z)Could not convert document ID to string: zConverted document IDs: )rA   rM   rN   rC   rO   rP   loggerinfotype
isinstancelistdictappendstrwarningerror)rD   rY   rF   rR   rS   tender_datastring_docsdocr:   r:   r;   
get_tender   s,   

zApiClient.get_tenderupdate_datac                 C   8   | j  d| }tj||| jd}| | | d S )zUpdate tender with new datarZ   rP   rC   rK   rA   rM   putrC   rO   rP   )rD   rY   rm   rR   rS   r:   r:   r;   update_tender      
zApiClient.update_tenderdocument_idsc              	   C   s  | j  d}g }|D ]4}|du rq
t|tr$d|v r$|t|d  q
t|tr7d|v r7|t|d  q
|t| q
td|  tj|d|i| j	d}|j
dkrtd	|j
  z| }td
|  W n   td|jdd   Y | | | d S )zGet documents by IDsz/documents/get-manyNr]   z$oidzRequesting documents with IDs: rt   ro     zError getting documents: HTTP Error details: Raw response:    rK   )rA   rb   rd   re   rf   r_   debugrM   postrC   status_coderh   rP   textrO   )rD   rt   rR   
string_idsdoc_idrS   
error_infor:   r:   r;   get_documents   s*   

zApiClient.get_documentsdocument_datac                 C   4   | j  d}tj||| jd}| | | d S )zCreate a new document recordz
/documentsro   rK   rA   rM   rz   rC   rO   rP   )rD   r   rR   rS   r:   r:   r;   create_document     
zApiClient.create_documentdocument_idc              	   C   s   t |dddd}td|  | j d| d| }tj|d| jid}|jd	krWt	d
|j  z|
 }t	d|  W n   t	d|jdd   Y | | |jS )zGet document content" 'z!Getting content for document ID: z/documents/z/content?tenant_id=r?   rJ   ru   z%Error getting document content: HTTP rv   rw   Nrx   )rf   replacer_   r`   rA   rM   rN   rB   r{   rh   rP   r|   rO   content)rD   r   rF   r~   rR   rS   r   r:   r:   r;   get_document_content  s   

zApiClient.get_document_contentanalysis_datac                 C   r   )zCreate a new analysis recordz	/analysesro   rK   r   )rD   r   rR   rS   r:   r:   r;   create_analysis4  r   zApiClient.create_analysisanalysis_idc                 C   rH   )zGet analysis by ID
/analyses/rJ   rK   rL   )rD   r   rR   rS   r:   r:   r;   get_analysis;  rU   zApiClient.get_analysisc                 C   rn   )zUpdate analysis with new datar   ro   rK   rp   )rD   r   rm   rR   rS   r:   r:   r;   update_analysisB  rs   zApiClient.update_analysisc                 C   s>   | j  d| d| }tj||| jd}| | | d S )z'Update company info with extracted dataz/companyDetails/r=   ro   rK   rp   )rD   rV   rF   rm   rR   rS   r:   r:   r;   update_company_infoI  s   
zApiClient.update_company_infobid_datac                 C   r   )z"Create a new bid generation recordz/bid-generationsro   rK   r   )rD   r   rR   rS   r:   r:   r;   create_bid_generationP  r   zApiClient.create_bid_generationbid_generation_idc                 C   s<   | j  d| d| }tj|| jd}| | | d S )z#Get bid generation by ID and tenant/bid-generations/r[   rJ   rK   rL   )rD   r   rF   rR   rS   r:   r:   r;   get_bid_generationW  s   
zApiClient.get_bid_generationc                 C   rn   )z#Update bid generation with new datar   ro   rK   rp   )rD   r   rm   rR   rS   r:   r:   r;   update_bid_generation^  rs   zApiClient.update_bid_generationFfile_contents3_keycontent_typeencryptc                 C   sj   | j  d}dtj|||fi}|||t| d}d| ji}	tj||||	d}
| 	|
 |

 d S )zUpload file to S3z/storage/uploadfile)rF   keyr   r   r?   )filesrK   rC   rK   )rA   ospathbasenamerf   lowerrB   rM   rz   rO   rP   )rD   r   r   r   rF   r   rR   r   rK   rC   rS   r:   r:   r;   upload_filee  s   


zApiClient.upload_fileTdecryptc                 C   sH   | j  d| d| dt|  }tj|d| jid}| | |jS )zDownload file from S3z/storage/download?tenant_id=&key=z	&decrypt=r?   rJ   )rA   rf   r   rM   rN   rB   rO   r   )rD   r   rF   r   rR   rS   r:   r:   r;   download_file{  s   $
zApiClient.download_filec                 C   s8   | j  d| d}tj|| jd}| | | d S )z#List company documents for a tenant/storage/company-documents/z/listrJ   rK   rL   rQ   r:   r:   r;   list_company_documents  s   
z ApiClient.list_company_documentsdownload_dirc                 C   s   | j  d| d}tj|d| jid}| | tj|d}t|d}|	|j
 W d   n1 s5w   Y  ddl}||d	}|| W d   n1 sTw   Y  t| d}	t|D ]\}
}
}|	t|7 }	qe|	S )
z+Download all company documents for a tenantr   z	/downloadr?   rJ   zcompany_documents.zipwbNr   r)rA   rM   rN   rB   rO   r   r   joinopenwriter   zipfileZipFile
extractallremovewalklen)rD   rF   r   rR   rS   zip_pathfr   zip_ref
file_count_r   r:   r:   r;   download_company_documents  s$   

z$ApiClient.download_company_documentslocal_docs_pathc              
   C   s   | j  d| d}g }t|D ],\}}}|D ]$}|ds#|dr$qtj||}	|d|t|	d| 	|ff qqd| j
i}
tj|||
d}|D ]
\}}|d	   qL| | | d
 S )z(Upload company documents from local pathr   z/upload..tmpr   rbr?   )r   rC      rK   )rA   r   r   
startswithendswithr   r   re   r   _get_content_typerB   rM   rz   closerO   rP   )rD   rF   r   rR   r   rootr   	filenamesfilename	file_pathrC   rS   
file_tupler:   r:   r;   upload_company_documents  s   "

z"ApiClient.upload_company_documents  
expires_inc                 C   sF   | j  d| d| d| }tj|| jd}| | | d d S )zGet signed URL for S3 objectz/storage/signed-url?tenant_id=r   z&expires_in=rJ   rK   rR   rL   )rD   r   rF   r   rR   rS   r:   r:   r;   get_signed_url  s   
zApiClient.get_signed_urlrS   c                 C   s|   |j dkr<d|j  }z(| }d|v r| d|d  }d|v r/| d|d  }W t|W t|   Y t|dS )z2Check response status and raise exception if errorru   z
API Error message: rh   r^   N)r{   rP   	Exception)rD   rS   	error_msg
error_datar:   r:   r;   rO     s   
zApiClient._check_responser   c                 C   s2   t j|d  }ddddddd}||d	S )
(Get content type based on file extensionr   application/pdf	text/html
text/plainGapplication/vnd.openxmlformats-officedocument.wordprocessingml.documentAapplication/vnd.openxmlformats-officedocument.spreadsheetml.sheetr>   ).pdf.html.txt.docx.xlsx.jsonapplication/octet-streamr   r   splitextr   rN   rD   r   extcontent_typesr:   r:   r;   r     s   zApiClient._get_content_typeN)F)T)r   )$r,   r-   r.   r/   rE   rf   r   rT   rW   rX   rl   rr   r   r   r   bytesr   r   r   r   r   r   r   r   boolr   r   r   intr   r   r   rM   ResponserO   r   r:   r:   r:   r;   r<      sB    	
#&
	r<   c                   @   s  e Zd ZdZd}defddZd~deded	efd
dZdeded	eeef fddZ	dededed	efddZ
dedededed	df
ddZdededed	eeef fddZd~dedeeeef  ded	eeef fddZd~dededed	eeef fddZdededeeeef  d	eeef fddZd~dededeeeef  ded	eeef f
dd Z	d~deded!ed"eded	eeef fd#d$Zded%ed	eeef fd&d'Zdeded	eeef fd(d)Zdedefd*d+Zd~dededed,ed-ed	efd.d/Z		0ddeded1ee d2ee d3ed	eeef fd4d5Z	6	0dded!eded7ed8ed	eeef fd9d:Z	6	0	ddeded7ed8ed2ee d	eeef fd;d<Z	0	0ddeded3ed=ed	eeef f
d>d?Zd@eeef dAed	dfdBdCZdDed	eeef fdEdFZdGed	efdHdIZdJedededKed	ee f
dLdMZ dNee dededKedOee d	ee fdPdQZ!defdRdSZ"dAedTefdUdVZ#dedWedXefdYdZZ$d[ed	efd\d]Z%d[ed^ed	efd_d`Z&dadb Z'dced	efdddeZ(	d~dedfedgeded	ee f
dhdiZ)djedked	efdldmZ*d[ed^edged	efdndoZ+d[ed	efdpdqZ,d[ed	efdrdsZ-dtedDed	efdudvZ.dted	dfdwdxZ/dted	efdydzZ0dted	efd{d|Z1dS )MinaionsRFPServicez1Main service for RFP processing using Minaions AIconfig.jsonconfig_pathc              
   C   s  zt |d}t|| _W d   n1 sw   Y  W n ty2 } z	td|   d}~ww tj| jd d d| _	| j
di 
dd	| _tj| jd
d | j
di 
dd}| j
di 
dtj
d}|sstdt||| _t| jd d | _d| _d| _d| _dS )z)Initialize the service with configurationr   NzFailed to load configuration: 	anthropicrB   )rB   file_storagebase_dirz/tmp/rfp_serviceTexist_oknodejs_serviceapi_urlz"http://localhost:5000/internal-apiINTERNAL_API_KEYzgInternal API key is required. Please set it in config.json or as INTERNAL_API_KEY environment variable.)r   rP   loadconfigr   r_   rh   r   ClientclientrN   r   r   makedirsenviron
ValueErrorr<   
api_clientr   	extractorcurrent_tenant_idcurrent_process_idcurrent_process_type)rD   r   r   eapi_base_urlrB   r:   r:   r;   rE     s,   
zMinaionsRFPService.__init__NrF   rV   rG   c           ,   
   C   s`  z0| j ||}|di }|di }| jd |i kri }|dg }|D ]"}|dd}	|dd}
|dd}|	rL|
sD|rL|
rH|
n|||	< q*d	}|d
7 }|d7 }|d7 }|drj|d|d  d7 }|dry|d|d  d7 }|dr|d|d  d7 }|dg }t|dD ]\}}|r|d| d| d7 }q|dr|d|d  d7 }|dr|d|d  d7 }|dr|d|d  d7 }|dr|d|d  d7 }|d r|d!|d   d7 }|d"r|d#|d"  d7 }|d7 }|d$7 }|d%7 }|d&r|d'|d&  d7 }|d(r+|d)|d(  d7 }|d*r;|d+|d*  d7 }|d7 }|d,7 }|d-7 }|d.rW|d/|d.  d7 }|d0rg|d1|d0  d7 }|d2rw|d3|d2  d7 }|d4r|d5|d4  d7 }|d6r|d7|d6  d7 }|d8r|d9|d8  d7 }|d7 }|d:g }|r|d;7 }|d%7 }t|dD ]Q\}}|dr|d<| d=7 }|d>|d  d7 }|d?r|d@|d?  d7 }|dAr|dB|dA  d7 }|dCr|dD|dC  d7 }|d7 }q|dEi }|r|dF7 }|d7 }|dGg }|D ]'}|dHrR|dIrR|dJd}|dK|dH  d| dL|dI  d7 }q,|dMg }|D ]}|dNrn|dO|dN  d7 }q\|dPg }|D ]}|dQr|dR|dQ  d7 }qx|dSg }|D ]}|dTr|dU|dT  d7 }q|d7 }|dVg }|rP|dW7 }|dX7 }t|dD ]\}}|dYs|dZrN|d[| d=7 }|dYr|d\|dY  d7 }|d]r|d^|d]  d7 }|dZr
|d_|dZ  d7 }|d`r|da|d`  d7 }|dbr*|dc|db  d7 }|ddr:|de|dd  d7 }|dfrJ|dg|df  d7 }|d7 }q|dhg }|r{|di7 }|dj7 }|D ]}|dkru|dl|dk  d7 }qc|d7 }|dm7 }| jdnt| do | jdp|  |W S i }|dg }|D ]'}|dd}	|dd}
|dd}|	r|
s|r|
r|
n|||	< qd	}|d
7 }|d7 }|d7 }|dr|d|d  d7 }|dr|d|d  d7 }|dr|d|d  d7 }|dg }t|dD ]\}}|r$|d| d| d7 }q|dr6|d|d  d7 }|drF|d|d  d7 }|drV|d|d  d7 }|drf|d|d  d7 }|d rv|d!|d   d7 }|d"r|d#|d"  d7 }|dqr|dr|dq  d7 }|d7 }t } |ds7 }|d
7 }| D ]y\}!}
|
dtt	j
|
dui dvdw|
dk|
dxf}"|"| v r̐q| |" |
dtdy}#|
dkdy}$|
dxdy}%|
dui }&|dz|# d7 }|d{|$ d7 }|d||% d7 }|&r|d}7 }|& D ]\}'}(|d~|' d|( d7 }q|d7 }q| jdt|  d |W S  ty }) zq| jd|)  z\| j ||}|di }|dg }i }*|D ]}|dd}	|dd}
|	rs|
rs|
|*|	< q[d|*dd d}+|+d|*dd d7 }+|+d|*dd d7 }+|+W W  Y d})~)S    Y W Y d})~)dS d})~)ww )z
        Get and format company information from API (structured data only)
        
        Args:
            tenant_id: Tenant ID
            
        Returns:
            str: Formatted company information from structured data
        company_inforaw_analysisz-Retrieved company info (structured data only)detailsnamer   valuer   z*COMPANY INFORMATION FOR TENDER SUBMISSION
z>============================================================

zBASIC COMPANY DETAILS:
z----------------------
r   zCompany Name: 
r   zEntity Type: registeredAddresszRegistered Address: branchOfficeAddressr   zBranch Office Address r   r   z	Website: officePhoneNumberzPhone: officeEmailzEmail: 	officeFaxzFax: authorizedPersonToSignDocszAuthorized Signatory: authorizedPersonDesignationzDesignation: zEMPLOYEE INFORMATION:
z---------------------
totalEmployeeszTotal Employees: technicalStaffzTechnical Staff: r   zEmployee Details Document: zLEGAL & REGISTRATION DETAILS:
z----------------------------
r    z$Company Incorporation Number (CIN): r!   z Permanent Account Number (PAN): r"   zGoods and Services Tax (GST): r#   z9Micro, Small and Medium Enterprises (MSME) Registration: 
typeOfMsmezMSME Type: r$   zVDepartment for Promotion of Industry and Internal Trade (DPIIT) Startup Registration: directorDetailszDIRECTOR INFORMATION:
z	Director z:
z  Name: addressz  Address: panz"  Permanent Account Number (PAN): dinz(  Director Identification Number (DIN): financialDetailszFINANCIAL INFORMATION:
turnoverDetailsturnoverYearturnoverturnoverUnitzAnnual Turnover  
itrDetailsitrYearzIncome Tax Return (ITR) Year: ca_certificate_detailsca_certificate_descriptionz'Chartered Accountant (CA) Certificate: balanceSheetDetailsauditedBalanceSheetYearBalance Sheet Year: pastExperienceDetailszPAST EXPERIENCE:
z----------------
customerprojectzProject z  Client Name: clientLocationz  Client Location: z  Project Title: projectValuez  Project Value: projectScopez  Project Scope: projectStartDatez  Start Date: projectEndDatez  End Date: r*   zCERTIFICATIONS:
z--------------
descriptionz- z=============================================================
z,Successfully formatted company information (z characters)zCompany information: 
 aboutz!About Company (Additional Info): zDOCUMENT ANALYSIS INFORMATION
doc_typekey_infoT)	sort_keysr   N/AzDocument Type: zDescription: zFile Path: zKey Information:
  - z>------------------------------------------------------------

z
Formatted z# unique documents from raw_analysisz&Error formatting company information: zNot specifiedz	Contact: Nz!Company information not available)r   rW   rN   process_loggerr`   	enumerater   setitemsrP   dumpsaddr   rh   ),rD   rF   rV   company_datar  r  details_infodetails_listitemr  r  file_urlformatted_infobranch_addressesir  r(   directorfinancial_detailsturnover_detailsr  unititr_detailsitrca_cert_detailsca_certbalance_sheet_detailsbalance_sheetr)   r   certificatescertseenr   identityr/  r-  r   r0  kvr  
basic_infofallback_infor:   r:   r;   _format_company_information  s  










$\

z.MinaionsRFPService._format_company_informationrY   c           #      C   s  z|  ||d | j|ddt ddii | jd| d|  | j||}|s5td| | jd	|  | j|}|sMtd
| t	j
| jd| }t	j|dd g }| jd|d|  |dg }dd |D }| jd|  |std | j|ddt ddddddii d|ddg ddW S | jdt| d | j|}| jdt| d g }	|D ]}
zt|
d d!}|
d"d#| }t|
d$d!}t|
d%d!}|d&kr|d'kr| jd(| d)| d* td+|  | j||}| jd,| d-t| d. t	j
||}t|d/}|| W d0   n	1 sBw   Y  | jd1|  | d2r|| jd3|  t|}|r||	| | jd4t| d5|  zt	| W n ty } z| jd6| d-|  W Y d0}~nd0}~ww W q ty } z| jjd7|
d  d-| dd8 W Y d0}~qd0}~ww tt |	}| jd9t| d: |rtj!||| jd;}t"| t#|}|D ]}zzt	j
$|}t|d<}|% }W d0   n	1 sw   Y  |dt|}t&'d=d>|}d?| d@| dA| }| (|}| j)||||}||dBd'dC|||dD|dEdF}| j*|}
t|
d  }|+||t|dG | jdH|  W q ty } z| jdI| d-|  W Y d0}~qd0}~ww dJd |D }dKd |dg D } | | }!ddL|iidt dMt|t|dNdidO}"| j||" | j|ddt dt|t|dNdii zt,-| W n   Y d|t|t||dPt| dQdW S  ty? } z6tdRt|  z| j|ddt dSdTt|idii W n   Y dTdRt| dUW  Y d0}~S d0}~ww )VzKExtract links from existing RFP documents and download additional documentsget_rfp_documents$setlogsget_rfp_documents_started	timestampactionzGetting tender z for tenant Tender not found: zGetting tenant zTenant not found: 	rfp_docs_Tr   z,Getting additional RFP documents for tender 	bidNumberr\   c                 S   s   g | ]}|rt |qS r:   rf   .0r~   r:   r:   r;   
<listcomp>w  s    z8MinaionsRFPService.get_rfp_documents.<locals>.<listcomp>zDocument IDs from tender: zNo document IDs found in tender$pushget_rfp_documents_completedr   z'No documents found in tender to process)links_founddocuments_downloadedr   rZ  r[  r  success)statusrY   re  rf  downloaded_documentsr   zGetting 
 documentsz
Retrieved r]   r   r  doc_ra   r   bid_documenttenderzProcessing document:  (ID: )zGetting content for document zReceived content for document r   z bytesr   NSaved document to r   zExtracting links from PDF: z
Extracted z links from zCould not delete temp file zError processing document )exc_infoFound z" unique links across all documentsr4  r   [\/\\]r   tenders/r=   z/linked/rfps3extracted_link)tenderIdr^  sourceparentTenderIdtenantr  ra   r   storageTypestorageDetailsmetadata)r   r   sizezSaved linked document: zError saving linked document c                 S   s   g | ]}|d  qS )r   r:   ra  rk   r:   r:   r;   rb        c                 S      g | ]}t |qS r:   r_  r`  r:   r:   r;   rb    r  z$eachlinked_documents_added)re  rf  	$addToSetrc  zSuccessfully downloaded z additional RFP documentszError getting RFP documents: get_rfp_documents_failedrh   )ri  r   )._setup_process_contextr   rr   timer4  r`   rl   r   rT   r   r   r   r   r   rN   r_   rg   r   r   rf   r   r   r   r   r   taextract_links_from_pdfextendr   r   rh   rc   r6  download_linked_documentsremove_duplicate_pdfslist_files_in_directoryr   readresubr   r   r   re   shutilrmtree)#rD   rY   rF   rn  r~  processing_dirrj  rt   	documents	all_linksrk   r~   doc_namer/  doc_categorydoc_content	temp_pathr   linkscleanup_errr  unique_linksdownloaded_filesr   r   r   
bid_numberr   r   storage_detailsdocument_recordnew_doc_idsexisting_doc_idsall_doc_idsrm   r:   r:   r;   rU  Q  sj  

	

$.



&
	

	z$MinaionsRFPService.get_rfp_documentsc           	      C   s   z<| j ||}|std| |||ddddi t dddid	gd
	}| j |}t|d }td|  |W S  tyQ } z	t	d|   d}~ww )zGCreate a placeholder analysis record immediately and return analysis_idr\  analysis_estimation
estimatingr   INRanalysis_estimation_startedr   zCost estimation in progressrg  	r~  rn  companyra   ri  estimatedCostcurrencyanalysisMetricsrW  r]   z%Created placeholder analysis record: z%Error creating analysis placeholder: N)
r   rl   r   r  r   rf   r_   r`   r   rh   )	rD   rY   rF   rV   rn  analysis_recordanalysisr   r  r:   r:   r;   create_analysis_placeholderH  s6   z.MinaionsRFPService.create_analysis_placeholderr   c           #      C   s`  zV|  ||d | jd td|  | j||}|s'td| dd |dg D }| j|}t	j
| jd| }t	j
|d	}	t	j|	d
d t	j
|	d}
t	j
|	d}d| d}| j||d}t|dkr|dkrt|d}|| W d   n1 sw   Y  nLg }|D ]>}z|dd|d  }|di }||d W q ty } z| jd|d d|  W Y d}~qd}~ww tj||| jd}d| d}| j||d}t|dkr|dkrt|
d}|| W d   n	1 sw   Y  i }d}d}| jd t||	| jd  d! | jd}| j|}t	j
|	d}| ||}| jd"|  | j|}| jd#| d$ || }| jd%| d& d'}t|
d(}| }W d   n	1 s~w   Y  d| d}d)}| j||||}| jd* d'}t|d(}| }W d   n	1 sw   Y  d| d}d+}| j||||}| jd, td-|| }t	j
 |rt!"| | jd.| d/ n
| jd.| d0 d1}g }|d2krd3}|d4 |d5krd6}|d7 t|d8kr!|d99 }|d: t#|| d;} d<| |t|||||d=d>}!| j$||! | jd? | jd@|  tdA|   W dS  ty } zKtdB|  zdCt%% dDdEt&|idFgdG}!| j$||! W n ty }" ztdH|"  W Y d}"~"nd}"~"ww W Y d}~dS W Y d}~dS d}~ww )IzMProcess analysis cost estimation in background and update the analysis recordr  z;Analyzing document complexity and preparing for analysis...z5[INTERNAL] Starting cost estimation for analysis_id: r\  c                 S   r  r:   r_  r`  r:   r:   r;   rb  z  r  zRMinaionsRFPService.process_analysis_cost_estimation_background.<locals>.<listcomp>r\   analysis_cost_analysis_cost_outputTr   
merged.txtdoc_text.jsontender_analysis//doc_text.jsonFr   falser   Nr  rl  r]   r  rR   Failed to get document URL r   rt  /merged.txt{Gzt?KAttempting document extraction with external extractor for cost analysis...r   rB   Company Info: Company Info length:  wordsTotal word count is 
Uploading files to S3 now.r   r   r   Uploaded merged.txt file to S3r>   !Uploaded doc_text.json file to S3
   Directory '' deleted successfully.' does not exist.      ?'  333333?Large document sizea        ?Very large document size   皙?Multiple documents   	estimated	wordCountdocumentCountcomplexityFactorcomplexityIndicatorsbaseCostcostPerWord)ri  r  r  (Analysis complexity assessment completedTotal words analyzed:    [INTERNAL] Estimated cost: ₹z.Error in background analysis cost estimation: estimation_failedanalysis_estimation_failedrh   rg  )ri  rW  -Failed to update analysis with error status: )'r  r4  r`   r_   r   rl   r   rN   r   r   r   r   r   r   r   r   r   r   re   r   rh   r  r  r   r   r   count_total_wordsrT  count_wordsr  r   maxexistsr  r  roundr   r  rf   )#rD   rY   rF   rV   r   rn  rt   r  temp_dir
output_dirout_txt_fileout_json_filer   r  r   r  rk   r   r  r  r  documents_texttotal_word_countbase_cost_per_wordmerged_file_pathr  company_info_word_countr   r   	base_costcomplexity_factorcomplexity_indicatorsestimated_costrm   update_errorr:   r:   r;   +process_analysis_cost_estimation_backgroundm  s   *








z>MinaionsRFPService.process_analysis_cost_estimation_backgroundc           "      C   s  |  ||d | d | j||}|std| dd |dg D }| j|}tj| j	d| }tj|d}tj
|d	d
 tj|d}	tj|d}
d| d}| j||d}t|dkr|dkrt|
d}|| W d   n1 sw   Y  nLg }|D ]>}z|dd|d  }|di }||d W q ty } z| jd|d d|  W Y d}~qd}~ww tj||| jd}d| d}| j||d}t|dkr|dkrt|	d}|| W d   n	1 sw   Y  i }d}d}| jd t||| jd d  | jd}| j|}| ||}| jd!|  | j|}| jd"| d# || }| jd$| d% d&}t|	d'}| }W d   n	1 skw   Y  d| d}d(}| j||||}| jd) d&}t|
d'}| }W d   n	1 sw   Y  d| d}d*}| j||||}| jd+ td,|| }tj|rt !| | jd-| d. n
| jd-| d/ d0}g }|d1krd2}|d3 |d4krd5}|d6 t|d7kr|d89 }|d9 t"|| d:}|||dd;|d<|t|||||d=t## d>||t|d?d@gdA	}| j$|} t%| d }!| jdB | jdC|  t&dD|  |!|d<t||||dEdFS )GzHEstimate the cost for RFP analysis based on document complexity and sizer   r  r\  c                 S   r  r:   r_  r`  r:   r:   r;   rb    r  z=MinaionsRFPService.estimate_analysis_cost.<locals>.<listcomp>r\   r  r  Tr   r  r  r  r  Fr   r  r   Nr  rl  r]   r  rR   r  r   rt  r  r  r  r   rB   r  r  r  r  r  r   r   r   r  r>   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  analysis_cost_estimation)r  
word_countdocument_countrg  r  r  r  r  )r  r  r  r  )r   r  r  r  )'r  r   rl   r   rN   r   r   r   r   r   r   r   r   r   r   re   r   r4  rh   r  r  r`   r   r   r   r  rT  r  r  r   r  r  r  r  r  r  r   rf   r_   )"rD   rY   rF   rV   rn  rt   r  r  r  r  r  r   r  r   r  rk   r   r  r  r  r  r  r  r  r  r   r   r  r  r  r  r  r  r   r:   r:   r;   estimate_analysis_cost  s   *







	z)MinaionsRFPService.estimate_analysis_costfolder_listc              
      s  z1 |d jd|  tjjd| d }tj|dd tj|d| }tj|dd di  i i j	|
d	i i rr D ]\}}t|tred
|v re||d
 < qRjdt d  f	dd|D ]}|| qtj|d}	z+t|	ddd}
tj |
ddd W d   n1 sw   Y  jd|	  W n ty } zjd|  W Y d}~nd}~ww r5tj|d}z6t|ddd}
tj|
ddd W d   n1 sw   Y  jd|  jdt  W n ty4 } zjd|  W Y d}~nd}~ww t}|dkrgjd| d t||t}|d r[jd  njd!|d"   n
jd# ddi}tj|d$} }tj|rzHt|d%dd}
t|
}W d   n	1 sw   Y  r߈ D ]5\}}||v r||  }||d&< |||< jd'| d(|  qjd)| d*|  q| D ]\}}d
|vrMd} D ]\}}||kr||d
< d} nq|s(|v r(| } D ]\}}||kr&||d
< d} nq|sMtj|rC||d
< jd+|  n
jd,| d- |
d&|kr\||d&<  
|}|rv|||< jd.| d/|  q|||< jd0|  qz)t|ddd}
tj||
ddd W d   n	1 sw   Y  jd1 W n ty } zjd2|  W Y d}~nd}~ww W n ty } zjd3|  W Y d}~nd}~ww jd4 j||d5d6d7t d5d8id9 ||d:d;}z	t | W |W S  ty2 } zjd<|  W Y d}~|W S d}~ww  tyH } z	t!d=|   d}~ww )>z4Analyze Company documents to extract key informationcompany_doc_analysisCompany Id: company_r   Tr   company_merged_r   rawAnalysis	file_hashrs  z! existing file hashes in databasec              	      s  t j|| d }t j|dd | dg D ]}|di }|d}|di }|d}|d	}|r7|s8qt j||}	j|d
}
|
rQ|
dksQ|
dkrRqt|	d}||
 W d    n1 sgw   Y  |rr| |	< 	|	}|v r| }j
d|	 d|  |r|v r|  }|	|d< ||d< ||< | |	< j
d|  t |	 n"|v r׈| }||	< t |	 j
d|	 d|  n|	|< d7 q| dg D ]}|| qd S )Nfolder_nameTr   r   
documentIdr]   r  r   r  Fs   FalseFalser   z"Cross-request duplicate detected: z matches existing file_id: r   r  z3Copied existing analysis to new duplicate file_id: z0Current-request duplicate detected and removed: z -> original: r   
subfolders)r   r   r   r   rN   r   r   r   r   _calculate_file_hashr4  r`   copy_transform_file_pathr   )folder	base_pathfolder_path
file_entryrk   file_idstorager   r   
local_filer   r   r  existing_file_idduplicate_analysisoriginal_file	subfolder	directory_mapduplicate_mapexisting_analysisexisting_file_hashesr   file_hash_mapprocess_folderrD   rF   r:   r;   r    sR   





z?MinaionsRFPService.analyze_company_docs.<locals>.process_folderzdirectory_map.jsonwutf-8encodingr  Findentensure_asciiNzDirectory mapping saved to z"Failed to save directory mapping: zduplicate_map.jsonzDuplicate mapping saved to z4Total current-request duplicates found and skipped: z"Failed to save duplicate mapping: zProcessing z unique filesrh  z"Processing completed successfully!zProcessing failed: rh   z6No unique files to process - all files were duplicateszraw_analysis.jsonr   r   zCopied analysis from z to duplicate zOriginal file z% not found in analysis for duplicate zCalculated missing hash for z"Could not determine file_hash for z* - file not found and no mapping availablezMapped z -> zNo file_id mapping found for: z/Updated raw_analysis.json with file_id mappingsz*Failed to save updated raw_analysis.json: z%Failed to process raw_analysis.json: zUpdating Company Info in DBprocessing_completed)r   ri  rW  rY  rV  rc  	processed)rV   rF   r  ri  z&Failed to cleanup download directory: z%Failed to analyze Company documents: )"r  r4  r`   r   r   r   r   r   r   rX   rN   r7  rb   rd   r   r   rP   dumpr   rg   mdpprocess_directoryr   rh   r  r  r   r  r  r   r  r  r  r_   )rD   rF   r  rV   r   r  r  r   root_folderdirectory_map_filer   r  duplicate_map_fileunique_files_countresultsraw_analysis_filer  temp_raw_analysisduplicate_pathoriginal_pathr  r  
hash_foundhash_valr   results_dictr:   r  r;   analyze_company_docs  s  O







z'MinaionsRFPService.analyze_company_docsc           )      C   s`  zy|  ||d | jd|  | j|}|s!td| t|d |kr>| jd|d  d|  td| t|d }| jd|  | j||}|s_td	|d  | j	|d
didt

 ddid |dsdd |dg D }| j|}tj| jd| }	tj|	dd g }
|D ]>}z|dd|d  }|di }|
|d W q ty } z| jd|d d|  W Y d}~qd}~ww tj|	|
| jd}tj|	d}tj|dd tj|d }tj|d!}d"| d#}| j||d$}t|d%kr8|d&kr8t|d'}|| W d   n	1 s3w   Y  d"| d(}| j||d$}t|d%krm|d&krmt|d'}|| W d   n	1 shw   Y  z| jd) t|	|| jd* d+ | jd}| jd,|rt|nd% d- d.}t|d/}| }W d   n	1 sw   Y  d"| d(}d0}| j||||}d.}t|d/}| }W d   n	1 sw   Y  d"| d#}d1}| j||||}W n ty } z| jd2|  d}W Y d}~nd}~ww | ||	 |std3| jd4 t !||}tj"||| jd\}}| jd5 t#j$|| jd}|D ]H}|d6 }|d7 }|d8 } |d9 }!t%t&| |!d: }"| |!krl| d;|  d<| }#n| d=|  d>|! d<| }#||t#j'j() |#||"d? qA| j*|d||t|t|t+d@dA |, D dBdCidt

 dDdid t|}$n|di dE}|di dF}|di dGi dH}$| -||}%| jdI|%  | jdJt|% dK | jdL tj.||%| jd\}&}'| jdM|&  |||&|'dNdO}(| j	|dP|(dQdt

 dR|&|$dSdTid zt/0|	 W n tyO } z| jdU|  W Y d}~nd}~ww |&rc| jdV|dW dX|'  n| jdY|dW dX|'  dP|&|'||dZW S  ty } z(t1d[|  z| j	|d
d\idt

 d]d^t|idTid W     Y  d}~ww )_zFAnalyze RFP documents to extract key information and check eligibilityr  r  Analysis record not found: r~  Tenant mismatch:  vs rn  )   ✅ Analysis found and tenant validated: r\  ri  analysis_in_progressrW  rfp_analysis_startedrY  r#  c                 S   r  r:   r_  r`  r:   r:   r;   rb    r  z2MinaionsRFPService.analyze_rfp.<locals>.<listcomp>r\   	analysis_Tr   r  rl  r]   r  rR   r  r   Nrt  analysis_outputr  r  r  r  Fr   r  r   r  z9Attempting document extraction with external extractor...r   rB   zDocument extractor returned rk  r   r   r   r>   zDocument extractor failed: z8No text was extracted from documents using either methodz-Starting tender document analysis with LLM...z"Tender document analysis completedannexure_namedocument_name
page_startpage_endr   z	 on page z of z
 on pages -)r  ra   r-  source_documentpagesc                 s   s    | ]	}t | V  qd S r   )r   split)ra  r|   r:   r:   r;   	<genexpr>N      z1MinaionsRFPService.analyze_rfp.<locals>.<genexpr>)documentsProcessedanalysisChunkstotalWordCount)extracted_inforequired_docsanalysisMetadatatender_analysis_completedrJ  rK  rL  rG  r  r  z characterszStarting eligibility check...zEligibility check completed: )
isEligiblereason)extractedInforequired_documentseligibilityanalysis_completed)ri  r,  rfp_analysis_completed)is_eligibledocuments_processedrg  'Failed to cleanup temporary directory: u$   🚀 Company is eligible for tender r^  z
. Reason: u*   ⚠️ Company is NOT eligible for tender )ri  rU  eligibility_reasonrJ  rQ  zFailed to analyze RFP: analysis_failedrfp_analysis_failedrh   )2r  r4  r`   r   r   r   rf   rh   rl   r   r  rN   r   r   r   r   r   r   re   r   r  r  r   r   r   r   r   r   r  r   rg   _debug_extraction_resultseaaextract_annexures_infoanalyze_tender_with_LLMbpaidentify_required_documentsrc   rangeDocumentTypeANNEXUREupperrr   sumvaluesrT  check_eligibilityr  r  r_   ))rD   r   rF   rV   r  rY   rn  rt   r  r  r  rk   r   r  r  r  r  r  r  r   r  r   r  r   r   annexure_hintsrJ  doc_analysesrK  annexurer=  r>  r?  r@  rC  r-  rV  r  rU  rX  analysis_resultr:   r:   r;   analyze_rfp  sn  
*"



	

 

zMinaionsRFPService.analyze_rfprK  c                 C   s  |  ||d | j|}|std| t|d |kr3| jd|d  d|  td| | jd|  |ddkrGtd	|d
i }|di }|di }d}dd |D }	dd |	D }
dd |	D }t	|
}t	|}t	|	}d}g }|D ]}dt
dd }||7 }||dddt|dd q|
D ]}dt
dd }||7 }||dddt|dd qt|| d}|t|d |dd |d!||||t|d||	d"t d#||t|d|d$d%gd&	}| j|}t|d' }| jd( | jd)|  | jd*|  | jd+t|d  | jd,| d-| d.| d/ | jd( ||d!|t|d|||||	d0d1S )2z-Estimate the cost for bid document generationestimate_bid_generationr5  r~  r6  r7  r8  ri  rS  z@Analysis must be completed before estimating bid generation costr,  rR  rP  i  c                 S   s$   g | ]}| d d dv r|qS )ra   r   )CUSTOMrc  rN   rd  r  r:   r:   r;   rb       $ zCMinaionsRFPService.estimate_bid_generation_cost.<locals>.<listcomp>c                 S   $   g | ]}| d d dkr|qS )ra   r   rn  ro  r  r:   r:   r;   rb    rp  c                 S   rq  )ra   r   rc  ro  r  r:   r:   r;   rb    rp  r   }   r  r  r  Annexurerc  r  )documentra   costd   zCustom Documentrn  rn  bid_generation_estimationr  r  )totalBillableDocumentscustomDocumentCountannexureCountr  additionalCostcostBreakdownbillableDocumentsbid_generation_cost_estimation)r  r  additional_costbillable_document_countrg  )	r~  rn  
analysisIdra   ri  r  r  generationMetricsrW  r]   z2==================================================u"   Estimated Bid Generation Cost: ₹u   Base Cost: ₹u   Additional Cost: ₹zBillable Documents: z
 (Custom: z, Annexure: rp  )r  r  total_billable_documentscustom_document_countannexure_countcost_breakdownbillable_documents)r   r  r  r  )r  r   r   r   rf   r4  rh   r`   rN   r   randomuniformre   r  r  r   )rD   r   rF   rK  r  r,  rR  rJ  base_generation_costbillable_docscustom_docsannexure_docscustom_countr  total_billable_countr  r  rj  annexure_cost
custom_doccustom_costr  bid_generation_recordbid_generation
bid_gen_idr:   r:   r;   estimate_bid_generation_cost  s   






 z/MinaionsRFPService.estimate_bid_generation_costr   c           "      C   s>  |  ||d | jd|  | j||}|s td| t|d }| j|}|s5td| t|d }| j||}	|	sMtd|d  | j	|dd	id
t

 ddid ztj| jd| }
tj| jd| }tj|
dd tj|dd dd |	dg D }| j|}|D ]{}zW| jt|d |}|dd|d  }|ds|di ddkr| d}tj|
|}t|d}|| W d   n1 sw   Y  | jd|  W q ty } z| jd|d d |  W Y d}~qd}~ww | ||}| ||||}tj|
d!}tj|dd tj|d"}t|d#d$d%}tj|d&i d'i |d(d) W d   n	1 s[w   Y  | jd*|	d+  tj|
|||| jd,\}}}| ||||	d+d-}| ||||	d+d-|}zt !|
 t !| W n ty } z| j"d.|  W Y d}~nd}~ww g }g }|D ]&}|d/d-}|d0d-|d1d-|d2d-|d3d-|d4} |#|  q| jd5|  || }!| j	||!|d6d7||d8d
t

 d9t$|!t$|t$||t$|d:d;id< | jd=t$| d>t$| d? d@|!||t$|!dAt$| d>t$| dBdCW S  ty } zLt%dD|  z&dEt& v ritj'|
rit !|
 dFt& v r{tj'|r{t !| W n   Y | j	|ddGid
t

 dHdIt|id;id  d}~ww )Jz9Generate complete bid documents using enhanced automationr  r  !Bid generation record not found: r  r5  rn  r\  ri  generation_in_progressrW  bid_document_generation_startedrY  r#  bid_gen_company_docs_Tr   c                 S   r  r:   r_  r`  r:   r:   r;   rb  _  r  z=MinaionsRFPService.generate_bid_documents.<locals>.<listcomp>r\   r]   r  rl  r   r  mimeTyper   r   Nrq  Failed to save document r   tender_analysistender_analysis.jsonr  r  r  r,  rP     r   z$Generating bid documents for tender r^  rt  r   z)Failed to cleanup temporary directories: r>  r   r   r{  ra   )r   r   r{  ra   r>  zcompany document urls )generatedDocumentscompanyDocumentUrlsgeneration_completed)ri  outputDirectorydocumentStatus!bid_document_generation_completed)r  generated_countreferenced_countoutput_directorycompany_docs_countrg  )r  rV  rc  zSuccessfully generated z bid documents and referenced  company documents	completedz
Generated z company documents successfully)ri  generated_documentscompany_document_urlsr  r  r   z"Failed to generate bid documents: temp_rfp_dirtemp_company_docs_dirgeneration_failedbid_document_generation_failedrh   )(r  r4  r`   r   r   r   rf   r   rl   r   r  r   r   r   r   r   rN   r   r   r   r   r   r   rh   rT  (_download_and_organize_company_documentsrP   r%  r_  prepare_bid_documents_save_generated_documents_to_s3#_create_referenced_document_entriesr  r  rg   re   r   r_   localsr  )"rD   r   rF   rK  rV   bid_genr   r  rY   rn  r  r  rt   r  rk   r  r   r   r   r  formatted_company_infodownloaded_company_docsanalysis_diranalysis_filefinal_docs_dirdocument_statusreferenced_company_docsgenerated_document_idsreferenced_document_idssimplified_company_docscompany_docs_urlsr>  simplified_docall_document_idsr:   r:   r;   generate_bid_documents6  s  
 
*"




"	


z)MinaionsRFPService.generate_bid_documentsr   user_promptc           +      C   s  |  ||d z| j||}|std| t|d }| j|}|s.td| t|d }	| j|	|}
|
sFtd|d  | j|g}|sVtd| |d }|d	d
| }| j	
d| d| d tj| jd| }tj|dd z| j||}tj||}t|d}|| W d   n1 sw   Y  | j	
d|  dd |
dg D }| j|}|D ]x}zT| jt|d |}|d	d
|d  }|ds|di ddkr| d}tj||}||krt|d}|| W d   n	1 sw   Y  W q ty@ } z| j	d|d d|  W Y d}~qd}~ww tj|d}tj|dd tj|d}t|d d!d"}tj|d#i d$i |d%d& W d   n	1 syw   Y  | ||}tj|d'}tj|dd tj|d(}tj|dd | j	
d)| d*|  d+}| j	
d,|  tj|||||d#i d$i d-\}}|std.tj|s4z7| j	d/|  | j	d0| d1 t|D ]\}} }!|!D ]}"tj||"}| j	d2|  qqW n ty, }# z| j	d3|#  W Y d}#~#nd}#~#ww td4| | j	
d5|  t|d6}| }$W d   n	1 sRw   Y  tj|}|
d7t|	}%t d8d9|%}%d:| d;|% d<| }&| !|}'|" d=rd>}'| j#|$|&|'|}(||d?d(d@|(||%|||t$$ dAdB})| j%|)}t|d }*| j&|dC|*idDt$$ dE|*|||dFdGidH | j	
dI|  dJ|*||||&dK| dLW zt'(| W W S  ty } z| j	)dM|  W Y d}~W S d}~ww zt'(| W w  ty& } z| j	)dM|  W Y d}~w d}~ww  tyY } z&t*dN|  zdOt+ v rLtj|rNt'(| W  W  W     Y  d}~ww )Pz9Regenerate a specific bid document based on user feedbackregenerate_bid_documentr  r  r5  rn  r\  Document not found: r   r  rl  zRegenerating document: ro  rp  regenerate_Tr   r   NzDownloaded document locally: c                 S   r  r:   r_  r`  r:   r:   r;   rb  .	  r  z>MinaionsRFPService.regenerate_bid_document.<locals>.<listcomp>r\   r]   r   r  r  r   z Failed to save context document r   r  r  r  r  r  r,  rP  r  r  regenerated_docsregeneratedzCreated output directories: z and g        z+Calling BPA regenerate_document with path: )org_doc_file_pathr  r  r  rJ  z7Failed to regenerate document - no output path returnedzRegenerated file not found at: zContents of :z  Found file: z"Error listing directory contents: z1Regenerated document not found at expected path: z&Successfully regenerated document at: r   r^  ru  r   bid_documents/r=   z/regenerated/r   .htmr   bid_document_regeneratedrx  )bidGenerationIdr^  originalDocumentIdoriginalDocumentName
userPromptregeneratedAtr}  regeneratedDocumentsrW  document_regenerated)r   original_document_idoriginal_filenamer  rg  r  z#Successfully regenerated document: r  z#Document regenerated successfully: )ri  regenerated_document_idr  r  regenerated_filenamer   r   rW  z#Failed to regenerate bid document: r  ),r  r   r   r   rf   r   rl   r   rN   r4  r`   r   r   r   r   r   r   r   r   r   r   rh   rP   r%  rT  r_  regenerate_documentr  r   r  r   r  r  r   r   r   r  r   r   r  r  rg   r_   r  )+rD   r   rF   r   r  rV   r  r   r  rY   rn  r  rt  r  r  r  local_document_pathr   rt   tender_documentsrk   r   r   r  r  r  r  r  regenerated_subdir
regen_costregenerated_doc_pathr   dirsr   r   debug_errorr   r  r   r   r  r  regenerated_doc_idr:   r:   r;   r    sF   

*"


 z*MinaionsRFPService.regenerate_bid_documentr   c                 C   sb   t j|std| | j||}|d sdnd|d |d |d |d d|d  d	d
S )z?Upload company documents to S3 for future use in bid generationz%Local documents path does not exist: upload_errorsr  partialuploaded_filesuploaded_counterror_countz	Uploaded z company documents to S3)ri  r  r  r  r  r   )r   r   r  r   r   r   )rD   rF   r   resultr:   r:   r;   upload_company_documents_to_s3	  s   z1MinaionsRFPService.upload_company_documents_to_s3c              
   C   s  zo|  ||d | j|}|std| t|d |kr'td| t|d }| j||}|s?td|d  ztj| j	d| }tj
|dd d	d
 |dg D }| j|}|D ]C}	| jt|	d |}
|	dd|	d  }|ds| d}tj||}t|d}||
 W d   n1 sw   Y  qf| jd|  tj|dd}td|  tj|st|}nd}|rVd| }d}tj|s| | tj|s| || | |rRt|||}d}t|d}| }W d   n	1 sw   Y  d| d}d}| j||||}| j|d|d|d |did t d!d"|id#id$ d%||d&d'W W S t d(t d) t yp } z
| j!d*|   d}~ww  t y } z	t!d*|   d}~ww )+z>Setup chat functionality for RFP queries using enhanced searchsetup_rfp_chatr5  r~  rn  r\  chat_setup_Tr   c                 S   r  r:   r_  r`  r:   r:   r;   rb  	  r  z5MinaionsRFPService.setup_rfp_chat.<locals>.<listcomp>r\   r]   r  rl  r   r   Nz+Setting up chat functionality for analysis r  chunks.jsonChunk Data file path r;  index_es.jsonr   r   r  /chunks.jsonr>   	chatSetuprR   )bidIdsetupCompleted
chunksPathindexConfigrW  chat_setup_completedbid_idrg  r#  r  z%Chat functionality setup successfully)ri  r  chunks_pathr   Failed to validate chunks data*Failed to process documents for chat setupzFailed to setup RFP chat: )"r  r   r   r   rf   rl   r   r   r   r   r   rN   r   r   r   r   r   r4  r`   r_   r  bqprocess_task_create_default_index_config_convert_chunks_to_json_validate_chunks_datachates_index_datar  r   r   r  r   rh   )rD   r   rF   r  rY   rn  r  rt   r  rk   r  r   r   r   	data_fileri  r  index_config_filetag_responsesr   r   r   r  r  r:   r:   r;   r  	  s   




	
z!MinaionsRFPService.setup_rfp_chatc                 C   s  | j |}|std| t|d }| j ||}|s'td|d  d| d}| j ||d}t|tr?|d}d| }|	d	rQ|d	 	d
st
|dk rtd | j |d	|ddddt didt ddid | j |ddt d|ddddii dS ztd|  | ||d tj| jd| }	tj|	dd dd |	dg D }
| j |
}| jd t| d! |D ]D}| j t|d" |}|	d#d$|d"  }|d%s| d%}tj|	|}t|d&}|| W d   n	1 sw   Y  q| jd' tj|	d(d)}td*|  tj|s:t
|	}nd}|r5d+}tj|sM|  | tj|sZ| !|	| | jd, | "|r1ztt#$|||}d}t|d-}|% }W d   n	1 sw   Y  d| d.}d/}| j &||||}| j |d	|dd|d0 |t didt ddid | j |ddt d|t|dddii | jd1|  W n^ t'y0 } zI| j(d2|  | j |d	|dddt|t d3idt d4t|d5d6did | j |ddt d4t|d5d6dii W Y d}~nd}~ww t'd7t'd8z	t)*|	 W W dS  t'y^ } z| j+d9|  W Y d}~nd}~ww W dS  t'y } ztt(d:| d;|  z?| j |d	d| dddt|t d3idt d4t|d<d6did | j |ddt d4t|d<d6dii W n t'y } zt(d=|  W Y d}~nd}~ww W Y d}~dS W Y d}~dS d}~ww td> | j |ddt d|dd?dii | jd1|  dS )@z1Background task for setting up chat functionalityr5  rn  r\  r  r  Fr  r;  r  r  p  zRFP documents small size...Tr   )r  r  setupInProgressr  r  setupCompletedAtrW  r  rY  r#  rc  r   )r  indexed_chunksrh  rg  Nz,Starting background chat setup for analysis setup_rfp_chat_bgr  r   c                 S   r  r:   r_  r`  r:   r:   r;   rb  
  r  z@MinaionsRFPService.setup_rfp_chat_background.<locals>.<listcomp>r\   zDownloading z documents...r]   r  rl  r   r   z&Processing documents for chat setup...r  r  r  r  z"Starting Elasticsearch indexing...r   r  r>   rR   z/Chat setup completed successfully for analysis zElasticsearch indexing failed: )r  r  r
  setupFailederrorMessagefailedAtchat_setup_failedelasticsearch_indexing)rh   stager  r  "Failed to cleanup temp directory: z*Background chat setup failed for analysis r   background_processingr  z/Chat setup already completed. Reusing the same.)r  rh  ),r   r   r   rf   rl   r   rb   r   decoderN   r  r  r_   r`   rr   r  r   r  r   r   r   r   r   r   r4  r   r   r   r   r   r  r   r  r  r  r  r  r  r   r   rh   r  r  rg   )rD   r   rF   r  rY   rn  r   merged_txt_filer  r  rt   r  rk   r  r   r   r   r  ri  r  r  r   r   r  es_errorcleanup_errorr  r  r:   r:   r;   setup_rfp_chat_background\
  s  







*

,z,MinaionsRFPService.setup_rfp_chat_backgroundquery	client_idc              
   C   s  | j |}|std| t|d |krtd| d| d}| j ||d}t|tr6|d}t	|dk r@d}	nd}	| j 
||}
|
sRtd	| |
d
i }|ds||d
i }|sitd| j |d
|idt ddid |d}|sd| }zntj| jd| }tj|dd}d| d}tj|stj|d}tj|dd | j ||d}t|d}|| W d   n1 sw   Y  t|||||	|}| j |ddt |||dii |W S  ty } z	td|   d}~ww )z8Chat with RFP documents using enhanced search and Clauder5  r~  r  r  Fr  r	  Tr\  r  r  z6Chat functionality not setup. Please setup chat first.rW  r  rY  r#  r  tenant_r  r  r  r  r   r   Nrc  chatHistory)rZ  clientIdr  rS   zFailed to process chat query: )r   r   r   rf   r   rb   r   r  r  r  rl   rN   rr   r  r   r   r   r   r  r   r   r   r  chat_with_rfpr   r   r_   rh   )rD   r   rF   rY   r  r  r  r   r  no_es_queryrn  
chat_setupr  r  r  r  r  r   rS   r  r:   r:   r;   r   z  sx   




z MinaionsRFPService.chat_with_rfpTr  document_categoriesadd_page_numbersc           %      C   s  zs|  ||d |std| j||}|std| |}| j|}|}	|	s/td| jdt|	 d tj	
| jd| }
tj	
|
d}tj|
d	d
 tj|d	d
 tj	
|
d}tj|d	d
 zg }|	D ]}zk| jt|d |}|dd|d  }tj	
|
|}t|d}|| W d   n1 sw   Y  | dr| | | ||}|rtj	|r|| | jd|  n	| jd|  W qo ty } z| jd|d d|  W Y d}~qod}~ww |stdt|D ]!\}}tj	|}|d  d| }tj	
||}t|| q	t|dd}| j||}|rC|ddnd}t !dd|}t"t## d }| d| d }tj	
||}| jd! t$%||| tj	|s{td"t|d#}|& } W d   n	1 sw   Y  d$| d%| d&| }!| j'| |!d'|}"||d(d)d*|"||t|	t|||pd+t## |d,d-}#| j(|#}t|d }$| j)|d.|$id/t## d0|$t|	t|||d1d2id3 | jd4t|	 d5t| d6|  d7|$|t|	t|t| |"d8d4t|	 d9t| d:d;W zt*| t*|
 W W S  tyK } z| jd<|  W Y d}~W S d}~ww zt*| t*|
 W w  tyt } z| jd<|  W Y d}~w d}~ww  ty } z
| jd=|   d}~ww )>a  
            Merge bid documents into a single PDF with index
            
            Args:
                bid_generation_id: Bid generation record ID
                tenant_id: Tenant ID
                generated_document_ids: List of generated document IDs to merge
                document_categories: List of document categories to include (optional)
                add_page_numbers: Whether to add page numbers to merged PDF
                
            Returns:
                Dict containing merged document information
            merge_bid_documentsz"No generated document IDs providedr  zNo documents found to mergeMerging z) documents (will convert non-PDFs to PDF)merge_	convertedTr   r$  r]   r  rl  r   Nr   zSuccessfully processed: zFailed to convert document: zFailed to process document r   z/No documents were successfully converted to PDFrv  r   rn  r   r^  bidru  i  _merged_documents_r   zStarting PDF merge process...*PDF merge failed - output file not createdr   r  r=   /merged/r   bid_document_mergedmergedrx  all)r  r^  mergedDocumentCountconvertedDocumentCountaddedPageNumbersmergedCategoriesmergedAtprovidedDocumentIdsr}  mergedDocumentsrW  documents_merged)merged_document_idr  converted_countoutput_filenameprovided_document_idsrg  r  Successfully merged z documents (z converted to PDF) into r  rR   z documents (converted z to PDF))ri  r8  r   r  r9  	file_sizes3_urlr   r  zFailed to merge bid documents: )+r  r   r   r   r   r4  r`   r   r   r   r   r   r   r   rf   rN   r   r   r   r   _clean_html_file_convert_to_pdfr  re   rg   r   rh   r5  r   r  r  rl   r  r  r   r  
pdf_mergermerge_pdfs_with_indexr  r   r   r   r  )%rD   r   rF   r  r#  r$  r  generated_doc_idsr  processable_documentsr  conversion_dirprocessed_dirconverted_pdf_filesrk   r  r   original_file_pathr   converted_pdf_pathr  indexpdf_filer   new_filename	dest_pathrY   rn  r  rZ  r:  output_pathmerged_contentr   r  r  merged_doc_idr:   r:   r;   r%  ~  s   

*
&
 
z&MinaionsRFPService.merge_bid_documents2   target_size_percentpreserve_qualityc                 C   s  z|  ||d |dvrtd| j|g}|s!td| |d }|dd| }| ds9td	| jd
| d| d t	j
| jd| }	t	j|	dd z<| j||}
t|
}t	j
|	|}t|d}||
 W d   n1 sw   Y  t	j
|d }| d| d}t	j
|	|}| jd tj|||d|d\}}}|std|dd t|d}| }W d   n1 sw   Y  t|}|| | d }|di }|dd}tdd |}d!| d"| d#| }| j||d$|}||d%d&d'|i ||||||||t d(d)}| j|}t|d* }| j|d+|id,t d-|d.||d/d0id1 | jd2|d3d4 d5||||||||d6d|d7|d8| d9|d3dd:W zt|	 W W S  ty } z| j d;|  W Y d}~W S d}~ww zt|	 W w  ty } z| j d;|  W Y d}~w d}~ww  ty } z
| j!d<|   d}~ww )=a  
        Compress a PDF document
        
        Args:
            document_id: Document ID to compress
            tenant_id: Tenant ID
            target_size_percent: Target size as percentage of original (25, 50, or 75)
            preserve_quality: Whether to prioritize quality over exact size target
            
        Returns:
            Dict containing compressed document information
        compress_document   rQ  K   $Target size must be 25%, 50%, or 75%r  r   r  rl  r   z$Only PDF documents can be compressedzCompressing document: z to %	compress_Tr   r   N_compressed_percent.pdfzStarting PDF compression...r  )max_attemptsrS  zPDF compression failed: rh   zUnknown errorr   rv  r  r^  rt  ru  r   r  r=   /compressed/r   bid_document_compressed
compressedrx  )r  originalSizecompressedSizecompressionRatiotargetSizePercentpreserveQualitycompressionStatscompressedAtr}  r]   compressedDocumentsrW  document_compressedr   )compressed_document_idr  r:  provided_document_idrg  r  z"Successfully compressed document: .1f% reductionr  achieved_percentrR   Compressed z by )ri  r   rj  r  compressed_filenameoriginal_sizecompressed_sizecompression_ratiotarget_achievedr>  compression_statsr   r  zFailed to compress document: )"r  r   r   r   rN   r   r   r4  r`   r   r   r   r   r   r   r   r   r   r   rA  compress_pdfr   r  r  r  r   r  r   rf   r   r  r  rg   rh   )rD   r   r   rF   rR  rS  r  rt  r  r  r  rq  
input_pathr   	base_namer:  rN  rh  
final_pathstatscompressed_contentrr  rs  r  r  r   r  r  rk   compressed_doc_idr  r:   r:   r;   rT    s   

 z$MinaionsRFPService.compress_documentc           &         sb  z|  ||d |dvrtd| j||}|s!td| |dg }|s-td| j|} r> fdd|D }d	d |D }	|	sKtd
| jdt|	 d| d t	j
| jd| }
t	j|
dd zg }i }|	D ]~}z[| jt|d |}|dd|d  d}| ds|d7 }t	j
|
|}t|d}|| W d   n1 sw   Y  || t|d ||< | jd|  W qt ty } z| jd|d d|  W Y d}~qtd}~ww |std| jd t|
|d g }d}d}t	|
D ]}|d| dr|d| dd}||}|s1qt	j
|
|}t	j
|
|}t	j
|rt	j
|rt|d }| }W d   n	1 sbw   Y  t	j
|}t|}|| | d! }||7 }||7 }t|d"d#}| j||}|r|d$d%nd%} t d&d'| } d(| d)|  d*| }!| j!||!d+|}"||d,d-d.|"|| |||||||t"" d/
d0}#| j#|#}t|d }$||$|||||||"d1d2 | jd3| d|d4d5 q|dkr|| | d! nd}%| j$|d6d7t"" d8t|||||%d9d:ii | jd;t| d< | jd=|%d4d5 d>|t||||%|d?t| d@|%d4dAdBW zt%&|
 W W S  tyu } z| j'dC|  W Y d}~W S d}~ww zt%&|
 W w  ty } z| j'dC|  W Y d}~w d}~ww  ty } z
| jdD|   d}~ww )Ea  
        Batch compress all PDF documents in a bid generation
        
        Args:
            bid_generation_id: Bid generation record ID
            tenant_id: Tenant ID
            target_size_percent: Target size as percentage of original (25, 50, or 75)
            preserve_quality: Whether to prioritize quality over exact size target
            document_categories: List of document categories to compress (optional)
            
        Returns:
            Dict containing batch compression results
        batch_compress_documentsrU  rX  r  r  z(No generated documents found to compressc                    s   g | ]}| d  v r|qS )r   )rN   r  r#  r:   r;   rb    s    zCMinaionsRFPService.batch_compress_bid_documents.<locals>.<listcomp>c                 S   &   g | ]}| d d dr|qS r  r   r   rN   r   r   r  r:   r:   r;   rb       & z"No PDF documents found to compresszBatch compressing z PDF documents to rY  batch_compress_Tr   r]   r  rl  r   r   NzDownloaded for compression: Failed to download document r   )No documents were successfully downloadedz!Starting batch PDF compression..._compressedr   r[  r\  r   rv  rn  r   r^  r)  ru  r   r  r=   r^  r   r_  r`  rx  )
r  r^  r  originalFilenamera  rb  rc  rd  re  rg  r}  rR   )rj  r  r  rp  rq  rr  rs  r>  ro  rl  rm  rc  rW  batch_documents_compressed)compressed_countrR  total_original_sizetotal_compressed_sizeoverall_compression_ratiorg  zSuccessfully batch compressed rk  zOverall compression: r  zSuccessfully compressed z documents with z% overall reduction)ri  compressed_documentstotal_documentsr  r  r  rR  r   r  z$Failed to batch compress documents: )(r  r   r   r   rN   r   r4  r`   r   r   r   r   r   r   r   rf   r   r   r   r   re   r   rh   rA  batch_compress_pdfslistdirr   r  r  getsizerl   r  r  r   r  r   r   r  r  rg   )&rD   r   rF   rR  rS  r#  r  rC  r  pdf_documentsr  r  doc_id_mappingrk   r  r   r   r   r  compressed_docsr  r  r  original_doc_idcompressed_pathr0  r{  rq  rr  rs  rY   rn  r  r   r  r  r|  overall_compressionr:   r~  r;   batch_compress_bid_documents  s$  
*

 z/MinaionsRFPService.batch_compress_bid_documentsinclude_linked_docsc                 C   s  z|  ||d | j||}|std| dd |dg D }|s*td| j|}|r:dd |D }ndd |D }|sGtd	| jd
t| d t	j
| jd| }	t	j|	dd zgg }
|D ]v}zS| jt|d |}|dd|d  d}| ds|d7 }t	j
|	|}t|d}|| W d   n1 sw   Y  |
| | jd|  W qk ty } z| jd|d d|  W Y d}~qkd}~ww |
std|dd}tdd|}| d}t	j
|	|}| jd t|	|| t	j
|stdt|d }| }W d   n	1 s.w   Y  d!| d"| d#| }| j||d$|}||d%d&d'|||t|||t d(d)}| j|}t|d }| j |d*|id+t d,|t||d-d.id/ | jd0t| d1|  d2||t|t||d3|d0t| d4d5W zt!"|	 W W S  ty } z| j#d6|  W Y d}~W S d}~ww zt!"|	 W w  ty } z| j#d6|  W Y d}~w d}~ww  ty } z
| jd7|   d}~ww )8a  
        Merge all tender/RFP documents into a single PDF with index
        
        Args:
            tender_id: Tender ID
            tenant_id: Tenant ID  
            add_page_numbers: Whether to add page numbers to merged PDF
            include_linked_docs: Whether to include linked documents
            
        Returns:
            Dict containing merged document information
        merge_tender_documentsr\  c                 S   r  r:   r_  r`  r:   r:   r;   rb    r  z=MinaionsRFPService.merge_tender_documents.<locals>.<listcomp>r\   zNo documents found in tenderc                 S   r  r  r  r  r:   r:   r;   rb    r  c                 S   sB   g | ]}| d d dr| ddkr| ddkr|qS )r  r   r   r   rn  ra   rw  r  r  r:   r:   r;   rb    s    zNo PDF documents found to merger&  z tender PDF documentsmerge_tender_Tr   r]   r  rl  r   r   NzDownloaded: r  r   r  r^  rn  ru  r   z_merged_tender_documents.pdfz$Starting tender PDF merge process...r+  r   rv  r=   r,  r   tender_mergedr.  rx  )rz  r^  r0  r2  includeLinkedDocsr4  r}  r6  rW  tender_documents_merged)r8  r  r:  rg  r  r<  z tender documents into r  rR   z tender documents)ri  r8  r   r  r=  r>  r  r   r  z"Failed to merge tender documents: )$r  r   rl   r   rN   r   r4  r`   r   r   r   r   r   r   r   rf   r   r   r   r   re   r   rh   r  r  rA  rB  r  r  r   r  r   rr   r  r  rg   )rD   rY   rF   r$  r  rn  rt   r  r  r  r  rk   r  r   r   r   r  r  r:  rN  rO  r   r  r  rP  r:   r:   r;   r  o  s   
*

 z)MinaionsRFPService.merge_tender_documentsr  r  c              
   C   s   t d t d|  t dtj|rt|nd  |rZt dt| d | D ]'\}}|rA|dd d	d
nd}t dtj	| dt| d| d q1nt 
d t d dS )z&Debug helper to log extraction resultsz&=== Document Extraction Debug Info ===zTemp directory: zFiles in temp directory: zDirectory not foundzExtracted text from z documents:Nrx   r	  r  z
No contentr3  r   z	 chars - ...z*No documents_text returned from extractionz=== End Debug Info ===)r_   r`   r   r   r  r  r   r7  r   r   rg   )rD   r  r  doc_pathr   content_previewr:   r:   r;   r[    s   
&,
z,MinaionsRFPService._debug_extraction_resultsr  c           	   
   C   s4  t j|d}t j|rzot|ddd}t|}W d   n1 s%w   Y  tdt	| d t
|tru|rui }| D ]\}}t
|trT| rT|||< qBtd|  qB|rmtd	t	| d
 |W S td W i S td W i S  ty } ztd|  W Y d}~i S d}~ww i S )z5Check and load results from document extractor outputr  r   r  r  Nz"Found existing doc_text.json with z entrieszSkipping invalid entry for zLoaded z) valid documents from existing extractionz0No valid content found in existing doc_text.jsonz.doc_text.json exists but contains invalid dataz&Error reading existing doc_text.json: )r   r   r   r  r   rP   r   r_   r`   r   rb   rd   r7  rf   striprg   r   rh   )	rD   r  doc_text_filer   rK   valid_entriesr   r  r  r:   r:   r;    _check_document_extractor_output%  s4   
z3MinaionsRFPService._check_document_extractor_outputr   c              
   C   s8   t j|d  }ddddddddd	d
	}||dS )r   r   r   r   r   r   zapplication/mswordr   zapplication/vnd.ms-excelr>   )	r   r   r  r   r   .docr   .xlsr   r   r   r   r:   r:   r;   r   G  s   z$MinaionsRFPService._get_content_typer  r  c                 C   sP  g }t |D ]\}}}|D ]}	t j||	}
|	ds!|	dr"qzet|
d}| }W d   n1 s7w   Y  t	dd|}t j
|
|}d| d| d| }| |	}| j||||}||	d	t j|d
||||dd}| j|}t|d }|| W q ty } ztd|	 d|  W Y d}~qd}~ww q|S )z>Save generated bid documents to S3 and create database recordsr   r   r   Nru  r   r  r=   rm  rx  )r  r^  relativePathr}  r]   z"Failed to save generated document r   )r   r   r   r   r   r   r   r  r  r  relpathr   r   r   r   r   rf   re   r   r_   rh   )rD   r  r   rF   r  rt   r   r  r   r   r   r   r   rel_pathr   r   r  rt  rk   r~   r  r:   r:   r;   r  W  sJ   


",z2MinaionsRFPService._save_generated_documents_to_s3r  r  c                 C   s  g }i }|D ]'}| d}	|	r-| d| d| d| dd| d| dd	||	< q|D ]}
z|
 d}	| |	}|sM| jd
|
 d  W q0|d }ddl}|j|}|jd}d|jv rg	 t	
dd|}|||d}| ddur|d |d< | dr|d |d< ||
 d|d d|
 d|d d|||dd|
 dd|
 d| dddd}| j|}t|d }|| | jd|
 d d| d W q0 ty } ztd|
 d d|  W Y d}~q0d}~ww |S )a   
        Create database entries for referenced company documents without uploading to S3.
        Uses the original S3 URL from the company documents.

        Args:
            referenced_company_docs: List of referenced company documents from bid preparation
            bid_generation_id: Bid generation ID
            tenant_id: Tenant ID
            bid_number: Bid number
            downloaded_company_docs: List of downloaded company documents with S3 URLs

        Returns:
            List[str]: List of created document IDs
        
local_pathr>  r   r   r-  r   r  r  )r>  r   r   r-  r  r  z/Could not find S3 URL for referenced document: r  r   Nr=   z.s3.ru  r   )r   rR   r   rm  rx  Trequired_doc_name)r  r^  isReferencedDocumentoriginalCompanyDocrequiredDocNamer-  r}  r]   z#Created referenced document entry: ro  rp  z/Failed to create referenced document entry for r   )rN   r4  rg   urllib.parseparseurlparser   lstriphostnamer  r  r   r   rf   re   r`   r   r_   rh   )rD   r  r   rF   r  r  rt   local_path_to_s3company_docr  ref_docs3_infor>  urllib
parsed_urlr   bid_number_safer  rt  rk   r~   r  r:   r:   r;   r    st   


	





$(z6MinaionsRFPService._create_referenced_document_entriesc                 C   s   ddddddiddidd	d
ddiddd}t jt j|dd t|d}tj||dd W d   dS 1 s=w   Y  dS )z0Create default Elasticsearch index configurationr  r   )number_of_shardsnumber_of_replicastrueenabledra   r|   dense_vectori   )ra   dimskeyword)r|   title_vectortag)dynamic_source
properties)settingsmappingsTr   r  r  N)r   r   r   dirnamer   rP   r%  )rD   r   r   r   r:   r:   r;   r    s(   "z/MinaionsRFPService._create_default_index_configoutput_filec           
   	   C   s   ddl }tj|dd}tj|rZ||}g }| D ]\}}||dd|dd|ddd	 qt	|d
dd}	t
j||	ddd W d   dS 1 sSw   Y  dS dS )z>Convert Excel chunks to JSON format for Elasticsearch indexingr   Nr  zchunks.xlsxTagr   questionanswertagNamer  r  r  r  r  r  Fr  )pandasr   r   r   r  
read_exceliterrowsre   rN   r   rP   r%  )
rD   r  r  pd
excel_filedf	json_datar   rowr   r:   r:   r;   r    s   




"z*MinaionsRFPService._convert_chunks_to_json
process_idprocess_typec                 C   sd   || _ || _|| _td| d| |||| _t| dr$| jr$| j| j_| jd| d|  dS )z
        Set up process context for logging
        
        Args:
            tenant_id: Tenant ID
            process_id: Process ID (tender_id, analysis_id, etc.)
            process_type: Process type (discovery, analysis, bid_generation)
        zrfp_service.r   r   z	Starting z process for N)r   r   r  r   r4  hasattrr   r`   )rD   rF   r  r  r:   r:   r;   r  *  s   	
z)MinaionsRFPService._setup_process_contextdoc_infoc                    st   | dd }| dd  | dd g d}||v s6t fdd|D s6tfdd|D r8d	S d
S )a  
        Categorize company document as standard or experience based on type and content
        
        Args:
            doc_info: Document information dictionary
            
        Returns:
            str: Category ("standard" or "experience")
        ra   r   r  r  )r)   r   r'  
work_order
completion	portfolior   r&  c                 3       | ]}| v V  qd S r   r:   ra  	indicatorr  r:   r;   rE  [      zBMinaionsRFPService._categorize_company_document.<locals>.<genexpr>c                 3   r  r   r:   r  )	doc_valuer:   r;   rE  \  r  r   r   )rN   r   any)rD   r  r/  experience_indicatorsr:   )r  r  r;   _categorize_company_documentE  s   
z/MinaionsRFPService._categorize_company_document
target_dirc              
   C   s(  z| d}|sW dS | dd}tdd| dd}d	dl}|j|}tj	
|j	d
 p2d}| | }	tj	||	}
|
d }tj |dd}|  t|
d}||j W d   n1 sew   Y  d}| dr||d  r||d }nLg }| dr|d|d   | dr|d|d   | dr|d|d   dD ]}||v r|dtj|| dd  qd|}t|ddd}|| W d   n1 sw   Y  | jd|	  |
W S  ty } z| jd| dd  d!|  W Y d}~dS d}~ww )"a$  
        Download company document and create .desc file
        
        Args:
            doc_info: Document information dictionary
            target_dir: Target directory to save the document
            
        Returns:
            str: Path to saved document or None if failed
        r>  Nr  rt  z[^\w\s-]r   r  r   r   r   r   .desc   )timeoutr   r-  zName: ra   zType: r  zValue: )director_dataexperience_dataitr_dataca_cert_databalance_sheet_datacertificate_datazData: r  r  r	  r  r  r  zDownloaded company document: z$Failed to download company document unknownr   )rN   r  r  r  r   r  r  r  r   r   r   r   rM   raise_for_statusr   r   r   re   rP   r8  r4  r`   r   rh   )rD   r  r  r>  r  safe_filenamer  r  file_extensionr   r   	desc_pathrS   r   description_content
desc_partsr   r  r:   r:   r;   #_download_and_save_company_documentb  sT   





 z6MinaionsRFPService._download_and_save_company_documentc           
   
   C   s  t d|  	 zt|ddd}t|}W d    n1 s!w   Y  g }d}t|D ]r\}}t|tsDtd| d |d7 }q.d	|vsL|d	 sYtd| d
 |d7 }q.d|vsa|d sntd| d |d7 }q.d|vsv|d std| d |d7 }q.t	|d	 
 t	|d 
 t	|d 
 d}|| q.tdt| d| d |rt|ddd}tj||ddd W d    n1 sw   Y  tdt| d t|dkW S  ty }	 ztd|	  W Y d }	~	dS d }	~	ww )NzValidated Data file path r   r  r  r   zChunk z: Not a dictionaryr   r  z: Missing or empty tagNamer  z: Missing or empty questionr  z: Missing or empty answerr  zValidation complete: z valid chunks, z invalid chunksr  r  Fr  zWrote z cleaned chunks back to filezError validating chunks data: )r_   r`   r   rP   r   r5  rb   rd   printrf   r  re   r   r%  r   )
rD   r  r   rK   valid_chunksinvalid_countrA  chunkcleaned_chunkr  r:   r:   r;   r    sT   
z(MinaionsRFPService._validate_chunks_datarR   c                    s\    sdS   drt fdddD sdS  ddk rdS g d}t fd	d|D S )
z
        Check if URL is a valid file URL (not just a website)
        
        Args:
            url: URL to check
            
        Returns:
            bool: True if it's a file URL, False if it's a website
        F)zhttp://www.zhttps://www.c                 3       | ]	}|   v V  qd S r   r   )ra  r   rR   r:   r;   rE    rF  z8MinaionsRFPService._is_valid_file_url.<locals>.<genexpr>)r   r  r  .jpg.pngr=      )
documents/zfiles/zuploads/zattachments/r   r  r   r  r   r  r  z.zipc                 3   r   r   r  r  r  r:   r;   rE    rF  )r   r  count)rD   rR   file_indicatorsr:   r  r;   _is_valid_file_url  s   
 z%MinaionsRFPService._is_valid_file_urlr  r  c           6   
   C   sB  z| j ||}|di }|di }tj|d}tj|d}	tj|dd tj|	dd g }
|r| jd t	 }|
 D ]\}}| jd|  |d	}|r[| |s\qB||v raqB|| |d
}|dd\}}tdd| d}tdd| d}| dr|	}ddi}n|}ddi}tj|di dd}|ddp| d}|||||d|d}| |||}|r|
| qBn| jd |dg }t|D ]Z\}}|d}|r| |sq|dd|d  }|dd}|d d}t|i }||||dd|d|d}|dd}|dkr)|	n|}| |||}|r:|
| q|d!g } t| D ]X\}}!|!d}|rW| |sYqEtd"i }|!dd#|d  }"td$d|" d%d}#d#|# d&d"d'|!dd( d|d"||!d)}| |||}|r|
| qE|d*g }$t|$D ][\}}%|%d}|r| |sqtd+i }|%d,d-|d  }&td$d|& d%dd.d/ }'d0|' d+d1|%d,d d|d+||%d2}| ||	|}|r|
| q|d3i }(|(d4g })t|)D ]F\}}*|*d}|r&| |s(qtd5i }|*d6d7|d  }+d8|+ d5d9|+ d|d5||*d:}| |||}|rY|
| q|(d;g },t|,D ]>\}}-|-d}|rw| |syqetd<i }d=|d  d<|-d>dd|d<||-d?}| |||}|r|
| qe|(d@g }.t|.D ]F\}}/|/d}|r| |sqtdAi }|/dBd7|d  }0dC|0 dAdD|0 d|dA||/dE}| |||}|r|
| q|dFg }1t|1D ]T\}}2|2d}|r| |sqtdFi }|2ddG|d  }3td$d|3 d%dd.d/ }4dG|4 dF|3d|dF||2dH}| |||}|rR|
| q| jdIt|
 dJ | jdKtdLdM |
D   | jdNtdOdM |
D   |
W S  ty }5 z| jdP|5  g W  Y d.}5~5S d.}5~5ww )Qa  
        Download and organize company documents using centralized mapping (FIXED VERSION)
        
        Args:
            tenant_id: Tenant ID
            temp_company_docs_dir: Temporary directory for company documents
            formatted_company_info: Formatted company information for fallback descriptions
            
        Returns:
            List[Dict]: List of downloaded company documents with metadata
        r  r  Standard_DocumentsExperience_DocumentsTr   z)Processing documents from raw_analysis...zRaw Analysis Item...r   r/  r  r   z
[^a-z0-9]+r   r   r   r   r0  F)r!  r-  r   z document from raw_analysis)r  ra   r  r-  r>  r{  mapping_configz@Starting company documents download using centralized mapping...r  r   r  detail_doc_ra   basic_detailsr  r  r(   	director_
[^\w\s\-_]r  	_documentz
Director: Unknown)r  ra   r  r-  r>  r{  r  r  r%  r)   r'  project_NrQ  experience_z	Project: )r  ra   r  r-  r>  r{  r  r  r  r  r%   r  year_ITR_z
ITR Year: )r  ra   r  r-  r>  r{  r  r  r   r&   CA_Certificate_r!  )r  ra   r  r-  r>  r{  r  r  r"  r'   r#  Balance_Sheet_r$  )r  ra   r  r-  r>  r{  r  r  r*   Certificate_)r  ra   r  r-  r>  r{  r  r  z&Successfully downloaded and organized r  zStandard documents: c                 S      g | ]
}|d  dkr|qS )r   r   r:   ra  dr:   r:   r;   rb        zOMinaionsRFPService._download_and_organize_company_documents.<locals>.<listcomp>zExperience documents: c                 S   r  )r   r   r:   r  r:   r:   r;   rb    r  z3Failed to download and organize company documents: )r   rW   rN   r   r   r   r   r4  r`   r6  r7  r	  r9  rD  r  r  r   r  r   rP   r8  /_download_and_save_company_document_improved_v2re   r5  COMPANY_DOCUMENT_MAPPINGr   r   r   rh   )6rD   rF   r  r  rV   r:  r  raw_analysis_docsstd_docs_direxp_docs_dirr  
seen_filesr   rar   raw_doc_type	type_part	name_partr/  r  r  r  key_info_strr-  r  
saved_infor<  rA  r=  r>  r  r   r(   rB  director_namesafe_director_namer)   r   project_namesafe_project_namerC  rF  rG  itr_yearrH  rI  rJ  rK  bs_yearrL  rM  	cert_descsafe_cert_namer  r:   r:   r;   r    s  





.






"








"
  z;MinaionsRFPService._download_and_organize_company_documentsr>  default_namec           	      C   s   zCddl }|j|}|j}tj|}|rAt|dkrA|dsAtj|\}}t|dkr>|	dd	dd
 r>|W S |W S |W S  tyN   | Y S w )	a  
        Extract meaningful filename from URL, fallback to default name
        
        Args:
            file_url: URL of the file
            default_name: Default name to use if extraction fails
            
        Returns:
            str: Meaningful filename
        r   Nr  r      rA  r   r   )r  r  r  r   r   r   r   r   r   r   isalnumr   )	rD   r>  r2  r  r  r   r   r'  r   r:   r:   r;   _extract_filename_from_url$  s   $z-MinaionsRFPService._extract_filename_from_urlc              
   C   s  z,| d}|s| jd| dd  W dS | dd}| ||}||kr9t|dk r9tdd	| }n(td
d	| dd}ddl	}|j
|}	tj|	jd }
|
s]d}
||
7 }tj||}|d }| jd|  tj |ddd}|  d}|j dd}d|v r|dd  }t|d}|jddD ]}|| |t|7 }qW d   n1 sw   Y  d|jv rz	t|jd }W n ttfy   Y nw | |}t|ddd}|| W d   n1 sw   Y  | di }| d d!}| jd"| d#| d$| d%|  |||||| d&d| d'd||d(	W S  tjjyS } z| jd)| dd d*|  W Y d}~dS d}~w tyv } z| jd+| dd d,|  W Y d}~dS d}~ww )-a  
        IMPROVED VERSION - Download and save company document with concise, relevant descriptions
        
        Args:
            doc_info: Document information dictionary with mapping_config
            target_dir: Target directory to save the document
            formatted_company_info: Formatted company information for fallback descriptions
            
        Returns:
            Dict: Simple document information with local path or None if failed
        r>  z No file URL found for document: r  r  Nrt  rQ  z[^\w\s\-_\.]r   r  r  r   r   r   r   r  zDownloading document: <   T)r  streamzcontent-typer   ;r   i    )
chunk_sizezcontent-lengthr  r  r  r  r   r   z*Successfully downloaded company document: z (z	), size: z bytes, mimeType: r{  ra   )	r  r>  r   r   r-  r{  ra   r  r  r  z: Network error - r  r   ) rN   r4  rg   r5  r   r  r  r  r   r  r  r  r   r   r   r   r`   rM   r  rC   rD  r   iter_contentr   r   r   	TypeError(_create_concise_description_from_mapping
exceptionsRequestExceptionrh   r   )rD   r  r  r  r>  r  url_filenamer  r  r  r  r   r  rS   r=  	mime_typer   r  r  r  r   r  r:   r:   r;   r  F  s~   



$

  zBMinaionsRFPService._download_and_save_company_document_improved_v2c              
   C   s  | di }| dd}|rz| dd| dd| ddd}| d	r@|d	 }|| dd| d
d| ddd nr| dr_|d }|| dd| dd| ddd nS| drt|d }|d| ddi n>| dr|d }|d| ddi n)| dr|d }	|d|	 ddi n| dr|d }
|d|
 ddi |jdi |}|W S  ty } z| jd|  W Y d}~nd}~ww | dd}| dd }|r|S |r|S d| dd S ) a  
        Create CONCISE description using mapping template - NO long company info repetition
        
        Args:
            doc_info: Document information dictionary
            
        Returns:
            str: Concise description using template
        r  r   r   r  r2  r  ra   )r  r  ra   r  r  r  )r  r  r  r  r'  r&  r)  )r'  r&  r  r  yearr  r  r#  r  r-  r!  r  zTemplate formatting failed: Nz
Document: r  r:   )rN   updater   r   r4  rg   r  )rD   r  r  r   template_varsrB  r   rG  bsrI  rM  formatted_descriptionr  r  existing_descr:   r:   r;   r<    sn   

















z;MinaionsRFPService._create_concise_description_from_mappingc                    s   | dd  | dd }| dd }t  p!t |}|r*| ddS g d}|dv sA||v sAt fd	d
|D rCdS dS )a  
        Categorize company document using centralized mapping (backward compatibility)
        
        Args:
            doc_info: Document information dictionary
            
        Returns:
            str: Category ("standard" or "experience")
        r  r   ra   r{  r   r   )	r)   r   r'  r  r  r  r   r&  contract)r)   c                 3   r  r   r:   r  r  r:   r;   rE    r  zKMinaionsRFPService._categorize_company_document_improved.<locals>.<genexpr>r   )rN   r   r  r  )rD   r  r/  
doc_sourcer  r  r:   r  r;   %_categorize_company_document_improved  s   
z8MinaionsRFPService._categorize_company_document_improvedr   c           $      C   s$  zpt j|}t j|d }t j|| d}t j|d  }|dkr1t|| |W S |dv rkz!ddl}dddddd}|j	|||d	 | j
d
|  |W W S  tyj   | j
d|  Y W dS w |dv rRzddl}	ddlm}
m}m}m}m} ddlm}m} ddlm} ddlm} |	|}|
||d}g }| }|jD ]t}|	j||d}||d| |d  ||dd |j ! g|"d#t$j%!  }t&|d dkrdd |D }||}|'|ddd|j(fddd|j)fddd d!d"dd#d$|j*fd%dd$d|j+fg	 || ||dd& q|,| | j
d'|  |W W S  t-yQ } z| j
d(| d)|  W Y d}~W dS d}~ww |d*v rz]dd+l.m/} dd,lm}
m}m} dd-lm} ddlm} ||}|
||d}g }| }|j0D ]}|j12 r|||j1|d.  ||dd q|,| | j
d/|  |W W S  t-y } z| j
d0| d)|  W Y d}~W dS d}~ww |d1v rfzidd2lm}
m} dd-lm} ddlm} t3|d3d4d5} | 4 }!W d   n	1 sw   Y  |
||d}| }|!5d6}"g }|"D ]}#|#2 r1|||#2 |d.  q|,| | j
d7|  |W W S  t-ye } z| j
d8| d)|  W Y d}~W dS d}~ww | j
6d9|  W dS  t-y } z| j
d:| d)|  W Y d}~dS d}~ww );a(  
            Convert various file formats to PDF
            
            Args:
                file_path: Path to the input file
                output_dir: Directory to save the converted PDF
                
            Returns:
                str: Path to the converted PDF file
            r   r   r   r  NLetter2cm)z	page-sizez
margin-topzmargin-rightzmargin-bottomzmargin-left)optionsz$Converted HTML to PDF using pdfkit: z&Pdfkit available for HTML conversion: )r  r   )SimpleDocTemplateTable
TableStyle	ParagraphSpacer)letterA4)colors)getSampleStyleSheet)pagesize)
sheet_namezSheet: Heading1   r      c                 S   s.   g | ]}t |d kr|dd  dg n|qS )rZ  Nr  )r   )ra  r  r:   r:   r;   rb  g  s   . z6MinaionsRFPService._convert_to_pdf.<locals>.<listcomp>
BACKGROUNDr   r   r   	TEXTCOLOR)ALIGNr\  r^  r^  CENTER)FONTNAMEr\  r]  zHelvetica-Bold)FONTSIZEr\  r]     )rd  r   r   ra  rZ  )BOTTOMPADDINGr\  r]  rY  rf  ra  GRIDr3  zConverted Excel to PDF: zFailed to convert Excel to PDF r   )r  r   )Document)rM  rP  rQ  )rR  NormalzConverted Word to PDF: zFailed to convert Word to PDF )r   )rM  rP  r   r  r  z

zConverted text to PDF: zFailed to convert text to PDF z(Unsupported file format for conversion: zFailed to convert file to PDF )7r   r   r   r   r   r   r  copy2pdfkit	from_filer4  r`   ImportErrorrh   r  reportlab.platypusrM  rN  rO  rP  rQ  reportlab.lib.pagesizesrR  rS  reportlab.librT  reportlab.lib.stylesrU  	ExcelFilesheet_namesr  re   columnstolistfillnaastyperf   rf  r   setStylegrey
whitesmokebeigeblackbuildr   docxri  
paragraphsr|   r  r   r  rD  rg   )$rD   r   r  r   name_without_extrN  r  rl  margin_optionsr  rM  rN  rO  rP  rQ  rR  rS  rT  rU  r  rk   elementsstylesrW  r  rK   tabler  ri  doc_worddoc_pdf	paragraphr   text_contentr  parar:   r:   r;   r@    s   


"









z"MinaionsRFPService._convert_to_pdfc              
   C   s   zZt |ddd}| }W d   n1 sw   Y  tjdd|tjd}tdd|}t |d	dd}||  W d   n1 sFw   Y  | jd
t	j
|  W dS  tyy } z| jd| d|  W Y d}~dS d}~ww )z
        Clean HTML file by removing markdown code block syntax (```html and ```)
        
        Args:
            file_path: Path to the HTML file to clean
        r   r  r  Nz```htmlr   )flagsz```r  zCleaned HTML file: zFailed to clean HTML file r   )r   r  r  r  
IGNORECASEr   r  r4  r`   r   r   r   r   rg   )rD   r   r   r   r  r:   r:   r;   r?    s   
 &z#MinaionsRFPService._clean_html_filec                 C   s   | d}dd |D }|std|d dd}| dd	\}}td
| td| ||d }d||d	 d }d| d| d| }	dt dt d|	 }
|
S )zv
        Transform local file_path into S3 public URL.
        Handles dynamic sub-paths after company_<id>/.
        r=   c                 S   s   g | ]	}| d r|qS )r  )r   )ra  pr:   r:   r;   rb    s    z;MinaionsRFPService._transform_file_path.<locals>.<listcomp>zCompany ID not found in pathr   r  r   r   r   zCompany ID:z
Tenant ID:Nr  z/company_docs/zhttps://r   z.linodeobjects.com/)rD  r   r   r  rJ  r   r   r   )rD   r   partscompany_partr  rV   rF   company_indexdynamic_pathr   r>  r:   r:   r;   r    s   


z'MinaionsRFPService._transform_file_pathc                    sb   t  }t|d t fdddD ]}|| qW d   | S 1 s(w   Y  | S )z
        Calculate SHA-256 hash of file content
        
        Args:
            file_path (Path): Path to the file
            
        Returns:
            str: Hexadecimal hash of the file content
        r   c                      s
     dS )Ni   )r  r:   r   r:   r;   <lambda>  s   
 z9MinaionsRFPService._calculate_file_hash.<locals>.<lambda>    N)hashlibsha256r   iterrB  	hexdigest)rD   r   sha256_hash
byte_blockr:   r  r;   r    s   

z'MinaionsRFPService._calculate_file_hash)r   r   )NT)rQ  T)rQ  TN)TT)2r,   r-   r.   r/   rf   rE   rT  r   r   rU  r  r  r  r   r4  rl  r  r  r  r  r  r  r   r   r%  r   rT  r  r  r[  r  r   r  r  r  r  r  r  r  r  r	  r  r5  r  r<  rI  r@  r?  r  r  r:   r:   r:   r;   r     s   &  G x% " 0)  $  . 4 =

 ^x  $   

   

 

 T

 '"


4
mG7
  )
"
bT  *,r   __main__);r   rP   r  loggingr   pathlibr   typingr   r   r   r   r   r	   r  rM   r  r
   tempfiler  r  r  r   botocore.exceptionsr   log_forwarderr   bid_prep_automationr_  bid_queriesr  r   r  tender_automationr  document_extractorr   r   extract_annexures_autor\  rA  main_document_processorr&  dotenvr   getenvr   r   r   basicConfigINFO	getLoggerr,   r_   r  r+   r<   r   servicer:   r:   r:   r;   <module>   s4    



					h  E                                      
a
