o
    TJh                    @   s.  d dl Z d dlZd dlZd dlZd dlmZ d dlmZ d dlmZm	Z	m
Z
mZmZmZ d dlZd dlZd dlmZ d dlZd dlZd dlZd dlmZ d dlmZ d dlZd dlZd dlZd dlZ d dl!m"Z"m#Z# ej$ej%d	d
 e&e'Z(G dd dZ)G dd dZ*G dd dZ+e'dkre+ Z,dS dS )    N)datetime)Path)DictListOptionalAnyUnionTuple)urljoin)ClientError)setup_api_logging)!extract_documents_text_compatiblecreate_document_extractorz4%(asctime)s - %(name)s - %(levelname)s - %(message)s)levelformatc                   @   s8   e Zd ZdZdZdZdZdZdZdZ	dZ
d	Zd
ZdZdS )Colorz)Utility class for colored terminal outputz[95mz[96mz[36mz[94mz[92mz[93mz[91mz[1mz[4mz[0mN)__name__
__module____qualname____doc__PURPLECYANDARKCYANBLUEGREENYELLOWREDBOLD	UNDERLINEEND r    r    =/var/www/html/minaions-tender/ai-engine/claude_rfp_service.pyr      s    r   c                   @   s  e Zd ZdZdd ZdedefddZdedefdd	Zd
ededefddZ	d
ededefddZ
dee dee fddZdedefddZdededefddZdedefddZdedefddZdededefdd Zd!edefd"d#Zd$ededefd%d&Zd$ededefd'd(Z	)dGd*ed+ed,eded-edefd.d/ZdHd+eded1edefd2d3Zdedee fd4d5Zded6edefd7d8Zded9edefd:d;ZdId+eded=edefd>d?Zd@ejddAfdBdCZ dDedefdEdFZ!dAS )J	ApiClientz,Handles all API calls to the Node.js servicec                 C   s"   | d| _|| _|dd| _dS )zInitialize API client/application/json)x-internal-api-keyzContent-TypeN)rstripbase_urlapi_keyheaders)selfr'   r(   r    r    r!   __init__/   s
   zApiClient.__init__	tenant_idreturnc                 C   6   | j  d| }tj|| jd}| | | d S )zGet tenant by ID	/tenants/r)   datar'   requestsgetr)   _check_responsejsonr*   r,   urlresponser    r    r!   
get_tenant8      
zApiClient.get_tenantc                 C   8   | j  d| d}tj|| jd}| | | d S )z,Get formatted company information for tenantr/   z/company-infor0   r1   r2   r7   r    r    r!   get_company_info?      
zApiClient.get_company_info	tender_idc                 C   s2  | j  d| d| }tj|| jd}| | | d }d|v rtdt|d   td|d   t	|d t
rg }|d D ]C}t	|trZd|v rZ|t|d  qEt	|tre|| qEtd	t| d
|  z	|t| W qE   td|  Y qE||d< td|d   |S )zGet tender by ID and tenant	/tenders/?tenant_id=r0   r1   originalDocumentsz'Original documents in tender response: zDocument IDs: _idzUnexpected document ID format:  - z)Could not convert document ID to string: zConverted document IDs: )r'   r3   r4   r)   r5   r6   loggerinfotype
isinstancelistdictappendstrwarningerror)r*   r?   r,   r8   r9   tender_datastring_docsdocr    r    r!   
get_tenderF   s,   

zApiClient.get_tenderupdate_datac                 C   8   | j  d| }tj||| jd}| | | d S )zUpdate tender with new datar@   r6   r)   r1   r'   r3   putr)   r5   r6   )r*   r?   rS   r8   r9   r    r    r!   update_tenderi      
zApiClient.update_tenderdocument_idsc              	   C   s  | j  d}g }|D ]4}|du rq
t|tr$d|v r$|t|d  q
t|tr7d|v r7|t|d  q
|t| q
td|  tj|d|i| j	d}|j
dkrtd	|j
  z| }td
|  W n   td|jdd   Y | | | d S )zGet documents by IDsz/documents/get-manyNrC   z$oidzRequesting documents with IDs: rZ   rU     zError getting documents: HTTP Error details: Raw response:    r1   )r'   rH   rJ   rK   rL   rE   debugr3   postr)   status_coderN   r6   textr5   )r*   rZ   r8   
string_idsdoc_idr9   
error_infor    r    r!   get_documentsp   s*   

zApiClient.get_documentsdocument_datac                 C   4   | j  d}tj||| jd}| | | d S )zCreate a new document recordz
/documentsrU   r1   r'   r3   r`   r)   r5   r6   )r*   rg   r8   r9   r    r    r!   create_document      
zApiClient.create_documentdocument_idc              	   C   s   t |dddd}td|  | j d| d| }tj|d| jid}|jd	krWt	d
|j  z|
 }t	d|  W n   t	d|jdd   Y | | |jS )zGet document content" 'z!Getting content for document ID: z/documents/z/content?tenant_id=r%   r0   r[   z%Error getting document content: HTTP r\   r]   Nr^   )rL   replacerE   rF   r'   r3   r4   r(   ra   rN   r6   rb   r5   content)r*   rl   r,   rd   r8   r9   re   r    r    r!   get_document_content   s   

zApiClient.get_document_contentanalysis_datac                 C   rh   )zCreate a new analysis recordz	/analysesrU   r1   ri   )r*   rs   r8   r9   r    r    r!   create_analysis   rk   zApiClient.create_analysisanalysis_idc                 C   r.   )zGet analysis by ID
/analyses/r0   r1   r2   )r*   ru   r8   r9   r    r    r!   get_analysis   r;   zApiClient.get_analysisc                 C   rT   )zUpdate analysis with new datarv   rU   r1   rV   )r*   ru   rS   r8   r9   r    r    r!   update_analysis   rY   zApiClient.update_analysisbid_datac                 C   rh   )z"Create a new bid generation recordz/bid-generationsrU   r1   ri   )r*   ry   r8   r9   r    r    r!   create_bid_generation   rk   zApiClient.create_bid_generationbid_generation_idc                 C   s<   | j  d| d| }tj|| jd}| | | d S )z#Get bid generation by ID and tenant/bid-generations/rA   r0   r1   r2   )r*   r{   r,   r8   r9   r    r    r!   get_bid_generation   s   
zApiClient.get_bid_generationc                 C   rT   )z#Update bid generation with new datar|   rU   r1   rV   )r*   r{   rS   r8   r9   r    r    r!   update_bid_generation   rY   zApiClient.update_bid_generationFfile_contents3_keycontent_typeencryptc                 C   sj   | j  d}dtj|||fi}|||t| d}d| ji}	tj||||	d}
| 	|
 |

 d S )zUpload file to S3z/storage/uploadfile)r,   keyr   r   r%   )filesr1   r)   r1   )r'   ospathbasenamerL   lowerr(   r3   r`   r5   r6   )r*   r   r   r   r,   r   r8   r   r1   r)   r9   r    r    r!   upload_file   s   


zApiClient.upload_fileTdecryptc                 C   sH   | j  d| d| dt|  }tj|d| jid}| | |jS )zDownload file from S3z/storage/download?tenant_id=&key=z	&decrypt=r%   r0   )r'   rL   r   r3   r4   r(   r5   rq   )r*   r   r,   r   r8   r9   r    r    r!   download_file   s   $
zApiClient.download_filec                 C   r<   )z#List company documents for a tenant/storage/company-documents/z/listr0   r1   r2   r7   r    r    r!   list_company_documents   r>   z ApiClient.list_company_documentsdownload_dirc                 C   s   | j  d| d}tj|d| jid}| | tj|d}t|d}|	|j
 W d   n1 s5w   Y  ddl}||d	}|| W d   n1 sTw   Y  t| d}	t|D ]\}
}
}|	t|7 }	qe|	S )
z+Download all company documents for a tenantr   z	/downloadr%   r0   zcompany_documents.zipwbNr   r)r'   r3   r4   r(   r5   r   r   joinopenwriterq   zipfileZipFile
extractallremovewalklen)r*   r,   r   r8   r9   zip_pathfr   zip_ref
file_count_r   r    r    r!   download_company_documents  s$   

z$ApiClient.download_company_documentslocal_docs_pathc              
   C   s   | j  d| d}g }t|D ],\}}}|D ]$}|ds#|dr$qtj||}	|d|t|	d| 	|ff qqd| j
i}
tj|||
d}|D ]
\}}|d	   qL| | | d
 S )z(Upload company documents from local pathr   z/upload..tmpr   rbr%   )r   r)      r1   )r'   r   r   
startswithendswithr   r   rK   r   _get_content_typer(   r3   r`   closer5   r6   )r*   r,   r   r8   r   rootr   	filenamesfilename	file_pathr)   r9   
file_tupler    r    r!   upload_company_documents"  s   "

z"ApiClient.upload_company_documents  
expires_inc                 C   sF   | j  d| d| d| }tj|| jd}| | | d d S )zGet signed URL for S3 objectz/storage/signed-url?tenant_id=r   z&expires_in=r0   r1   r8   r2   )r*   r   r,   r   r8   r9   r    r    r!   get_signed_url:  s   
zApiClient.get_signed_urlr9   Nc                 C   s|   |j dkr<d|j  }z(| }d|v r| d|d  }d|v r/| d|d  }W t|W t|   Y t|dS )z2Check response status and raise exception if errorr[   z
API Error message: rN   rD   N)ra   r6   	Exception)r*   r9   	error_msg
error_datar    r    r!   r5   A  s   
zApiClient._check_responser   c                 C   s2   t j|d  }ddddddd}||d	S )
(Get content type based on file extensionr   application/pdf	text/html
text/plainGapplication/vnd.openxmlformats-officedocument.wordprocessingml.documentAapplication/vnd.openxmlformats-officedocument.spreadsheetml.sheetr$   ).pdf.html.txt.docx.xlsxz.jsonapplication/octet-streamr   r   splitextr   r4   r*   r   extcontent_typesr    r    r!   r   P  s   zApiClient._get_content_type)F)T)r   )"r   r   r   r   r+   rL   r   r:   r=   rR   rX   r   rf   rj   bytesrr   rt   rw   rx   rz   r}   r~   boolr   r   r   intr   r   r   r3   Responser5   r   r    r    r    r!   r"   ,   s>    	#&
	r"   c                   @   sN  e Zd ZdZdGdefddZdedefdd	Zd
ededeeef fddZ	d
ededeeef fddZ
dededeeef fddZdededeeeef  deeef fddZdededeeeef  deeef fddZdededededeeef f
ddZdededeeef fddZdededeeef fdd Zdedefd!d"ZdHdeded$ed%edef
d&d'Zd(eeef d)edd#fd*d+Zd,edeeef fd-d.Zd/edefd0d1Zd2ededed3edee f
d4d5Zdefd6d7Zd)ed8efd9d:Zded;ed<efd=d>Zd?edefd@dAZd?edBedefdCdDZdEdF Zd#S )IClaudeRFPServicez/Main service for RFP processing using Claude AIconfig.jsonconfig_pathc              
   C   s  zt |d}t|| _W d   n1 sw   Y  W n ty2 } z	td|   d}~ww tj| jd d d| _	| j
di 
dd	| _tj| jd
d | j
di 
dd}| j
di 
dtj
d}|sstdt||| _t| jd d | _d| _d| _d| _dS )z)Initialize the service with configurationr   NzFailed to load configuration: 	anthropicr(   )r(   file_storagebase_dirz/tmp/rfp_serviceTexist_oknodejs_serviceapi_urlz"http://localhost:5000/internal-apiINTERNAL_API_KEYzgInternal API key is required. Please set it in config.json or as INTERNAL_API_KEY environment variable.)r   r6   loadconfigr   rE   rN   r   Clientclientr4   r   r   makedirsenviron
ValueErrorr"   
api_clientr   	extractorcurrent_tenant_idcurrent_process_idcurrent_process_type)r*   r   r   eapi_base_urlr(   r    r    r!   r+   a  s,   
zClaudeRFPService.__init__r,   r-   c           !   
   C   s  z| j |}|di }| jd i }|dg }|D ]"}|dd}|dd}|dd}	|r@|s8|	r@|r<|n|	||< qd}
|
d	7 }
|
d
7 }
|
d7 }
|dr^|
d|d  d7 }
|drm|
d|d  d7 }
|dr||
d|d  d7 }
|dg }t|dD ]\}}|r|
d| d| d7 }
q|dr|
d|d  d7 }
|dr|
d|d  d7 }
|dr|
d|d  d7 }
|dr|
d|d  d7 }
|dr|
d |d  d7 }
|d!r|
d"|d!  d7 }
|
d7 }
|
d#7 }
|
d$7 }
|d%r|
d&|d%  d7 }
|d'r|
d(|d'  d7 }
|d)r/|
d*|d)  d7 }
|
d7 }
|
d+7 }
|
d,7 }
|d-rK|
d.|d-  d7 }
|d/r[|
d0|d/  d7 }
|d1rk|
d2|d1  d7 }
|d3r{|
d4|d3  d7 }
|d5r|
d6|d5  d7 }
|d7r|
d8|d7  d7 }
|
d7 }
|d9g }|r|
d:7 }
|
d$7 }
t|dD ]Q\}}|dr|
d;| d<7 }
|
d=|d  d7 }
|d>r|
d?|d>  d7 }
|d@r|
dA|d@  d7 }
|dBr|
dC|dB  d7 }
|
d7 }
q|dDi }|r|
dE7 }
|
d7 }
|dFg }|D ]'}|dGrF|dHrF|dId}|
dJ|dG  d| dK|dH  d7 }
q |dLg }|D ]}|dMrb|
dN|dM  d7 }
qP|dOg }|D ]}|dPr~|
dQ|dP  d7 }
ql|dRg }|D ]}|dSr|
dT|dS  d7 }
q|
d7 }
|dUg }|rD|
dV7 }
|
dW7 }
t|dD ]\}}|dXs|dYrB|
dZ| d<7 }
|dXr|
d[|dX  d7 }
|d\r|
d]|d\  d7 }
|dYr|
d^|dY  d7 }
|d_r|
d`|d_  d7 }
|dar|
db|da  d7 }
|dcr.|
dd|dc  d7 }
|der>|
df|de  d7 }
|
d7 }
q|dgg }|ro|
dh7 }
|
di7 }
|D ]}|djri|
dk|dj  d7 }
qW|
d7 }
|
dl7 }
| jdmt|
 dn | jdo|
  |
W S  ty } zp| jdp|  z[| j |}|di }|dg }i }|D ]}|dd}|dd}|r|r|||< qd|ddq d} | d|ddq d7 } | dr|ddq d7 } | W W  Y ds}~S    Y W Y ds}~dtS ds}~ww )uz
        Get and format company information from API (structured data only)
        
        Args:
            tenant_id: Tenant ID
            
        Returns:
            str: Formatted company information from structured data
        company_infoz-Retrieved company info (structured data only)detailsnamern   valuer   z*COMPANY INFORMATION FOR TENDER SUBMISSION
z>============================================================

zBASIC COMPANY DETAILS:
z----------------------
companyNamezCompany Name: 

entityTypezEntity Type: registeredAddresszRegistered Address: branchOfficeAddressr   zBranch Office Address r   companyWebsitez	Website: officePhoneNumberzPhone: officeEmailzEmail: 	officeFaxzFax: authorizedPersonToSignDocszAuthorized Signatory: authorizedPersonDesignationzDesignation: zEMPLOYEE INFORMATION:
z---------------------
totalEmployeeszTotal Employees: technicalStaffzTechnical Staff: uploadedEmployeeDetailszEmployee Details Document: zLEGAL & REGISTRATION DETAILS:
z----------------------------

cin_numberz$Company Incorporation Number (CIN): 
pan_numberz Permanent Account Number (PAN): 
gst_numberzGoods and Services Tax (GST): msme_numberz9Micro, Small and Medium Enterprises (MSME) Registration: 
typeOfMsmezMSME Type: startUpRegistrationNumberzVDepartment for Promotion of Industry and Internal Trade (DPIIT) Startup Registration: directorDetailszDIRECTOR INFORMATION:
z	Director z:
z  Name: addressz  Address: panz"  Permanent Account Number (PAN): dinz(  Director Identification Number (DIN): financialDetailszFINANCIAL INFORMATION:
turnoverDetailsturnoverYearturnoverturnoverUnitzAnnual Turnover  
itrDetailsitrYearzIncome Tax Return (ITR) Year: ca_certificate_detailsca_certificate_descriptionz'Chartered Accountant (CA) Certificate: balanceSheetDetailsauditedBalanceSheetYearBalance Sheet Year: pastExperienceDetailszPAST EXPERIENCE:
z----------------
customerprojectzProject z  Client Name: clientLocationz  Client Location: z  Project Title: projectValuez  Project Value: projectScopez  Project Scope: projectStartDatez  Start Date: projectEndDatez  End Date: certificatezCERTIFICATIONS:
z--------------
descriptionz- z=============================================================
z,Successfully formatted company information (z characters)zCompany information: 
 z&Error formatting company information: zNot specifiedz	Contact: Nz!Company information not available)	r   r=   r4   process_loggerrF   	enumerater   r   rN   )!r*   r,   company_datar   details_infodetails_listitemr   r   file_urlformatted_infobranch_addressesir   director_detailsdirectorfinancial_detailsturnover_detailsr  unititr_detailsitrca_cert_detailsca_certbalance_sheet_detailsbalance_sheetpast_experience
experiencecertificatescertr   
basic_infofallback_infor    r    r!   _format_company_information  sH  









$z,ClaudeRFPService._format_company_informationr?   c           "      C   s  z|  ||d | jd| d|  | j||}|s%td| | jd|  | j|}|s=td| tj	| j
d| }tj|dd	 g }| jd
|d|  |dg }dd |D }| jd|  |std d|ddg ddW S | jdt| d | j|}| jdt| d g }	|D ]}
zt|
dd}|
dd| }| jd| d| d td|  | j||}| jd | d!t| d" tj	||}t|d#}|| W d$   n	1 sw   Y  | jd%|  | d&r=| jd'|  t|}|r=|	| | jd(t| d)|  zt| W n tyc } z| jd*| d!|  W Y d$}~nd$}~ww W q ty } z| jjd+|
d d!| dd, W Y d$}~qd$}~ww tt|	}| jd-t| d. |rt||}t | t!|}|D ]}zztj"|}t|d/}|# }W d$   n	1 sw   Y  |dt|}t$%d0d1|}d2| d3| d4| }| &|}| j'||||}||d5d6d7|||d8|d9d:}| j(|}
t|
d }|)||t|d; | jd<|  W q tyM } z| jd=| d!|  W Y d$}~qd$}~ww d>d |D }d?d |dg D }|| }d|id@t** dAt|t|dBdCidD} | j+||  zdd$l,}!|!-| W n   Y d|t|t||dEt| dFdW S  ty } ztdGt|  dHdGt| dIW  Y d$}~S d$}~ww )JzKExtract links from existing RFP documents and download additional documentsget_rfp_documentszGetting tender z for tenant Tender not found: zGetting tenant zTenant not found: 	rfp_docs_Tr   z,Getting additional RFP documents for tender 	bidNumberrB   c                 S   s   g | ]}|rt |qS r    rL   .0rd   r    r    r!   
<listcomp>  s    z6ClaudeRFPService.get_rfp_documents.<locals>.<listcomp>zDocument IDs from tender: zNo document IDs found in tendersuccessr   z'No documents found in tender to process)statusr?   links_founddocuments_downloadeddownloaded_documentsr   zGetting 
 documentsz
Retrieved rC   rn   r   doc_zProcessing document: z (ID: )zGetting content for document zReceived content for document r   z bytesr   NSaved document to r   zExtracting links from PDF: z
Extracted z links from zCould not delete temp file zError processing document )exc_infozFound z" unique links across all documentsr   [\/\\]r   ztenders/r#   z/linked/rfptenders3extracted_link)tenderIdr9  sourceparentTenderIdtenantr   rG   categorystorageTypestorageDetailsmetadata)rl   r   sizezSaved linked document: zError saving linked document c                 S   s   g | ]}|d  qS )rl   r    r<  rQ   r    r    r!   r=        c                 S      g | ]}t |qS r    r:  r;  r    r    r!   r=    rX  logslinked_documents_added)r@  rA  	timestampactionr   z$set$pushzSuccessfully downloaded z additional RFP documentszError getting RFP documents: rN   )r?  r   )._setup_process_contextr  rF   r   rR   r   r:   r   r   r   r   r   r4   rE   rM   r   rf   rL   rr   r   r   r   r   taextract_links_from_pdfextendr   r   rN   rI   setdownload_linked_documentsremove_duplicate_pdfslist_files_in_directoryr   readresubr   r   rj   rK   timerX   shutilrmtree)"r*   r?   r,   rJ  rQ  processing_dirrB  rZ   	documents	all_linksrQ   rd   doc_namedoc_content	temp_pathr   linkscleanup_errr   unique_linksdownloaded_filesr   r   rq   
bid_numberr   r   storage_detailsdocument_recordnew_doc_idsexisting_doc_idsall_doc_idsrS   rm  r    r    r!   r6  p  s  
	

$.



&	z"ClaudeRFPService.get_rfp_documentsc                 C   sT  |  ||d | j||}|std| dd |dg D }| j|}i }d}d}|D ]C}	| jt|	d |jdd	d
}
|
|t|	d < |
rs|
	 }t
|}t
|
}||7 }||7 }| jd|	d d| d| d q0d}td|| }d}g }|dkrd}|d |dkrd}|d t
|dkr|d9 }|d |dkr|d9 }|d t|| d}||dd|d ||t
|||||d!t d"||t
|d#d$gd%}| j|}t|d }| jd&| d'|  | jd(|  | jd) | jtj tj d*| tj  | jd) ||d t
|||||d+d,S )-zHEstimate the cost for RFP analysis based on document complexity and sizeanalysis_estimationr7  c                 S   rY  r    r:  r;  r    r    r!   r=  =  rX  z;ClaudeRFPService.estimate_analysis_cost.<locals>.<listcomp>rB   r   rC   utf-8ignore)errorsz	Document r   r   z words,  charactersg rhq?
         ?i'  333333?zLarge document sizeia  g      ?zVery large document size   g?zMultiple documentsi zHigh content density   	estimatedINR)	wordCountcharacterCountdocumentCountcomplexityFactorcomplexityIndicatorsbaseCostcostPerWordanalysis_cost_estimation)estimated_cost
word_countdocument_countr\  )rQ  rJ  rG   r?  estimatedCostcurrencyanalysisMetricsrZ  z#Estimated analysis cost for tender u   : ₹z"Total words across all documents: 2==================================================u   Estimated Analysis Cost: ₹)r  r  character_countcomplexity_factorcomplexity_indicators)ru   r  r  r   )ra  r   rR   r   r4   rf   rr   rL   decodesplitr   r  rF   maxrK   roundrl  rt   r   r   r   r   )r*   r?   r,   rJ  rZ   rp  documents_texttotal_word_counttotal_char_countrQ   rs  wordsdoc_word_countdoc_char_countbase_cost_per_word	base_costr  r  r  analysis_recordanalysisru   r    r    r!   estimate_analysis_cost4  s    &




$z'ClaudeRFPService.estimate_analysis_costru   c                 C   s,  z|  ||d | j|}|std| t|d |kr5| jd|d  d|  td| t|d }| jd|  | j||}|sVtd|d  | j	|d	d
idt

 ddid dd |dg D }| j|}tj| jd| }tj|dd g }	|D ]=}
z|
dd|
d  }|
di }|	|d W q ty } ztd|
d d|  W Y d}~qd}~ww t||	}tj|d}tj|dd z#| jd t||| jd d  }| jd!|rt|nd" d# W n ty# } z| jd$|  d}W Y d}~nd}~ww | || |s1td%t|\}}| |}| jd&t| d' t||\}}t |}||||d(t|t|t!d)d* |" D d+d,}| j	|d-|d.dt

 d/|t|d0d1id zd"dl#}|$| W n ty } z| jd2|  W Y d}~nd}~ww |r| jd3|d4 d5|  n| jd6|d4 d5|  d-||||d7 |d8W S  ty } z(td9|  z| j	|d	d:idt

 d;d<t|id1id W     Y  d}~ww )=zFAnalyze RFP documents to extract key information and check eligibilityr  Analysis record not found: rQ  Tenant mismatch:  vs rJ  )   ✅ Analysis found and tenant validated: r7  r?  analysis_in_progressrZ  rfp_analysis_startedr]  r^  r_  c                 S   rY  r    r:  r;  r    r    r!   r=    rX  z0ClaudeRFPService.analyze_rfp.<locals>.<listcomp>rB   	analysis_Tr   r   rD  rC   rT  r8   zFailed to get document URL r   Nanalysis_outputz9Attempting document extraction with external extractor...r   r(   zDocument extractor returned r   rC  zDocument extractor failed: z8No text was extracted from documents using either methodzCompany Info length: r  )
isEligiblereasonc                 s   s    | ]	}t | V  qd S N)r   r  )r<  rb   r    r    r!   	<genexpr>  s    z/ClaudeRFPService.analyze_rfp.<locals>.<genexpr>)documentsProcessedanalysisChunkstotalWordCount)extractedInforequired_documentseligibilityanalysisMetadataanalysis_completed)r?  resultsrfp_analysis_completed)is_eligibledocuments_processedr\  'Failed to cleanup temporary directory: u$   🚀 Company is eligible for tender r9  z
. Reason: u*   ⚠️ Company is NOT eligible for tender r  )r?  r  eligibility_reasonextracted_infoanalysis_metadatar  zFailed to analyze RFP: analysis_failedrfp_analysis_failedrN   )%ra  r   rw   r   rL   r  rN   rF   rR   rx   rl  r4   rf   r   r   r   r   r   rK   r   rE   rb  rf  r   r   r   rM   _debug_extraction_resultsanalyze_tender_with_LLMr5  check_eligibilitybpaidentify_required_documentssumvaluesrm  rn  )r*   ru   r,   r  r?   rJ  rZ   rp  temp_dirrw  rQ   r   rz  r   rx  
output_dirr  r  doc_analysesr   r  r  required_docsanalysis_resultrm  r    r    r!   analyze_rfp  s   ($

 	

zClaudeRFPService.analyze_rfpr  c                 C   s  |  ||d | j|}|std| t|d |kr3| jd|d  d|  td| | jd|  |ddkrGtd	|d
i }|di }|di }d}dd |D }	dd |	D }
dd |	D }t	|
}t	|}t	|	}d}g }|D ]}dt
dd }||7 }||dddt|dd q|
D ]}dt
dd }||7 }||dddt|dd qt|| d}|t|d |dd |d!||||t|d||	d"t d#||t|d|d$d%gd&	}| j|}t|d' }| jd(|  | jd) | jtj tj d*| tj  | jd+|  | jd,t|d  | jd-| d.| d/| d0 | jd) ||d!|t|d|||||	d1d2S )3z-Estimate the cost for bid document generationestimate_bid_generationr  rQ  r  r  r  r?  r  z@Analysis must be completed before estimating bid generation costr  r  r  i  c                 S   s$   g | ]}| d d dv r|qS )rG   rn   )CUSTOMANNEXUREr4   upperrW  r    r    r!   r=  j     $ zAClaudeRFPService.estimate_bid_generation_cost.<locals>.<listcomp>c                 S   $   g | ]}| d d dkr|qS )rG   rn   r  r  rW  r    r    r!   r=  m  r  c                 S   r  )rG   rn   r  r  rW  r    r    r!   r=  n  r  r   }   r  r  r   Annexurer  r  )documentrG   costd   zCustom Documentr  rJ  bid_generation_estimationr  r  )totalBillableDocumentscustomDocumentCountannexureCountr  additionalCostcostBreakdownbillableDocumentsbid_generation_cost_estimation)r  r  additional_costbillable_document_countr\  )	rQ  rJ  
analysisIdrG   r?  r  r  generationMetricsrZ  rC   u"   Estimated bid generation cost: ₹r  u"   Estimated Bid Generation Cost: ₹u   Base Cost: ₹u   Additional Cost: ₹zBillable Documents: z
 (Custom: z, Annexure: rE  )r  r  total_billable_documentscustom_document_countannexure_countcost_breakdownbillable_documents)r{   r  r  r   )ra  r   rw   r   rL   r  rN   rF   r4   r   randomuniformrK   r  rl  rz   r   r   r   r   )r*   ru   r,   r  r  r  r  r  base_generation_costbillable_docscustom_docsannexure_docscustom_countr  total_billable_countr  r  annexureannexure_cost
custom_doccustom_costr  bid_generation_recordbid_generation
bid_gen_idr    r    r!   estimate_bid_generation_costF  s   






$ z-ClaudeRFPService.estimate_bid_generation_costr{   c           2      C   s,	  |  ||d | j||}|std| t|d }| j|}|s,td| t|d }| j||}|sDtd|d  | j|ddid	t d
did zt	j
| jd| }	t	j
| jd| }
t	j|	dd t	j|
dd dd |dg D }| j|}|D ]{}zW| jt|d |}|dd|d  }|ds|di ddkr| d}t	j
|	|}t|d}|| W d   n1 sw   Y  | jd|  W q ty } z| jd|d d|  W Y d}~qd}~ww z;| j|}|d i }t	j
|
d!}t	j
|
d"}t	j|dd t	j|dd g }|d#g }|D ]C}|d$}|r|dd%|d&d%|d'd%|d(d%|d)}| |}|d*krn|n|}| ||}|r||||d+ q@|d,g }t|D ]<\} }!|!d$}|rd-| d.  d/d0d1|!dd% d2|!dd% ||!d3}| ||}|r|||d4d+ q|d5g }"t|"D ]B\} }#|#d$}|rd6| d.  d7d8|#d9d% d:|#d9d% d;|#d<d% ||#d=}| ||}|r|||d*d+ q|d>i }$|$d?g }%t|%D ]?\} }&|&d$}|rfd@|&dA| d.  dBdC|&dAd% dD|&dAd% ||&dE}| ||}|rf|||d4d+ q(|$dFg }'t|'D ]8\} }(|(d$}|rdG| d.  dB|(dHd%dI|(dHd% ||(dJ}| ||}|r|||d4d+ qr|$dKg })t|)D ]?\} }*|*d$}|rdL|*dM| d.  dBdN|*dMd% dO|*dMd% ||*dP}| ||}|r|||d4d+ q|dQg }+t|+D ]8\} },|,d$}|r6dR| d.  dQ|,d(d%dS|,d(d% ||,dT}| ||}|r6|||d4d+ q| jdUt| dV || _W n tyg } z| jdW|  g | _W Y d}~nd}~ww t	j
|	dX}-t	j|-dd t	j
|-dY}.t|.dZd[d\}t j!|d]i d^i |d_d` W d   n	1 sw   Y  | "|}| jda|db  t#$|	|
||}/| %|/|||dbd%}0zdcdl&}1|1'|	 |1'|
 W n ty } z| jdd|  W Y d}~nd}~ww | j|de|0|/t(| dfg dgd	t dht|0|/didjid | jdkt|0 dl dm|0t(| dfg |/t|0dnt|0 dodpW S  ty } zPt)dq|  z*dcdl&}1drt* v r`t	j
+|	r`|1'|	 dst* v rrt	j
+|
rr|1'|
 W n   Y | j|ddtid	t dudvt|idjid  d}~ww )wz9Generate complete bid documents using enhanced automationr  !Bid generation record not found: r  r  rJ  r7  r?  generation_in_progressrZ  bid_document_generation_startedr  r_  bid_gen_company_docs_Tr   c                 S   rY  r    r:  r;  r    r    r!   r=    rX  z;ClaudeRFPService.generate_bid_documents.<locals>.<listcomp>rB   rC   r   rD  r   rT  mimeTyper   r   NrF  Failed to save document r   r   Standard_DocumentsExperience_Documentsr   r   rn   rG   r   r  )r   rG   r   r  r   r0  )
local_pathr   rR  r   	director_r   	_documentr$  z
Director: zDocument for Director )r   rG   r   r  r   director_datastandardr  experience_project_r/  z	Project: r  z Experience document for project z with client r  )r   rG   r   r  r   experience_datar  r	  itr_r
  	financialz
ITR Year: zIncome Tax Return for year )r   rG   r   r  r   itr_datar  ca_certificate_r  zCA Certificate: )r   rG   r   r  r   ca_cert_datar  balance_sheet_r  r  zAudited Balance Sheet for year )r   rG   r   r  r   balance_sheet_datar  certificate_zCertificate: )r   rG   r   r  r   certificate_datazDownloaded and organized z company documentsz&Failed to download company documents: tender_analysistender_analysis.jsonwr  encodingr  r     indentz$Generating bid documents for tender r9  r   z)Failed to cleanup temporary directories: generation_completedcompany_document_urls)r?  generatedDocumentsoutputDirectorycompanyDocumentUrls!bid_document_generation_completed)r  output_directoryr\  zSuccessfully generated z bid documents	completedz
Generated z bid documents successfully)r?  generated_documentsr'  r,  r  r   z"Failed to generate bid documents: temp_rfp_dirtemp_company_docs_dirgeneration_failedbid_document_generation_failedrN   ),ra  r   r}   r   rL   rw   rR   r~   rl  r   r   r   r   r   r4   rf   rr   r   r   r   r  rF   r   rN   r=   _categorize_company_document#_download_and_save_company_documentrK   r  r   r'  rM   r6   dumpr5  r  prepare_bid_documents_save_generated_documents_to_s3rm  rn  getattrrE   localsexists)2r*   r{   r,   r  bid_genru   r  r?   rJ  r/  r0  rZ   rp  rQ   rs  r   r   r   r   r  r   std_docs_direxp_docs_dirdownloaded_company_docsr  r  r   doc_inforR  
target_dir
saved_pathr$  r#  r%  r/  r0  r&  r)  r*  r+  r,  r-  r.  r1  r2  analysis_diranalysis_filefinal_docs_dirgenerated_document_idsrm  r    r    r!   generate_bid_documents  s  

 
*





	
	
	
	

	
	

	
"



	


z'ClaudeRFPService.generate_bid_documentsdocument_pathuser_promptc                  C   s~  |  ||d z| j||}|std| t|d }| j|}|s.td| t|d }| j||}	|	sFtd|d  | jd|  t	j
| jd| }
t	j|
d	d
 dd |	dg D }| j|}|D ]q}zN| jt|d |}|dd|d  }|ds|di ddkr| d}t	j
|
|}t|d}|| W d   n1 sw   Y  W qt ty } z| jd|d d|  W Y d}~qtd}~ww t	j
|
d}t	j|d	d
 t	j
|d}t|ddd}tj|di di |d d! W d   n	1 sw   Y  | |}t	j
|
d"}t	j|d	d
 tj|||||di di d#}|sOtd$t|d%}| }W d   n	1 sdw   Y  t	j
|}|	d&t|}td'd(|}d)| d*| d+| }| |}| j ||||}||d,d-d.|||||t!! d/d0}| j"|}t|d }| j#|d1|t!! d2|||d3d4d5i zd6dl$}|%|
 W n ty } z| j&d7|  W Y d}~nd}~ww | jd8|  d9|||d:| d;W S  ty> } z*t'd<|  zd6dl$}d=t( v r1t	j
)|
r3|%|
 W  W  W     Y  d}~ww )>z9Regenerate a specific bid document based on user feedbackregenerate_bid_documentr  r  r  rJ  r7  zRegenerating document: regenerate_Tr   c                 S   rY  r    r:  r;  r    r    r!   r=  ?  rX  z<ClaudeRFPService.regenerate_bid_document.<locals>.<listcomp>rB   rC   r   rD  r   rT  r
  r   r   Nr  r   r  r  r   r  r!  r  r  r#  r$  regenerated_docs)org_doc_file_pathrH  r   rD  r  zFailed to regenerate documentr   r9  rH  r   bid_documents/r#   z/regenerated/bid_document_regeneratedregeneratedrK  )bidGenerationIdr9  originalDocumentPath
userPromptregeneratedAtrP  r`  document_regenerated)rl   original_pathrH  r\  )regeneratedDocumentsrZ  r   r  z#Successfully regenerated document: r-  z#Document regenerated successfully: )r?  regenerated_document_idr   r   r   z#Failed to regenerate bid document: r/  )*ra  r   r}   r   rL   rw   rR   r  rF   r   r   r   r   r   r4   rf   rr   r   r   r   r   rN   r6   r5  r5  r  regenerate_documentri  r   rj  rk  r   r   rl  rj   r~   rm  rn  rM   rE   r9  r:  ) r*   r{   r,   rG  rH  r;  ru   r  r?   rJ  r/  rZ   rp  rQ   rs  r   r   r   r   rB  rC  r   rD  regenerated_doc_pathrq   ry  r   r   rz  r{  regenerated_doc_idrm  r    r    r!   rI  !  s    
*"


z(ClaudeRFPService.regenerate_bid_documentr   c                 C   sb   t j|std| | j||}|d sdnd|d |d |d |d d|d  d	d
S )z?Upload company documents to S3 for future use in bid generationz%Local documents path does not exist: upload_errorsr-  partialuploaded_filesuploaded_counterror_countz	Uploaded z company documents to S3)r?  r]  r[  r^  r_  r   )r   r   r:  r   r   r   )r*   r,   r   resultr    r    r!   upload_company_documents_to_s3  s   z/ClaudeRFPService.upload_company_documents_to_s3c              
   C   s  zo|  ||d | j|}|std| t|d |kr'td| t|d }| j||}|s?td|d  ztj| j	d| }tj
|dd d	d
 |dg D }| j|}|D ]C}	| jt|	d |}
|	dd|	d  }|ds| d}tj||}t|d}||
 W d   n1 sw   Y  qf| jd|  tj|dd}td|  tj|st|}nd}|rVd| }d}tj|s| | tj|s| || | |rRt|||}d}t|d}| }W d   n	1 sw   Y  d| d}d}| j||||}| j|d|d|d |did t d!d"|id#id$ d%||d&d'W W S t d(t d) t yp } z
| j!d*|   d}~ww  t y } z	t!d*|   d}~ww )+z>Setup chat functionality for RFP queries using enhanced searchsetup_rfp_chatr  rQ  rJ  r7  chat_setup_Tr   c                 S   rY  r    r:  r;  r    r    r!   r=    rX  z3ClaudeRFPService.setup_rfp_chat.<locals>.<listcomp>rB   rC   r   rD  r   r   Nz+Setting up chat functionality for analysis r  chunks.jsonChunk Data file path r  index_es.jsonrn   r   tender_analysis//chunks.jsonr$   	chatSetupr8   )bidIdsetupCompleted
chunksPathindexConfigrZ  chat_setup_completedbid_idr\  r_  r-  z%Chat functionality setup successfully)r?  ro  chunks_pathr   Failed to validate chunks data*Failed to process documents for chat setupzFailed to setup RFP chat: )"ra  r   rw   r   rL   rR   r   r   r   r   r   r4   rf   rr   r   r   r   r  rF   rE   r:  bqprocess_task_create_default_index_config_convert_chunks_to_json_validate_chunks_datachates_index_datari  r   rx   rl  r   rN   )r*   ru   r,   r  r?   rJ  r  rZ   rp  rQ   rs  r   r   r   	data_filer?  ro  index_config_filetag_responsesrq   r   r   rz  r   r    r    r!   rb    s   




	
zClaudeRFPService.setup_rfp_chatc                 C   s  zt d|  | ||d | j|}|s td| t|d }| j||}|s8td|d  tj	
| jd| }tj|dd d	d
 |dg D }| j|}| jdt| d |D ]C}	| jt|	d |}
|	dd|	d  }|ds| d}tj	
||}t|d}||
 W d   n1 sw   Y  qi| jd tj	
|dd}t d|  tj	|st|}nd}|rd| }d}tj	|s| | tj	|s| || | jd | |rzdt|||}d}t|d}| }W d   n	1 sw   Y  d| d}d}| j||||}| j|d |dd!|d" |t   d#id$t   d%|t|dd&d'id( | jd)|  W nH t!y } z3| j"d*|  | j|d |d!d!dt|t   d+id$t   d,t|d-d.d'id( W Y d}~nd}~ww t!d/t!d0zd1dl#}|$| W W dS  t!y } z| j%d2|  W Y d}~W dS d}~ww  t!yA } z^t "d3| d4|  z)| j|d d| d!d!dt|t   d+id$t   d,t|d5d.d'id( W n t!y. } zt "d6|  W Y d}~nd}~ww W Y d}~dS W Y d}~dS d}~ww )7z1Background task for setting up chat functionalityz,Starting background chat setup for analysis setup_rfp_chat_bgr  rJ  r7  rc  Tr   c                 S   rY  r    r:  r;  r    r    r!   r=  d  rX  z>ClaudeRFPService.setup_rfp_chat_background.<locals>.<listcomp>rB   zDownloading z documents...rC   r   rD  r   r   Nz&Processing documents for chat setup...r  rd  re  r  rf  z"Starting Elasticsearch indexing...rn   r   rg  rh  r$   ri  Fr8   )rj  rk  setupInProgressrl  rm  setupCompletedAtrZ  rn  )ro  indexed_chunksr>  r\  r_  z/Chat setup completed successfully for analysis zElasticsearch indexing failed: )rj  rk  r~  setupFailederrorMessagefailedAtchat_setup_failedelasticsearch_indexing)rN   stagerq  rr  r   z"Failed to cleanup temp directory: z*Background chat setup failed for analysis r   background_processingz-Failed to update analysis with error status: )&rE   rF   ra  r   rw   r   rL   rR   r   r   r   r   r   r4   rf   r  r   rr   r   r   r   r:  rs  rt  ru  rv  rw  rx  ry  ri  r   rx   rl  r   rN   rm  rn  rM   )r*   ru   r,   r  r?   rJ  r  rZ   rp  rQ   rs  r   r   r   rz  r?  ro  r{  r|  rq   r   r   rz  es_errorrm  cleanup_errorr   update_errorr    r    r!   setup_rfp_chat_backgroundL  s  




"z*ClaudeRFPService.setup_rfp_chat_backgroundNquery	client_idc              
   C   s  | j |}|std| t|d |krtd| |di }|ds-td|d}|s9d| }zqtj| jd| }d	| }tj|d
d}	d| d}
tj	|	stj|d
}tj
|dd | j |
|d}t|	d}|| W d   n1 sw   Y  t||||	}| j |ddt |||dii |W S  ty } z	td|   d}~ww )z8Chat with RFP documents using enhanced search and Clauder  rQ  ri  rk  z6Chat functionality not setup. Please setup chat first.rj  tenant_rc  r  r  rd  rg  rh  Tr   Fr   Nr`  chatHistory)r]  clientIdr  r9   zFailed to process chat query: )r   rw   r   rL   r4   r   r   r   r   r:  r   r   r   r   rx  chat_with_rfprx   rl  r   rE   rN   )r*   ru   r,   r  r  r  
chat_setupro  r  rz  r   r  rs  r   r9   r   r    r    r!   r    sP   



zClaudeRFPService.chat_with_rfpr  r  c              
   C   s   t d t d|  t dtj|rt|nd  |rZt dt| d | D ]'\}}|rA|dd d	d
nd}t dtj	| dt| d| d q1nt 
d t d dS )z&Debug helper to log extraction resultsz&=== Document Extraction Debug Info ===zTemp directory: zFiles in temp directory: zDirectory not foundzExtracted text from z documents:Nr^   r   r  z
No contentz  - r   z	 chars - z...z*No documents_text returned from extractionz=== End Debug Info ===)rE   rF   r   r   r:  listdirr   itemsrp   r   rM   )r*   r  r  doc_pathrq   content_previewr    r    r!   r  9  s   
&,
z*ClaudeRFPService._debug_extraction_resultsr  c           	   
   C   s4  t j|d}t j|rzot|ddd}t|}W d   n1 s%w   Y  tdt	| d t
|tru|rui }| D ]\}}t
|trT| rT|||< qBtd|  qB|rmtd	t	| d
 |W S td W i S td W i S  ty } ztd|  W Y d}~i S d}~ww i S )z5Check and load results from document extractor outputzdoc_text.jsonr   r  r!  Nz"Found existing doc_text.json with z entrieszSkipping invalid entry for zLoaded z) valid documents from existing extractionz0No valid content found in existing doc_text.jsonz.doc_text.json exists but contains invalid dataz&Error reading existing doc_text.json: )r   r   r   r:  r   r6   r   rE   rF   r   rH   rJ   r  rL   striprM   r   rN   )	r*   r  doc_text_filer   r1   valid_entriesr   r   r   r    r    r!    _check_document_extractor_outputI  s4   
z1ClaudeRFPService._check_document_extractor_outputr   c                 C   s0   t j|d  }dddddd}||dS )	r   r   r   r   r   r   r   )r   r   r   r   r   r   r   r   r    r    r!   r   k  s   z"ClaudeRFPService._get_content_typerD  ry  c                 C   sB  g }t |D ]\}}}|D ]}	t j||	}
|	ds!|	dr"qz^t|
d}| }W d   n1 s7w   Y  t j|
|}d| d| d| }| 	|	}| j
||||}||	dt j|d||||d	d
}| j
|}t|d }|| W q ty } ztd|	 d|  W Y d}~qd}~ww q|S )z>Save generated bid documents to S3 and create database recordsr   r   r   NrM  r#   bid_documentrK  )rP  r9  relativePathrP  rC   z"Failed to save generated document r   )r   r   r   r   r   r   r   ri  relpathr   r   r   r   rj   rL   rK   r   rE   rN   )r*   rD  r{   r,   ry  rZ   r   dirsr   r   r   r   rq   rel_pathr   r   rz  r  rQ   rd   r   r    r    r!   r7  w  sH   


"+z0ClaudeRFPService._save_generated_documents_to_s3c                 C   s   ddddddiddidd	d
ddiddd}t jt j|dd t|d}tj||dd W d   dS 1 s=w   Y  dS )z0Create default Elasticsearch index configurationr  r   )number_of_shardsnumber_of_replicastrueenabledrG   rb   dense_vectori   )rG   dimskeyword)rb   title_vectortag)dynamic_source
properties)settingsmappingsTr   r   r$  N)r   r   r   dirnamer   r6   r5  )r*   r   r   r   r    r    r!   ru    s(   "z-ClaudeRFPService._create_default_index_configoutput_filec           
   	   C   s   ddl }tj|dd}tj|rZ||}g }| D ]\}}||dd|dd|ddd	 qt	|d
dd}	t
j||	ddd W d   dS 1 sSw   Y  dS dS )z>Convert Excel chunks to JSON format for Elasticsearch indexingr   Nr  zchunks.xlsxTagrn   questionanswertagNamer  r  r   r  r!  r  Fr%  ensure_ascii)pandasr   r   r   r:  
read_exceliterrowsrK   r4   r   r6   r5  )
r*   r  r  pd
excel_filedf	json_datar   rowr   r    r    r!   rv    s   




"z(ClaudeRFPService._convert_chunks_to_json
process_idprocess_typec                 C   sJ   || _ || _|| _td| d| |||| _| jd| d|  dS )z
        Set up process context for logging
        
        Args:
            tenant_id: Tenant ID
            process_id: Process ID (tender_id, analysis_id, etc.)
            process_type: Process type (discovery, analysis, bid_generation)
        zrfp_service.r   z	Starting z process for N)r   r   r   r   r  rF   )r*   r,   r  r  r    r    r!   ra    s   	z'ClaudeRFPService._setup_process_contextr?  c                    st   | dd }| dd  | dd g d}||v s6t fdd|D s6tfdd|D r8d	S d
S )a  
        Categorize company document as standard or experience based on type and content
        
        Args:
            doc_info: Document information dictionary
            
        Returns:
            str: Category ("standard" or "experience")
        rG   rn   r   r   )r/  r0  r  
work_order
completion	portfolior   r  c                 3       | ]}| v V  qd S r  r    r<  	indicator)rr  r    r!   r  		      z@ClaudeRFPService._categorize_company_document.<locals>.<genexpr>c                 3   r  r  r    r  )	doc_valuer    r!   r  
	  r  r0  r  )r4   r   any)r*   r?  doc_typeexperience_indicatorsr    )rr  r  r!   r3    s   
z-ClaudeRFPService._categorize_company_documentr@  c              
   C   s(  z| d}|sW dS | dd}tdd| dd}d	dl}|j|}tj	
|j	d
 p2d}| | }	tj	||	}
|
d }tj |dd}|  t|
d}||j W d   n1 sew   Y  d}| dr||d  r||d }nLg }| dr|d|d   | dr|d|d   | dr|d|d   dD ]}||v r|dtj|| dd  qd|}t|ddd}|| W d   n1 sw   Y  | jd|	  |
W S  ty } z| jd| dd  d!|  W Y d}~dS d}~ww )"a$  
        Download company document and create .desc file
        
        Args:
            doc_info: Document information dictionary
            target_dir: Target directory to save the document
            
        Returns:
            str: Path to saved document or None if failed
        r   Nr   r  z[^\w\s-]rn   r  r   r   r   r   z.desc   )timeoutr   r  zName: rG   zType: r   zValue: )r  r  r  r  r  r  zData: r  r$  r   r   r  r!  zDownloaded company document: z$Failed to download company document unknownr   )r4   rj  rk  r  rp   urllib.parseparseurlparser   r   r   r   r3   raise_for_statusr   r   rq   rK   r6   dumpsr  rF   r   rN   )r*   r?  r@  r   rr  safe_filenameurllib
parsed_urlfile_extensionr   r   	desc_pathr9   r   description_content
desc_partsr   r   r    r    r!   r4  	  sT   





 z4ClaudeRFPService._download_and_save_company_documentc           
   
   C   s  t d|  	 zt|ddd}t|}W d    n1 s!w   Y  g }d}t|D ]r\}}t|tsDtd| d |d7 }q.d	|vsL|d	 sYtd| d
 |d7 }q.d|vsa|d sntd| d |d7 }q.d|vsv|d std| d |d7 }q.t	|d	 
 t	|d 
 t	|d 
 d}|| q.tdt| d| d |rt|ddd}tj||ddd W d    n1 sw   Y  tdt| d t|dkW S  ty }	 ztd|	  W Y d }	~	dS d }	~	ww )NzValidated Data file path r   r  r!  r   zChunk z: Not a dictionaryr   r  z: Missing or empty tagNamer  z: Missing or empty questionr  z: Missing or empty answerr  zValidation complete: z valid chunks, z invalid chunksr   r  Fr  zWrote z cleaned chunks back to filezError validating chunks data: )rE   rF   r   r6   r   r  rH   rJ   printrL   r  rK   r   r5  r   )
r*   rz  r   r1   valid_chunksinvalid_countr#  chunkcleaned_chunkr   r    r    r!   rw  W	  sT   
z&ClaudeRFPService._validate_chunks_data)r   r  )r   r   r   r   rL   r+   r5  r   r   r6  r  r  r   r  rF  rI  ra  rb  r  r  r  r  r   r7  ru  rv  ra  r3  r4  rw  r    r    r    r!   r   ^  sX    & j Ek .( .  
Y

 x  3;"

3Gr   __main__)-r   r6   rl  loggingr   pathlibr   typingr   r   r   r   r   r	   rj  r3   r  r
   tempfiler  r   botocore.exceptionsr   log_forwarderr   bid_prep_automationr  bid_queriesrs  r  rx  tender_automationrb  document_extractorr   r   basicConfigINFO	getLoggerr   rE   r   r"   r   servicer    r    r    r!   <module>   s\     
  4                A
