o
    gif                     @   s.  d Z ddlZddlZddlZddlZddlZddlmZmZm	Z	m
Z
 ddlmZmZmZ ddlmZmZ ddlZddlmZ ddlmZ ddlmZ ejejd	 eeZed
dgdZd@dedefddZe Z e !di !de"ddZ#e !di !de"dZ$G dd dZ%G dd deZ&G dd deZ'dAde(defd d!Z)d"ede	e fd#d$Z*d%e%d&ed'ede	e fd(d)Z+d%e%d*ed'ede	e fd+d,Z,	dAd"ed%e%d-ed.e(deee
f f
d/d0Z-	1			2dBd3e(d.e(de(d4e.deee
f f
d5d6Z/ej0d7e'd8d9e&d:efd;d<Z1e!d=d>d? Z2dS )Cz
Location Finder API for Minaions Tender System
Extracts delivery locations from GeM tender PDFs and updates backend
To be imported and included in main app.py
    N)DictListOptionalAny)	APIRouterHTTPExceptionBackgroundTasks)	BaseModelField)datetime)Path)TenderLocationExtractor)levelz/api/location-finderzLocation Finder)prefixtagsconfig.jsonconfig_pathreturnc              
   C   sx   zt | d}t|W  d   W S 1 sw   Y  W dS  ty; } ztd| d i W  Y d}~S d}~ww )z#Load configuration from config.jsonrNzFailed to load configuration: z. Using environment variables.)openjsonload	Exceptionloggerwarning)r   fe r   >/var/www/html/minaions-tender/ai-engine/location_finder_api.pyload_config   s   (r   nodejs_serviceapi_urlBACKEND_API_URLz"http://localhost:5000/internal-apiapi_keyINTERNAL_API_KEYc                   @   s   e Zd ZdZdd Zddededee fd	d
Zddededee fddZ	de
dedefddZde
de
defddZde
de
defddZdejddfddZdS )BackendApiClientz(Handles API calls to the Node.js backendc                 C   s"   | d| _|| _|dd| _d S )N/zapplication/json)x-internal-api-keyzContent-Type)rstripbase_urlr#   headers)selfr)   r#   r   r   r   __init__5   s
   zBackendApiClient.__init__  r   limitskipr   c                 C   >   | j  d}||d}tj|| j|d}| | | d S )zGet all tendersz/tendersr.   r/   r*   paramsdatar)   requestsgetr*   _check_responser   r+   r.   r/   urlr3   responser   r   r   get_all_tenders=      
z BackendApiClient.get_all_tendersc                 C   r0   )z0Get tenders that don't have location informationz/tenders/without-locationr1   r2   r4   r5   r9   r   r   r   get_tenders_without_locationH   r=   z-BackendApiClient.get_tenders_without_location	tender_idupdate_datac                 C   s8   | j  d| }tj||| jd}| | | d S )z'Update tender with location informationz	/tenders/)r   r*   r4   )r)   r6   putr*   r8   r   )r+   r?   r@   r:   r;   r   r   r   update_tenderS   s   
zBackendApiClient.update_tenders3_key	tenant_idc                 C   s>   | j  d}||dd}tj|d| ji|d}| | |jS )z2Download file from S3 via backend API using S3 keyz/storage/downloadtrue)rD   keydecryptr'   r2   r)   r6   r7   r#   r8   content)r+   rC   rD   r:   r3   r;   r   r   r   download_fileZ   s   
zBackendApiClient.download_filedocument_idc                 C   s@   | j  d| d}d|i}tj|d| ji|d}| | |jS )z;Download document content via backend API using document IDz/documents/z/contentrD   r'   r2   rH   )r+   rK   rD   r:   r3   r;   r   r   r   download_document_by_idf   s   
z(BackendApiClient.download_document_by_idr;   Nc                 C   sr   |j dkr7d|j  }z| }d|v r"| d|d  }W t|W t|   | d|j }Y t|dS )z2Check response status and raise exception if errori  z
API Error message: N)status_coder   textr   )r+   r;   	error_msg
error_datar   r   r   r8   p   s   
z BackendApiClient._check_response)r-   r   )__name__
__module____qualname____doc__r,   intr   r   r<   r>   strrB   bytesrJ   rL   r6   Responser8   r   r   r   r   r%   2   s    
r%   c                   @   s^   e Zd ZU edddZeed< edddZeed< edddZeed	< ed
ddZ	e
ed< dS )ProcessAllTendersRequestTz*Only process tenders without location data)descriptiononly_missingFz!Use regex fallback instead of LLMuse_fallbackz&Use Anthropic Claude (default: OpenAI)use_anthropicd   z$Maximum number of tenders to processr.   N)rS   rT   rU   r
   r]   bool__annotations__r^   r_   r.   rW   r   r   r   r   r[      s
   
 r[   c                   @   s   e Zd ZU eed< eed< dZee ed< dZee ed< dZ	ee ed< dZ
ee ed< dZee ed< dZee ed	< g Zeeeeef   ed
< dS )ProcessAllTendersResponsestatusrM   r   total_tenders	processed
successfulfailedskippedprocessing_timeresultsN)rS   rT   rU   rX   rb   re   r   rW   rf   rg   rh   ri   rj   floatrk   r   r   r   r   r   r   r   rc      s   
  rc   Fr_   c                 C   s8   | rt d}|std|S t d}|std|S )z>Get the appropriate API key from environment based on providerANTHROPIC_API_KEYzDANTHROPIC_API_KEY not found in environment. Set it to use Anthropic.OPENAI_API_KEYz>OPENAI_API_KEY not found in environment. Set it to use OpenAI.)osgetenv
ValueError)r_   r#   r   r   r   get_api_key   s   

rr   tenderc                 C   s  d| v rT| d rTt | d trTt| d dkrT| d d }tdt| d  d|  t |trE|dp:|d}td|  |S t |trTtd|  |S d	| v rftd
| d	   | d	 S d| v rxtd| d   | d S d| v r| d rt | d trt| d dkr| d d }tdt| d  d|  t |tr|dp|d}td|  |S t |trtd|  |S t	d dS )z
    Extract the FIRST document ID/URL/S3 key from tender data.
    Priority: originalDocuments[0] > documentUrl > s3Key > documents[0]

    Returns:
        Document ID (24-char hex), S3 key, or URL of the first tender document
    originalDocumentsr   zFound z documents, using FIRST: r:   s3Keyz,Extracted from originalDocuments[0] (dict): z.Extracted from originalDocuments[0] (string): documentUrlzExtracted from documentUrl: zExtracted from s3Key: 	documentsz" in documents array, using FIRST: zExtracted from documents[0]: z*No document reference found in tender dataN)

isinstancelistlenr   debugdictr7   inforX   r   )rs   	first_docresultr   r   r   extract_pdf_url_from_tender   s<   	




r   
api_clientrK   rD   c              
   C   sH  d}zmt d|  | ||}|rt|dkr$t d|  W dS |ds@t d|  t d|dd   W dS tjd	d
d}|| |j	}W d   n1 sZw   Y  t d| dt| d |W S  t
y } z(t d| dt|  |rtj|rzt| W n   Y W Y d}~dS d}~ww )zADownload PDF using document ID and save to temp file, return pathNz!Downloading PDF for document ID: r   z*Downloaded file is empty for document ID:    %PDF-zIDownloaded file is not a valid PDF (missing PDF header) for document ID: File starts with:    F.pdfdeletesuffixDownloaded PDF to:  ( bytes)z&Error downloading PDF for document ID rN   )r   r}   rL   rz   error
startswithtempfileNamedTemporaryFilewritenamer   rX   ro   pathexistsunlink)r   rK   rD   	temp_pathfile_content	temp_filer   r   r   r   download_pdf_by_document_id   s6   

r   rC   c              
   C   sB  d}zmt d|  | ||}|rt|dkr$t d|  W dS |ds@t d|  t d|dd   W dS tjd	d
d}|| |j	}W d   n1 sZw   Y  t d| dt| d |W S  t
y } z%t dt|  |rtj|rzt| W n   Y W Y d}~dS d}~ww )zDDownload PDF from S3 using S3 key and save to temp file, return pathNzDownloading PDF from S3: r   z%Downloaded file is empty for S3 key: r   zDDownloaded file is not a valid PDF (missing PDF header) for S3 key: r   r   Fr   r   r   r   r   zError downloading PDF from S3: )r   r}   rJ   rz   r   r   r   r   r   r   r   rX   ro   r   r   r   )r   rC   rD   r   r   r   r   r   r   r   download_pdf_from_s3   s6   

r   	extractorr^   c                 C   s  t | dd}t | dd}| dd}|||dddd}z|s3| d	r3d
|d< d|d< |W S t| }|sd|d< td| d| d ddddt  ddddddt dddddid}	z|	||	 d|d< ddd|d< W |W S  t
y }
 ztd t |
  W Y d}
~
|W S d}
~
ww t|t ot|d!kotd"d# |D od$|vod%|v}|rtd&|  t|||}ntd'|  t|||}|sSd(|rd)nd* d+| d,|d< td| d| d-|rd)nd* d+| d.	 ddddt  ddddddt ddd/ddid}	z|	||	 d|d< ddd|d< W |W S  t
yR }
 ztd t |
  W Y d}
~
|W S d}
~
ww ztd0| d| d1 |j||d2}|r|d3r|d4|d3|d5|d6d7t  |d8d9d|d3|d3ddt d:|d3|d4|d6d;did}	|	||	 d|d< ||d< td<| d=|d3  n=ddddt  ddddddt ddd>ddid}	|	||	 d|d< ddd|d< d?|d< td@| dA W |rtj|r t| W |S W |S W |S |r3tj|r4t| w w w  t
y] }
 zt |
|d< tjdB| d+t |
 dCdD W Y d}
~
|S d}
~
ww )Ez6Process a single tender to extract and update location_id tenant	bidNumberUnknownrh   N)r?   
bid_numberrD   rd   locationr   deliveryLocationri   rd   zLocation already existsr   z/No PDF document found in tender, set to "Other"zTender r   z5): No PDF document found, setting location to 'Other'Otherlowfallback)citystatefullAddress
confidenceextractedAtextractionMethod)zmetadata.location_infozmetadata.locationr   logslocation_extraction_failedzNo PDF document found)r   reason)	timestampactiondetails)z$setz$pushsuccess)r   r   r   z/Failed to update tender with 'Other' location:    c                 s   s    | ]}|d v V  qdS )0123456789abcdefABCDEFNr   ).0cr   r   r   	<genexpr>j  s    z(process_single_tender.<locals>.<genexpr>r&   .zDetected document ID: zDetected S3 key: zFailed to download PDF (zdocument IDzS3 keyrN   z), set to "Other"z): Failed to download PDF from z, setting location to 'Other'zFailed to download PDFProcessing tender ))r^   r   r   full_addressr   mediummethodllmlocation_extracted)r   r   r   zUpdated tender z with location: zNo valid location extractedz+No valid location extracted, set to "Other"z'No valid location extracted for tender z, set location to 'Other'zError processing tender Texc_info)rX   r7   r   r   r   r   now	isoformattimerB   r   r   rx   rz   allr}   r   r   process_tender_pdfro   r   r   r   )rs   r   r   r^   r?   rD   r   r   pdf_referencer@   r   is_document_idpdf_pathlocation_datar   r   r   process_single_tender   sF  	


	,




(r   Tr`   r]   r.   c                 C   sF  t   }ztd|  d| d ttt}t|d}t||d}| rlz|j|d}tdt	| d W nF t
yk }	 z)td	t|	 d
 |j|d}dd |D }tdt	| d W Y d}	~	nd}	~	ww |j|d}tdt	| d |d| }g }
d}d}d}t|D ]9\}}td|d  dt	|  t||||}|
| |d dkr|d7 }q|d dkr|d7 }q|d7 }qt   | }ddt	| d| d| d| d	t	|t	||||||
d	W S  t
y" }	 z'tjdt|	 dd  d!d"t|	 dddddt   | g d	W  Y d}	~	S d}	~	ww )#zD
    Main function to process all tenders and extract locations
    z7Starting location extraction for tenders (only_missing=, limit=r   )r_   )r#   r_   )r.   zFetched z tenders without locationzSpecialized endpoint failed: z, fetching all tendersc                 S   s   g | ]	}| d s|qS )r   )r7   )r   tr   r   r   
<listcomp>  s    z/process_all_tenders_handler.<locals>.<listcomp>zFiltered to Nz tendersr   r      r&   rd   r   ri   	completedz
Processed z
 tenders: z successful, z	 failed, z skipped	rd   rM   re   rf   rg   rh   ri   rj   rk   z&Error in process_all_tenders_handler: Tr   r   zProcessing failed: )r   r   r}   r%   r"   r$   rr   r   r>   rz   r   r   rX   r<   	enumerater   appendr   )r]   r^   r_   r.   
start_timer   r#   r   tendersr   rk   rg   rh   ri   idxrs   r   rj   r   r   r   process_all_tenders_handler  st   	

"



 
r   z/process-all-tenders)response_modelrequestbackground_tasksc                    s   z3t d| j d| j  t d |jt| j| j| j| jd tdd| j dddddddg d		W S  t	yR } zt j
d
| dd tdt|dd}~ww )a  
    Process all tenders to extract location information (NON-BLOCKING)

    This endpoint:
    1. Starts processing in the background immediately
    2. Returns a response indicating processing has started
    3. Processing happens asynchronously without blocking other API requests

    Background processing:
    1. Fetches all tenders from the backend
    2. For each tender, downloads the PDF document from S3
    3. Extracts location information using LLM (city, state, full address)
    4. Updates the tender record in the backend with the extracted location

    Parameters:
    - only_missing: Default True - Only process tenders without location data
    - use_fallback: Default False - Use regex fallback instead of LLM
    - use_anthropic: Default False - Use Anthropic Claude instead of OpenAI
    - limit: Default 100 - Maximum number of tenders to process in one run

    This endpoint is designed to be run as a cron job once a day.
    z3Received process-all-tenders request: only_missing=r   z=Starting background task for tender processing (non-blocking))r]   r^   r_   r.   
processingz4Location extraction started in background for up to z2 tenders. Processing will continue asynchronously.r   r   z Error starting background task: Tr   i  )rO   detailN)r   r}   r]   r.   add_taskr   r^   r_   rc   r   r   r   rX   )r   r   r   r   r   r   process_all_tendersN  s6   
	r   z/healthc                      s   dddt dS )z-Health check endpoint for location finder APIhealthyzlocation-finderz2.0.0)rd   serviceversionbackend_api)r"   r   r   r   r   health_check  s   r   )r   )F)TFFr`   )3rV   ro   r   loggingr6   r   typingr   r   r   r   fastapir   r   r   pydanticr	   r
   r   r   pathlibr   location_finder_gemr   basicConfigINFO	getLoggerrS   r   routerrX   r   configr7   rp   r"   r$   r%   r[   rc   ra   rr   r   r   r   r   rW   r   postr   r   r   r   r   r   <module>   sv   
	N3&*

 W

Y5