o
    !œ6inó  ã                   @   s0  d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	m
Z
 d dlZd dlZd dlZd dlZd dlZd dlZd dlmZmZmZmZmZ d dlmZ d dlmZ d dl m!Z! d dl"m#Z#m$Z$ d dl%m&Z& e&ƒ  e  'd¡Z(e  'd	d
¡Z)e  'd¡Z*e  'dd¡Z+e  'dd¡Z,dZ-e  'd¡Z.ej/ej0dd e 1e2¡Z3ej4e.d eZ5e  'dd¡Z6ej7e(dZ8e)Z9e!e*e,dZ G dd„ dƒZ:dd„ Z;d@dd„Z<dd„ Z=d d!„ Z>d"d#„ Z?d@d$d%„Z@d&d'„ ZAd(d)„ ZBd*d+„ ZCd,d-„ ZDd@d.d/„ZEd0d1„ ZFd2d3„ ZGd4d5„ ZHd6d7„ ZId8d9„ ZJd:d;„ ZKd<d=„ ZLd@d>d?„ZMdS )Aé    N)ÚPath)ÚDictÚListÚAnyÚOptionalÚTuple)Úcontent_types)ÚOpenAI)Ú!extract_documents_text_compatibleÚcreate_document_extractor)Úload_dotenvÚANTHROPIC_API_KEYÚANTHROPIC_MODELzclaude-3-5-haiku-latestÚOPENAI_API_KEYÚOPENAI_MODELz Qwen/Qwen3-Next-80B-A3B-InstructÚ
OPENAI_URLz#https://api.deepinfra.com/v1/openaiz/Path/to/Company/Info/summaryÚGEMINI_API_KEYz4%(asctime)s - %(name)s - %(levelname)s - %(message)s)ÚlevelÚformat)Úapi_keyÚGENAI_ENGINEÚclaude)r   Úbase_urlc                   @   s4   e Zd ZdZdZdZdZdZdZdZ	dZ
d	Zd
ZdS )Úcolorz[95mz[96mz[36mz[94mz[92mz[93mz[91mz[1mz[4mz[0mN)Ú__name__Ú
__module__Ú__qualname__ÚPURPLEÚCYANÚDARKCYANÚBLUEÚGREENÚYELLOWÚREDÚBOLDÚ	UNDERLINEÚEND© r'   r'   ú</var/www/html/minaions-tender/ai-engine/tender_automation.pyr   ?   s    r   c                    sè  dd„ }dd„ ‰dd„ ‰ ‡ ‡fdd„}z;t | d	ƒ)}t |¡}||ƒ\}}}}|d
u r3t d¡ nt d|d › d|› ¡ t d¡ g }	tƒ }
t|jƒD ]\}}| ¡ }|d
url||krlt d|d › d¡ qOd|v rð|d }|rð|D ]w}z[| 	¡ }| 
d¡dkrÔd|v rÔd|d v rÔ|d d }t|tƒrÔ||
vrÔ|||||||||ƒrÇ|	 ||d dœ¡ |
 |¡ t d|d › d|› ¡ nt d|d › d|› ¡ W qx tyï } zt d|› ¡ W Y d
}~qxd
}~ww zV| ¡ }|rE|D ]J}t|dƒrD|jrD|j|
vrD|d
u s||k r6|	 |j|d dœ¡ |
 |j¡ t d|d › d|j› ¡ qút d|d › d|j› ¡ qúW qO ttfyR   Y qOw |	st d¡ |d
u rt|jƒD ]6\}}| ¡ }t d|¡}|D ]$}||
vr˜|	 ||d dœ¡ |
 |¡ t d|d › d|› ¡ quqenst|ƒD ]9}|j| }| ¡ }t d|¡}|D ]$}||
vr×|	 ||d dœ¡ |
 |¡ t d|d › d|› ¡ q´q¡|d
ur|rt d|¡}|D ]$}||
vr|	 ||d dœ¡ |
 |¡ t d|d › d |› ¡ qët d!d"› ¡ t d#t|	ƒ› ¡ t d"› ¡ |	s5	 W d
  ƒ W d$S d%d&„ |	D ƒW  d
  ƒ W S 1 sHw   Y  W d
S  tys } zt d't|ƒ› ¡ d(t|ƒ› W  Y d
}~S d
}~ww ))uç   
    Extract embedded hyperlinks from PDF files.
    Skip all hyperlinks that appear AFTER the 'à¤…à¤¸à¥à¤µà¥€à¤•à¤°à¤£/Disclaimer' text starts.
    Include hyperlinks that appear BEFORE the disclaimer, even on the same page.
    c           
      S   s–   g d¢}t | jƒD ]?\}}| ¡ }| ¡ }|D ]0}| | ¡ ¡}|dkrG|d|… }||d… }	t d|d › d|› ¡ ||||	f    S qq	dS )zª
        Find page number and the text position where disclaimer starts.
        Returns (page_num, char_position, text_before_disclaimer, text_after_disclaimer)
        )u#   à¤…à¤¸à¥à¤µà¥€à¤•à¤°à¤£/disclaimerÚ
Disclaimeru#   à¤…à¤¸à¥à¤µà¥€à¤•à¤°à¤£/DisclaimeréÿÿÿÿNzDisclaimer found at Page é   z, character position )NNNN)Ú	enumerateÚpagesÚextract_textÚlowerÚfindÚloggerÚinfo)
Ú
pdf_readerÚdisclaimer_markersÚpage_numÚpageÚ	page_textÚpage_text_lowerÚmarkerÚposÚtext_beforeÚ
text_afterr'   r'   r(   Úfind_disclaimer_infoR   s   üþz4extract_links_from_pdf.<locals>.find_disclaimer_infoc                 S   s2   zd| v r| d }t |d ƒW S W dS    Y dS )z¼
        Get the Y-coordinate of an annotation from its rectangle.
        Returns Y position or None if not available.
        In PDF coordinates, Y increases from bottom to top.
        z/Rectr+   N)Úfloat)Úannotation_objÚrectr'   r'   r(   Úget_annotation_y_positionk   s   üþz9extract_links_from_pdf.<locals>.get_annotation_y_positionc                 S   sR   z"| j }t|jƒ}t|ƒ}|dkr || }|d|d   }|W S W dS    Y dS )z“
        Estimate the Y-coordinate where the disclaimer text appears.
        This is approximate since we're working with extracted text.
        r   r+   g333333÷?N)Úmediaboxr>   ÚheightÚlen)r6   r7   Údisclaimer_positionrB   Úpage_heightÚtext_lengthÚfractionÚestimated_yr'   r'   r(   Úestimate_disclaimer_y_position{   s   
ûþz>extract_links_from_pdf.<locals>.estimate_disclaimer_y_positionc                    sH  |du rdS ||k rdS ||krdS d| v r8d| d v r8| d d }||v r-t  d¡ dS ||v r8t  d¡ dS ˆ| ƒ}	ˆ |||ƒ}
|	durn|
durn|	|
kr^t  d|	d	›d
|
d	›d¡ dS t  d|	d	›d|
d	›d¡ dS dD ]&}|| v r–t| | ƒ}|r–||v rŠt  d¡  dS ||v r–t  d¡  dS qpt  d|d › d¡ dS )zH
        Determine if a URL appears before the disclaimer text.
        NTFú/Aú/URIz(URI found before disclaimer (text match)z'URI found after disclaimer (text match)zAnnotation Y (z.1fz) > Disclaimer Y (z) - before disclaimerz) <= Disclaimer Y (z) - after disclaimer)z	/Contentsz/Tz/TUz$Display text found before disclaimerz#Display text found after disclaimerz&Cannot determine URL position on Page r+   z - skipping to be safe)r1   Údebugr2   Ústr)r?   r5   r6   r7   Údisclaimer_pageÚdisclaimer_char_posÚtext_before_disclaimerÚtext_after_disclaimerÚuriÚannotation_yÚdisclaimer_yÚkeyÚcontext_text©rJ   rA   r'   r(   Úis_url_before_disclaimer”   sF   



€z8extract_links_from_pdf.<locals>.is_url_before_disclaimerÚrbNz3No disclaimer section found - will extract all URLsz"Disclaimer section starts on Page r+   z at character position z?Will include URLs before disclaimer, skip URLs after disclaimerzPage z&: Skipping (entirely after disclaimer)z/Annotsz/Subtypez/LinkrK   rL   )Úurlr6   u   : âœ“ Extracted URL: u&   : âœ— Skipped URL (after disclaimer): zError processing annotation: r[   u   : âœ— Skipped URL: z1No embedded links found, using text extraction...z'https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+z: Extracted URL from text: z/: Extracted URL from text (before disclaimer): Ú
z<============================================================zSUMMARY: Total URLs extracted: z"No links found in the PDF documentc                 S   ó   g | ]}|d  ‘qS )r[   r'   )Ú.0Úlinkr'   r'   r(   Ú
<listcomp>F  ó    z*extract_links_from_pdf.<locals>.<listcomp>zError: z!Error extracting links from PDF: )ÚopenÚPyPDF2Ú	PdfReaderr1   r2   Úsetr,   r-   r.   Ú
get_objectÚgetÚ
isinstancerN   ÚappendÚaddÚ	ExceptionrM   Ú	get_linksÚhasattrr[   ÚAttributeErrorÚ	TypeErrorÚreÚfindallÚrangerD   Úerror)Úpdf_pathr=   rY   Úfiler3   rO   rP   r;   r<   Ú	all_linksÚ	seen_urlsr5   r6   r7   ÚannotationsÚ
annotationÚannotation_objectrS   ÚeÚ
page_linksr_   Úurlsr[   r'   rX   r(   Úextract_links_from_pdfK   sÚ   ?

þþ
€€þ þ€ÿ



€üý



€ü

€r*Žt€þr~   c                 C   s¸   |d u rt }	 g }|D ]A}z | d|› ¡ t || ¡}| d|› d|› ¡ | |¡ W q tyL } z| d|› dt|ƒ› ¡ W Y d }~qd }~ww | dt|ƒ› d|› ¡ |S )NzDownloading file from link: zDownloaded file from link z and saved to path zError downloading ú: zTotal downloaded files z
 at paths )r1   r2   ÚfdrÚdownload_file_mainri   rk   rN   rD   )Úbid_dirÚlinksÚprocess_loggerÚdownloaded_filesr_   Ú	file_pathr{   r'   r'   r(   Údownload_linked_documentsL  s    
&€ÿr‡   c                    sb   t  ¡ }t| dƒ‰ t‡ fdd„dƒD ]}| |¡ qW d  ƒ | ¡ S 1 s(w   Y  | ¡ S )z²
    Calculate SHA-256 hash of file content
    
    Args:
        file_path (Path): Path to the file
        
    Returns:
        str: Hexadecimal hash of the file content
    rZ   c                      s
   ˆ   d¡S )Ni   )Úreadr'   ©Úfr'   r(   Ú<lambda>u  s   
 z%calculate_file_hash.<locals>.<lambda>ó    N)ÚhashlibÚsha256rb   ÚiterÚupdateÚ	hexdigest)r†   Úsha256_hashÚ
byte_blockr'   r‰   r(   Úcalculate_file_hashg  s   
ÿ
ÿür”   c                 C   sp  t  d| › ¡ tt| ƒ d¡ƒ}|st  d¡ g g fS i }|D ] }t  d|j› ¡ t|ƒ}||v r;||  |¡ q |g||< q g }g }| ¡ D ]P\}}|d }| |¡ |dd… D ]<}	zt	 
|	¡ | |	¡ t  d|	j› d	|j› d
¡ W q\ ty˜ }
 zt  d|	j› dt|
ƒ› ¡ W Y d}
~
q\d}
~
ww qIt  dt|ƒ› dt|ƒ› d¡ dd„ |D ƒdd„ |D ƒfS )aH  
    Remove duplicate PDF files from a directory based on content.
    For each set of identical files, the first one found is kept and others are removed.
    
    Args:
        directory_path (str): Path to directory containing PDF files
        
    Returns:
        tuple: (kept_files, removed_files) lists of filenames
    u%   ðŸ” Checking for duplicate PDFs in: z*.pdfzNo PDF files foundzAnalyzing: r   r+   NzRemoved duplicate: z
 (same as ú)zError removing r   zKept z unique files, removed z duplicatesc                 S   s   g | ]}|j ‘qS r'   )Úname)r^   rŠ   r'   r'   r(   r`   ¯  s    z)remove_duplicate_pdfs.<locals>.<listcomp>)r1   r2   Úlistr   Úglobr–   r”   ri   ÚitemsÚosÚremoverk   rN   rD   )Údirectory_pathÚ	pdf_filesÚhash_mapr†   Ú	file_hashÚ
kept_filesÚremoved_filesÚ
file_pathsÚ	kept_fileÚ	duplicater{   r'   r'   r(   Úremove_duplicate_pdfsz  s:   



 (€ÿû r¥   c                 C   sŠ   t | ƒ}| ¡ st d| › ¡ g S | ¡ s t d| › ¡ g S g }| ¡ D ]}| ¡ r5| t| 	¡ ƒ¡ q&t dt
|ƒ› d| › ¡ |S )zö
    Create a list of all files in the specified directory with their full paths.
    
    Args:
        directory_path (str): Path to the directory to scan
        
    Returns:
        list: List of full paths to all files in the directory
    zDirectory does not exist: zPath is not a directory: zFound z
 files in )r   Úexistsr1   r2   Úis_dirÚiterdirÚis_fileri   rN   ÚabsoluterD   )rœ   Údir_pathr¢   Úitemr'   r'   r(   Úlist_files_in_directory±  s   €r­   c                    sÈ  ‡ fdd„}‡ fdd„}‡ fdd„}g d¢}i }|D ]
}dg dd	œ||< qt d
d„ |  ¡ D ƒƒ}	|d|	› dƒ g }
|dt| ƒ› dƒ |  ¡ D ]î\}}tj |¡}|d|› dt|ƒ› dƒ | ¡ sk|d|› ƒ qHt|ƒ}d}t	|||ƒ}|dt|ƒ› dƒ t
|ƒD ]°\}}|d }|d }t||d t|ƒ|||ƒ}zstdkr·tjjd|gtjddddd }|j}n6td!krÑtjjtdd"d#d$|d%œgd&}|jd" j}ntd'krítjjjtd(d#d%œd$|d%œgd)}|jd" jj}t||||ƒ}|
 |||||d*œ¡ |d+|d › d,t|ƒ› d-|› ƒ W q… t y5 } z|d.|d › d-|› d/t!|ƒ› ƒ W Y d0}~q…d0}~ww qH|d1t|
ƒ› ƒ |d2ƒ t"||
|ƒ |d3ƒ t#|ƒ |g dd"g d4œ|d5< |d6ƒ ||
fS )7a  
    Analyze tender documents with Claude LLM

    Args:
        documents_text (dict): Dictionary mapping file paths to their text content
        process_logger: Optional process logger for detailed logging

    Returns:
        dict: Extracted information from tender documents
    c                    ó    t  | ¡ ˆ rˆ  | ¡ d S d S ©N©r1   r2   ©Úmsg©r„   r'   r(   Úlog_infoé  ó   
ÿz)analyze_tender_with_LLM.<locals>.log_infoc                    r®   r¯   ©r1   rs   r±   r³   r'   r(   Ú	log_errorî  rµ   z*analyze_tender_with_LLM.<locals>.log_errorc                    r®   r¯   )r1   Úwarningr±   r³   r'   r(   Úlog_warningó  rµ   z,analyze_tender_with_LLM.<locals>.log_warning)ú;Eligibility/Qualification Criteria or conditions for bidderzPre-bid meetingzEvaluation criteria or methodz"Documents needed to submit the bidz"Scope of work of the whole projectzAmount of EMD feeú?Relaxation or preference given to any kind of company or bidderzPayment termszBOQ requirementsÚRisksÚ	RedliningÚ )ÚcontentÚ	citationsÚsummaryc                 s   s    | ]}t |ƒV  qd S r¯   )rD   )r^   Útextr'   r'   r(   Ú	<genexpr>  s   € z*analyze_tender_with_LLM.<locals>.<genexpr>z&Total text size across all documents: ú characterszStarting LLM analysis of z
 documentszAnalyzing document: z (z characters)zSkipping empty document: i0u  zSplit document into z chunksrÂ   Ú
page_ranger+   Úgeminizgemini-2.5-flashz\You are an expert in analyzing tender documents. Extract information with precise citations.i@  çš™™™™™¹?©Úsystem_instructionÚmax_output_tokensÚtemperature©ÚmodelÚcontentsÚconfigr   r   zyYou are an expert in analyzing tender documents. Extract information with precise citations in the specified JSON format.Úuser©Úroler¿   ©rÍ   Ú
max_tokensrË   ÚsystemÚmessagesÚopen_llmrÕ   ©rÍ   rÖ   )Údoc_nameÚ	chunk_idxrÅ   ÚresponseÚparsed_datazSuccessfully analyzed chunk ú/z of zError analyzing chunk z with LLM: Nz<Completed analysis of all documents. Total chunks analyzed: z$Combining analyses with citations...z Generating category summaries...)r¿   rÀ   rÁ   Úcitation_countÚdocuments_referencedzAnnexures or forms or formatsz#LLM analysis completed successfully)$ÚsumÚvaluesrD   r™   rš   ÚpathÚbasenameÚstripÚextract_page_markersÚ create_chunks_with_page_trackingr,   Úcreate_citation_promptÚ	llm_modelÚgemini_clientÚmodelsÚgenerate_contentÚtypesÚGenerateContentConfigrÂ   Úclaude_clientrÖ   ÚcreateÚclaude_modelr¿   ÚopenaiÚchatÚcompletionsr   ÚchoicesÚmessageÚ!parse_llm_response_with_citationsri   rk   rN   Úcombine_analyses_with_citationsÚgenerate_category_summaries)Údocuments_textÚannexure_hintsr„   r´   r·   r¹   Úinfo_to_extractÚextracted_info_with_citationsr¬   Útotal_text_sizeÚdoc_analysesÚdoc_pathÚdoc_textrÙ   Úpage_markersÚ
chunk_sizeÚ
doc_chunksrÚ   Ú
chunk_dataÚ
chunk_textÚchunk_page_rangeÚpromptrÛ   Úresponse_textÚchunk_analysisr{   r'   r³   r(   Úanalyze_tender_with_LLMÝ  s¶   ýþýý	ÿû	þþÿû&"€þÄ@
ûr
  c           	      C   sš   g }dg}|D ]}t  || t j¡D ]}t| d¡ƒ}| ¡ }| ||f¡ qq|jdd„ d tƒ }g }|D ]\}}||vrJ| ||f¡ | 	|¡ q6|S )zÈ
    Extract page markers from document text to track page numbers

    Args:
        doc_text (str): Document text content

    Returns:
        list: List of tuples (page_number, text_position)
    zDoc Page Number:\s*(\d+)r+   c                 S   s   | d S )Nr+   r'   )Úxr'   r'   r(   r‹   —  s    z&extract_page_markers.<locals>.<lambda>)rV   )
rp   ÚfinditerÚ
IGNORECASEÚintÚgroupÚstartri   Úsortre   rj   )	r   r  ÚpatternsÚpatternÚmatchr5   ÚpositionÚ
seen_pagesÚunique_markersr'   r'   r(   rå     s$   
ÿý
€rå   c           	      C   sâ   g }t | ƒ|krtdt | ƒ|ƒ}| | |dt | ƒdœ¡ |S d}|t | ƒk ro|| }|t | ƒk rS|  d||¡}|||d  krB|}n|  d||¡}|||d  krS|}t|||ƒ}| | ||… |||dœ¡ |}|t | ƒk s&|S )a7  
    Create text chunks while tracking which pages each chunk spans

    Args:
        doc_text (str): Document text
        chunk_size (int): Maximum chunk size
        page_markers (list): List of page markers with positions

    Returns:
        list: List of chunk dictionaries with text and page range
    r   )rÂ   rÅ   Ú	start_posÚend_posz

gffffffæ?r\   gš™™™™™é?)rD   Úget_page_range_for_textri   Úrfind)	r   r  r  ÚchunksrÅ   r  ÚendÚparagraph_endÚline_endr'   r'   r(   ræ   £  s<   ü
üéræ   c           	      C   s¢   |sddg dœS d}d}t |ƒD ]\}\}}|| kr|}q t |ƒD ]\}\}}||kr/|}q" g }|durD|durDtt||d ƒƒ}n|durK|g}|||dœS )zþ
    Determine which pages a text span covers

    Args:
        start_pos (int): Start position in text
        end_pos (int): End position in text
        page_markers (list): List of page markers

    Returns:
        dict: Page range information
    N)Ú
start_pageÚend_pager-   r+   )r,   r—   rr   )	r  r  r  r   r!  Úir5   r  r-   r'   r'   r(   r  Ø  s*   ýr  c           	      C   s`   d  dd„ t|ƒD ƒ¡}d}|d rd|d › }d| › d|› d	|› d|› d
|› d|› d}|S )z‘
    Create a balanced prompt that extracts all categories with equal attention
    while providing category-specific guidance where needed.
    r\   c                 S   s"   g | ]\}}|d  › d|› ‘qS )r+   z. r'   )r^   r"  r¬   r'   r'   r(   r`   
  s   " z*create_citation_prompt.<locals>.<listcomp>r¾   r-   zThis chunk spans pages: zt
You are analyzing tender/RFP documents for comprehensive information extraction with precise citations.

Document: z
Chunk: rÝ   uL  

â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•
TASK: Extract ALL of the following information categories from this chunk diligently.
â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•

CATEGORIES TO EXTRACT:
uG2  

â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•
SECTION 1: UNIVERSAL RULES (Apply to ALL categories)
â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•

ã€CITATION RULESã€‘- Apply to EVERY category extracted:
â€¢ Extract the EXACT text span that contains the relevant information
â€¢ Use page numbers from "--- Doc Page Number: X ---" markers in the document
â€¢ If no specific page number found, use null
â€¢ Confidence ratings:
  - high: Direct, unambiguous information
  - medium: Implied or partially stated information  
  - low: Inferred or unclear information

ã€FORMATTING RULESã€‘- Apply to EVERY category's content:
â€¢ Clean up and rephrase content - don't copy verbatim. Summarize and rephrase information.
â€¢ Use markdown: **bold headings**, *italics for emphasis*, bullet points
â€¢ Use proper paragraph breaks and logical organization with clear spacing
â€¢ Make content user-friendly and easy to understand

ã€OUTPUT FORMATã€‘- Required JSON structure:
{
  "extracted_data": [
    {
      "category": "Category name",
      "found": true/false,
      "content": "Well-formatted, rephrased information with proper structure and line breaks",
      "citations": [
        {
          "text_span": "Exact text from document that supports this information",
          "page_number": page_number_or_null,
          "confidence": "high/medium/low"
        }
      ]
    }
  ]
}

â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•
SECTION 2: CATEGORY-SPECIFIC GUIDANCE
â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•

Below are specific instructions for categories that need additional clarity.
For categories not listed here, apply standard extraction using the universal rules above.

â”Œâ”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”
â”‚ CATEGORY: Eligibility/Qualification Criteria                                â”‚
â””â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”˜
Extract ALL criteria bidders must meet:
â€¢ Financial criteria (turnover, net worth requirements)
â€¢ Technical criteria (experience, past work orders, certifications)
â€¢ Legal criteria (registration requirements, not blacklisted)
â€¢ Capacity criteria (equipment, manpower)
â€¢ Any minimum/maximum thresholds specified
Include specific values, percentages, and time periods mentioned.

â”Œâ”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”
â”‚ CATEGORY: Pre-bid meeting                                                   â”‚
â””â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”˜
Extract if any information is given:
â€¢ Date and time of the meeting
â€¢ Mode (online/offline/hybrid)
â€¢ Venue or online meeting link
â€¢ Contact details for queries

â”Œâ”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”
â”‚ CATEGORY: Evaluation criteria or method                                     â”‚
â””â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”˜
Extract the complete evaluation methodology:
â€¢ Scoring criteria and weightages
â€¢ Technical vs commercial evaluation split
â€¢ Minimum qualifying marks/scores
â€¢ Complete scoring tables if specified
â€¢ L1/lowest bid selection criteria

â”Œâ”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”
â”‚ CATEGORY: Documents needed to submit the bid                                â”‚
â””â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”˜
Focus on documents that bidders should submit with their bids, 
such as certificates, experience letters, financial documents, compliance statements etc. 
If a document is mentioned as 'Additional Doc 1 (Requested in ATC) or Additional Document 2 (Requested in ATC) etc' 
in the RFP/Tender, it needs to be ignored. These are documents with generic description like 
'Additional document format <n> as specified in ATC'. These documents should not be included in the final list 
of documents. 
*Also DO NOT include any annexures, forms, proformas given in the RFP document chunk in this list.* Annexures will be taken care of separately.

â”Œâ”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”
â”‚ CATEGORY: Amount of EMD fee                                                 â”‚
â””â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”˜
Extract:
â€¢ Exact EMD amount
â€¢ Mode of payment (DD, BG, online, etc.)
â€¢ Bank details if specified

â”Œâ”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”
â”‚ CATEGORY: Relaxation or preference                                          â”‚
â””â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”˜
Extract any preferences given to:
â€¢ MSE/MSME bidders
â€¢ Startups (DIPP registered)
â€¢ SC/ST owned enterprises
â€¢ Local suppliers
â€¢ Make in India preferences
â€¢ Any turnover/experience relaxations
â€¢ Any EMD Fee relaxations

â”Œâ”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”
â”‚ CATEGORY: Payment terms                                                     â”‚
â””â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”˜
Extract complete payment structure:
â€¢ Payment milestones and percentages
â€¢ Advance payment provisions
â€¢ Running bill/monthly payment terms
â€¢ Final payment conditions
â€¢ Payment timeline after bill submission
â€¢ Any deductions (retention, taxes)

â”Œâ”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”
â”‚ CATEGORY: BOQ requirements                                                  â”‚
â””â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”˜
Extract:
â€¢ Bill of Quantities structure
â€¢ How rates should be quoted (per unit, lump sum, percentage)
â€¢ Any specific BOQ filling instructions
â€¢ Price variation clauses
â€¢ Quantity variation limits

â”Œâ”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”
â”‚ CATEGORY: Scope of work                                                     â”‚
â””â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”˜
Extract:
â€¢ Main deliverables and activities
â€¢ Work location/site details
â€¢ Duration/timeline
â€¢ Key milestones
â€¢ Contractor's responsibilities
â€¢ Items/services included and excluded

â”Œâ”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”
â”‚ CATEGORY: Risks                                                             â”‚
â””â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”˜
Extract all risky clauses:
â€¢ Penalty clauses
â€¢ Liquidated Damages (LD) / Mutually Agreed Damages (MAD)
â€¢ Indemnification requirements
â€¢ Termination conditions
â€¢ Forfeiture conditions
â€¢ Liability caps or unlimited liability
â€¢ Force majeure exclusions

â”Œâ”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”
â”‚ CATEGORY: Redlining                                                         â”‚
â””â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”˜
Identify potential queries/clarifications for:
â€¢ Ambiguous terms or conditions
â€¢ One-sided or unfair clauses
â€¢ Missing information that should be clarified
â€¢ Terms that could be negotiated in pre-bid meeting
â€¢ Inconsistencies in the document

â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•
SECTION 3: FINAL REMINDERS
â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•

âœ¦ Extract ALL 12 categories - do not skip any
âœ¦ If a category is not found in this chunk, mark found: false and content: "Not found in this chunk"
âœ¦ Apply CITATION RULES to every extraction
âœ¦ Apply FORMATTING RULES to all content
âœ¦ Each category deserves equal attention and thoroughness

â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•
DOCUMENT CHUNK TO ANALYZE:
â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•

u­  

â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•
Return ONLY the JSON response with all 12 categories. No additional text.
Use proper markdown formatting (**bold**, *italic*, bullet points, proper spacing, line breaks) in content fields.
)Újoinr,   )	rÙ   rÚ   Útotal_chunksr  rû   rÅ   Úcategories_listÚ	page_infor  r'   r'   r(   rç     s*   ýüüûô < Ä Crç   c                 C   sØ   zt  | ¡W S  t jy   Y nw z*|  dd¡}| dd¡}| dd¡}| dd¡}| d	d¡}| d
d¡}t  |¡W S  t jyE   Y nw z|  dd¡}t  |¡W S  t jy\   Y nw t d| dd… › d¡ |S )z3Safely parse JSON with multiple fallback strategiesú\z\\z\\nz\nz\\tz\tz\\rz\rz\\\\z\\"z\"r¾   zCould not parse JSON: Néd   ú...)ÚjsonÚloadsÚJSONDecodeErrorÚreplaceÚloggingr¸   )Újson_stringÚdefaultÚfixed_stringÚcleaned_stringr'   r'   r(   Úsafe_json_parseÕ  s0   ÿÿÿr3  c           	      C   s¶   z2t  d| t j¡}|r| d¡}nt  d| t j¡}|r!| d¡}ntdƒ‚t|ƒ}t||||ƒ}|W S  tyZ } zt 	d|› d|› dt
|ƒ› ¡ t| ||ƒW  Y d	}~S d	}~ww )
a,  
    Parse LLM response and extract structured data with citations

    Args:
        response_text (str): Raw LLM response
        doc_name (str): Document name
        chunk_idx (int): Chunk index
        chunk_text (str): Original chunk text

    Returns:
        dict: Parsed structured data
    z```json\s*(.*?)\s*```r+   z\{.*\}r   zNo JSON found in responsezError parsing LLM response for z chunk r   N)rp   ÚsearchÚDOTALLr  Ú
ValueErrorr3  Úvalidate_and_enhance_citationsrk   r1   r2   rN   Úfallback_text_parsing)	r  rÙ   rÚ   r  Ú
json_matchÚjson_strrÜ   Úenhanced_datar{   r'   r'   r(   rö   ÷  s$   ÿ €ýrö   c              
   C   sì   dg i}d| vr
|S | d D ]e}|  dd¡sq|  dd¡d|  dd¡g d	œ}|  d
g ¡D ];}||  dd¡|  d¡||  dd¡dœ}|d }	|	r`|	 ¡ | ¡ v r`| ¡  |	 ¡ ¡}
|
dkr`|
|d< |d
  |¡ q,|d
 rs|d  |¡ q|S )a+  
    Validate and enhance citation data from LLM response

    Args:
        parsed_data (dict): Parsed JSON data from LLM
        doc_name (str): Document name
        chunk_idx (int): Chunk index
        chunk_text (str): Original chunk text

    Returns:
        dict: Enhanced citation data
    Úextracted_dataÚfoundFÚcategoryÚUnknownTr¿   r¾   )r>  r=  r¿   rÀ   rÀ   Ú	text_spanÚpage_numberÚ
confidenceÚmedium)Údocument_namer@  rA  Úchunk_indexrB  r*   Útext_position)rg   r/   r0   ri   )rÜ   rÙ   rÚ   r  r;  r¬   Úenhanced_itemÚcitationÚenhanced_citationr@  Úspan_positionr'   r'   r(   r7    s8   

ü

û	€r7  c                 C   s   dg i}|S )zû
    Fallback method to parse text when JSON parsing fails

    Args:
        response_text (str): Raw response text
        doc_name (str): Document name
        chunk_idx (int): Chunk index
    Returns:
        dict: Basic parsed data structure
    r<  r'   )r  rÙ   rÚ   r<  r'   r'   r(   r8  U  s   r8  c                 C   s  |D ]†}d|vr	q|d }|d }|d }|  dg ¡D ]l}|  dd¡}d}	|D ]}
| ¡ |
 ¡ v s9|
 ¡ | ¡ v r=|
}	 nq'|	sAq|  dd¡}|r‡| ¡ r‡| |	 d ri| |	 d  d	|› d
|d › d|› 7  < nd|› d
|d › d|› | |	 d< | |	 d  |  dg ¡¡ qqdS )a  
    Combine all chunk analyses into final structured output with citations

    Args:
        extracted_info_with_citations (dict): Final output dictionary
        doc_analyses (list): List of chunk analyses
        info_to_extract (list): Categories to extract
    rÜ   rÙ   rÚ   r<  r>  r¾   Nr¿   z

--- From z (chunk r+   z) ---
z	--- From rÀ   )rg   r/   rä   Úextend)rü   rþ   rû   ÚanalysisrÜ   rÙ   rÚ   r¬   r>  Úmatching_categoryÚextract_categoryr¿   r'   r'   r(   r÷   g  s6   	 þ,"
ÿ€êør÷   c                 C   s²   |   ¡ D ]R\}}|d }|d }|r| ¡ dkrd|d< n&| d¡}t|ƒdkr;t|ƒdkr6|d	d… d
 n||d< n|d d |d< t|ƒ|d< ttdd„ |D ƒƒƒ|d< qd	S )zŒ
    Generate summaries for each category

    Args:
        extracted_info_with_citations (dict): Extracted information with citations
    r¿   rÀ   r¾   zNo information foundrÁ   Ú.é   éÈ   Nr)  r   rÞ   c                 S   r]   )rD  r'   )r^   Úcr'   r'   r(   r`   ª  ra   z/generate_category_summaries.<locals>.<listcomp>rß   )r™   rä   ÚsplitrD   r—   re   )rü   r>  Údatar¿   rÀ   Ú	sentencesr'   r'   r(   rø   “  s   

&ðrø   c           
      C   s4  t j | d¡}t j|dd t j |d¡}t j |d¡}t|dƒ}tj||dd W d	  ƒ n1 s4w   Y  t|dd
dF}| d¡ | d¡ | ¡ D ]/\}}| |› d¡ | dt	|ƒ d ¡ | | 
¡ pmd¡ | d¡ d|v r~|d  
¡ }	qOW d	  ƒ n1 s‰w   Y  t d|› ¡ ||	fS )zé
    Save extracted information to a file
    
    Args:
        bid_dir (str): Directory to save the file to
        extracted_info (dict): Extracted information to save
        
    Returns:
        str: Path to the saved file
    Útender_analysisT)Úexist_okztender_analysis.txtztender_analysis.jsonÚwé   )ÚindentNzutf-8)ÚencodingzTENDER ANALYSIS REPORT
z4==================================================

r\   Ú-zNot found in the documentsz6

==================================================

ÚEligibilityr¿   zSaved extracted information to )rš   râ   r#  Úmakedirsrb   r*  ÚdumpÚwriter™   rD   rä   r1   r2   )
r‚   Úextracted_infoÚoutput_pathÚoutput_text_fileÚoutput_json_fileru   rŠ   r>  r2   Úeligibilityr'   r'   r(   Úsave_extracted_info¬  s,   ÿ


€úürf  c                   C   s   t  dt› ¡ dS )zj
    Get company information from Google Docs
    
    Returns:
        str: Company information text
    z!Getting company information from uh	  
    Yugasa Company Information for Government tendering:

Company Name: Yugasa Software Labs Pvt Ltd
Office addresses: 
Gurgaon Address: Yugasa Software Labs, 3rd floor, Tower B, Unitech Cyber Park, Sector 39, Gurgaon 122001, Haryana
Lucknow Address: Yugasa Software Labs, 3rd floor, TC-14, Vibhuti Khand, Gomti Nagar, Lucknow, Uttar Pradesh 226010
US Address: Yugasa Software LLC, 370 Campus Drive, Somerset, New Jersey 08873

Company registration:
Yugasa Software Labs Pvt Ltd is a legal entity in India registered under Indian Companies Act, 2013. Registered as Private Limited Company with Registrar of Companies, Delhi.
The CIN of the company is U72900HR2015PTC056837

Company website: www.yugasa.com
Company Phone: +918800522257
Company Email: contact@yugasa.com
Contact Person: Dharmesh Jaggi
Person Authorized to sign Bid Documents: Dharmesh Jaggi

PAN of Yugasa: AAACY7582J

Certifications:
CMMI 3
ISO 27001:2022
ISO 9001:2015

Valid GST registration. GST Number of Yugasa: 06AAACY7582J1ZU

Yugasa is the official Meta Business Partner as ISV solution provider for WhatsApp.

Turnover of previous years:

2024-25: INR 3.52 Crores
2023-24: INR 3.29 Crores
2022-23: INR 3.19 Crores
2021-22: INR 3.35 Crores
2020-21: INR 2.18 Crores

Yugasa software Labs Pvt Ltd is not barred or blacklisted by any PSU, government department, or private sector entity. 

Yugasa software labs pvt ltd is an MSME and registered Startup

Manpower on Yugasaâ€™s payroll:
Currently Yugasa has 40 employees on its payroll.

Some previously done projects of Yugasa:

Project 1
Client Name: Narayana Hospitals
Project Title: Development and Implementation of WhatsApp Business API Solution and AI-enabled Chatbot for support automation
Project Scope:
- Integration of WhatsApp Business API with NH's existing systems
- Development of AI-enabled chatbot for patient interaction and support management
- Multi-language support including English and regional languages
- Real-time response and query resolution for patients
- Continuous support and maintenance of the chatbot system
Project Value: The total value of the project till date is INR 30,35,605, and the project is ongoing.
Project Duration: September 30th, 2022 to Present (Ongoing)

Project 2
Client Name: NSC Guwahati
NSC Guwahati is a Ministry of Culture organization 
Project Title: Cashless ticket booking chatbot on WhatsApp.
Project Value: INR 14 lakhs 

    )r1   r2   ÚCOMPANY_INFO_DOCr'   r'   r'   r(   Úget_company_infoÐ  s   	rh  c              
      sê  ‡ fdd„}‡ fdd„}|dƒ |   dd¡d }|   d	d¡d }|d
t|ƒ› dƒ |dt|ƒ› dƒ d|› d|› d|› d}z|dt› dƒ tdkratjjd|gtjddddd}|j}	n6tdkr{t	j
jtdddd|dœgd}|jd j}	ntdkr—tjjjtd ddœd|dœgd!}|jd jj}	|d"t|	ƒ› dƒ |d#|	› ƒ d$|	 ¡ v }
t d%|	tj¡}|r¿| d&¡ ¡ nd'}|d(|
rÇd)nd*› ƒ |
|	fW S  tyô } z|d+t|ƒ› ƒ d,d-t|ƒ› fW  Y d.}~S d.}~ww )/a%  
    Check if the company is eligible for the bid

    Args:
        extracted_info (dict): Extracted tender information
        company_info (str): Company information
        process_logger: Optional process logger for detailed logging

    Returns:
        tuple: (is_eligible, reason)
    c                    r®   r¯   r°   r±   r³   r'   r(   r´   1  rµ   z#check_eligibility.<locals>.log_infoc                    r®   r¯   r¶   r±   r³   r'   r(   r·   6  rµ   z$check_eligibility.<locals>.log_errorzStarting eligibility check...rº   r¾   r¿   r»   zEligibility criteria length: rÄ   zExemptions length: zÞ
    You need to determine if the company is eligible to apply for a tender based on the eligibility criteria, exemptions to some special type of companies (if any), and company information.

    Eligibility Criteria:
    z

    Exemptions:
    z

    Company Information:
    a3  

    Please analyze if the company meets all the eligibility criteria. Return your answer in the following format:

    Eligible: [Yes/No]
    Reason: [Detailed explanation of why the company is eligible or not]
    Missing Requirements: [List any requirements the company doesn't meet, if applicable]
    zCalling LLM (z) for eligibility assessment...rÆ   zgemini-2.5-flash-preview-04-17z^You are an expert in tender eligibility assessment. Be thorough and accurate in your analysis.iÐ  rÇ   rÈ   rÌ   r   r   rÐ   rÑ   rÓ   r×   rÕ   rØ   zEligibility response received: zEligibility response: zeligible: yesz!Reason:\s*(.*?)(?:\n\n|\n[A-Z]|$)r+   zNo detailed reason providedz%Eligibility check completed. Result: ÚEligiblezNot Eligiblez%Error checking eligibility with LLM: Fz Error during eligibility check: N)rg   rD   rè   ré   rê   rë   rì   rí   rÂ   rî   rÖ   rï   rð   r¿   rñ   rò   ró   r   rô   rõ   r/   rp   r4  r5  r  rä   rk   rN   )ra  Úcompany_infor„   r´   r·   Úeligibility_criteriaÚ
exemptionsr  rÛ   r  Úis_eligibleÚreason_matchÚreasonr{   r'   r³   r(   Úcheck_eligibility$  sr   üù
öýý	ÿûþþ
€þrp  r¯   )Nrš   ÚshutilÚtimerp   Úrequestsrc   Ú	anthropicr   r.  Úpathlibr   Úbid_prep_automationÚbpaÚbid_queriesÚbqÚfile_download_robustr€   ÚargparseÚsysr*  Útypingr   r   r   r   r   Úgoogle.generativeaiÚgenerativeaiÚgenaiÚgoogle.generativeai.typesr   rì   rñ   r	   Údocument_extractorr
   r   Údotenvr   Úgetenvr   r   r   r   r   rg  r   ÚbasicConfigÚINFOÚ	getLoggerr   r1   Ú	configureré   rè   Ú	Anthropicrî   rð   r   r~   r‡   r”   r¥   r­   r
  rå   ræ   r  rç   r3  rö   r7  r8  r÷   rø   rf  rh  rp  r'   r'   r'   r(   Ú<module>   s‚    



þ  
7
, #$5- 
Q"(6,$T