o
    !6i

                    @   s  d dl Z d dlZd dlZd dlZd dlZd dlZd dlmZmZ d dl	m
Z
mZ d dlmZ d dlmZmZmZmZmZmZ d dlZd dlZd dlZd dlZd dlZd dlmZ e  e dZe dd	Ze d
Ze ddZe ddZd dl m!Z" d dl m#Z# d dl$m%Z% ej&ej'dd e(e)Z*dZ+dZ,eZ-e dZ.e ddZ/ej0e-dZ1e"j2ddZ3eZ4d	Z5e%eedZ$G dd dZ6G dd dZ7d d! Z8d"d# Z9d`d$ed%ee fd&d'Z:d(e;d%ee fd)d*Z<d+e;d%ee fd,d-Z=d%efd.d/Z>d0e;d%e;fd1d2Z?d3d4 Z@d`d5e;d6e;d7e;d8eee;ef  d%e;f
d9d:ZAd`d;ed<ed=e;d%e;fd>d?ZBd`d;ed<ed=e;d@e;d%e;f
dAdBZCd;edCee d%efdDdEZDd;edFee d@e;d%ee fdGdHZEd`d;ed$ed7e;d@e;d=e;d%e;fdIdJZFdKdL ZGdMe;dNe;d7e;d=e;d$ed%e;fdOdPZHdQe;d%ee;ef fdRdSZId`dTe;dUe;dVe;d7e;d=e;d%e;fdWdXZJd`dTe;dYe;d7e;d=e;dZe;d[e;d@e;d%e;fd\d]ZKd`d;ed$ed7e;d=e;d5e;d%e;fd^d_ZLdS )a    N)	PdfReader	PdfWriter)Path	PosixPath)datetime)ListDictTupleAnyOptionalUnion)load_dotenvANTHROPIC_API_KEYANTHROPIC_MODELzclaude-3-5-haiku-latestOPENAI_API_KEYOPENAI_MODELz Qwen/Qwen3-Next-80B-A3B-Instruct
OPENAI_URLz#https://api.deepinfra.com/v1/openai)types)OpenAIz4%(asctime)s - %(name)s - %(levelname)s - %(message)s)levelformatz
D:\Tendersz+C:\Users\Vivek\minaions\sample_company_docsGEMINI_API_KEYGENAI_ENGINEclaude)api_keygemini)
model_name)r   base_urlc                   @   s4   e Zd ZdZdZdZdZdZdZdZ	dZ
d	Zd
ZdS )colorz[95mz[96mz[36mz[94mz[92mz[93mz[91mz[1mz[4mz[0mN)__name__
__module____qualname__PURPLECYANDARKCYANBLUEGREENYELLOWREDBOLD	UNDERLINEEND r,   r,   >/var/www/html/minaions-tender/ai-engine/bid_prep_automation.pyr   9   s    r   c                   @   s    e Zd ZdZdZdZdZdZdS )DocumentTypestandard
experiencecustomannexureregeneratedN)r   r    r!   STANDARD
EXPERIENCECUSTOMANNEXUREREGENERATEDr,   r,   r,   r-   r.   E   s    r.   c                 C   s  zt | dD}t|}t|j}d}t }|D ]+}d|  kr#|kr:n n|j|d  }	||	 d 7 }||	 qtd| d qW d   n1 sMw   Y  t |d}
||
 W d   n1 sgw   Y  |rq|W S d	}t |d}|	 }W d   n1 sw   Y  t
jjjd
ddddd|ddddt|dddgdgd}|jd j}|W S  ty   td|   Y dS  ty } ztd|  W Y d}~dS d}~ww )a#  
    Extracts text from specified pages of a PDF.

    Args:
        pdf_path (str): Path to the PDF file.
        page_numbers (list): List of page numbers to extract (1-based indexing).

    Returns:
        str: Concatenated text from the specified pages, or None if an error occurs.
    rb    
zPage number z is out of range. Skipping.Nwba  
            This message contains scanned pages from a PDF document which will contain a format or annexure.
            
            For EACH PAGE, extract ALL text content visible in it along with its formatting information like tables etc.

            For preserving the tables formatting, you may use underscores '_' and pipe '|' symbols.

            NOTE that the text in the scanned pages can be in any language.
            zclaude-haiku-4-5  皙?zYou are an expert at extracting annexure or table content from scanned documents. Extract ALL text visible on EACH page, preserving the original formatting as much as possible.usertexttyperA   documentbase64application/pdfutf-8rC   
media_typedatarC   sourcerolecontentmodel
max_tokenstemperaturesystemmessagesr   zError: PDF file not found at zAn error occurred: )openr   lenpagesr   extract_textadd_pageprintwritereadselfclaude_clientrU   createrE   	b64encodedecoderO   rA   FileNotFoundError	Exception)pdf_pathpage_numbersoutput_pathpdf_filereader	num_pagesextracted_textwriterpage_numpageout_filepromptpdf_contentresponseer,   r,   r-   get_annexure_contentM   sh   
	
rt   c                 C   s   t j|std| t|d}| }W d   n1 s!w   Y  d|  d}tjjt	dddd	d
|ddddt
|dddgdgd}|jd jS )am  
    Generates PDF-compatible HTML based on text content and formatting from a reference PDF.
    
    Args:
        doc_text (str): Raw text content to be formatted
        pdf_path (str): Path to reference PDF for formatting guidance
        
    Returns:
        str: Generated HTML code
    
    Raises:
        FileNotFoundError: If PDF file doesn't exist
    zReference PDF not found: r9   NzAnalyze the attached reference format and styling of a document in the PDF and create PDF-compatible HTML for this text content:
    
    <raw_content>
    a  
    </raw_content>

    Follow these exact requirements:
    
    1. Layout Analysis:
    - Study the reference PDF's structure (headers, margins, spacing)
    - Identify font styles (sizes, families, weights) through visual patterns
    - Note paragraph spacing and indentation rules
    
    2. Formatting Rules:
    - Preserve ALL original text styling (bold/italic/underline) using semantic HTML
    - Maintain EXACT table structures from reference (columns, borders, alignment)
    - Replicate list formatting (bullet styles, numbering, indentation)
    
    3. HTML Requirements:
    - Use A4 page size: <style>@page { size: A4 portrait; margin: 2cm }</style>
    - Tables: Use <table> with inline CSS for borders/padding
    - Placeholders: <span style="background-color: #ffff00; border: 1px dashed #000">fill_data_here</span>
    - Lists: Convert bullets to <ul> with 1em left margin
    - Headers: Match hierarchy (h1-h6) from reference
    
    4. Output Constraints:
    - No markdown, ONLY HTML/CSS
    - Include complete <html> document structure
    - Ensure print compatibility with PDF converters
    - Preserve white-space: pre-wrap for code blocks
    
    Return ONLY the HTML code with no additional commentary.r>   r?   zuYou are a professional document formatting expert specializing in PDF-to-HTML conversion with pixel-perfect accuracy.r@   rA   rB   rD   rE   rF   rG   rH   rK   rM   rP   r   )ospathexistsrc   rV   r]   r_   rU   r`   claude_model_mainrE   ra   rb   rO   rA   )doc_textre   rh   rq   rp   rr   r,   r,   r-   document_formatting   s8   
"rz   extracted_inforeturnc                 C   s(  |d u rt }	 | ddd }|r|dkrt d g S | ddd }| ddd }d|r5|d d	 nd
 d|r@|d d nd
 d| d}z)tdkrttjjtddddddd|dgd}|d|j	d j
  |j	d j}ntdkrtjjjtdddd|dgd}|jd jj	}td|tj}|r|d}	nAtd|tj}|r|d}	n1|d }
g }d!}|
D ]}| d"s|rd#}|| | d$r nq|rd |}	n|}	|	 }	|	d"sd"|	 }	|	d$s|	d$ }	zt|	}W n tjy   |d% t|}Y nw |d&t | d' t!|D ]M\}}|d(|d  d)|d*d+ d,|d-d+  |d-t"j#krn|d.rnd/}t$||d.tj%}|rc|d |d.< |d0|d.  q"|W S  t&y } zt 'd1t(|  t)|W  Y d }~S d }~ww )2N"Documents needed to submit the bidr:   rO   zNot found in any documentz7No document requirements found in extracted information"Scope of work of the whole project;Eligibility/Qualification Criteria or conditions for biddera+  
    You are an expert in government tender document preparation. I'll provide you with text 
    describing document requirements for a tender bid. Analyze this text and extract a structured list 
    of all required documents.
    
    For each document, identify:
    1. Document name (brief but descriptive)
    2. Document type (exactly one of: STANDARD, EXPERIENCE, CUSTOM)
       - STANDARD: Standard Company documents like company registrations/incorporation, certificates, accreditations, 
         PAN, GST, MSME certificate, employee details, Team CVs, Turn over & financial documents etc. that the company already has
       - EXPERIENCE: Past work experience documents like work orders, completion certificates, PO, contracts etc.
       - CUSTOM: Documents to be created specifically for this bid (declarations, proposals, cover letter etc.)
    3. Description of what the document should contain
    
    IMPORTANT EXCLUSION RULE:
    DO NOT include any document that is an Annexure, Format, Form, or Proforma specifically provided 
    in the RFP document with a proper name or number (e.g., "Annexure-I", "Format A", "Form 6", "Proforma-B").
    These annexure/format documents will be handled separately. Only extract STANDARD, EXPERIENCE, and 
    CUSTOM documents that the bidder needs to prepare or already possesses.
    
    ADDITIONAL NOTES:
    a) No two documents should be mentioned in one entry. Each document should be a separate entry in the list.
       DO NOT repeat any same document entry in the output.
    b) If a document name is 'Additional Document 1, 2, 3 or 4 (Requested in ATC)', drop it. These are 
       documents with generic description like 'Additional document format <n> as specified in ATC'. 
       These documents should not be included in the final list of documents.
    c) If a document requirement says "as per Annexure X" or "in Format Y" or "use Form Z", do NOT include 
       it in your output - these are annexure-based documents that will be handled separately.
    
    Tender context for reference:
    SCOPE OF WORK SUMMARY:   zNot providedz#
    ELIGIBILITY CRITERIA SUMMARY:   z*
    
    Document requirements text:
    a  
    
    Return your analysis as a JSON array of objects with these keys:
    - "name": String (Document name)
    - "type": String (One of: STANDARD, EXPERIENCE, CUSTOM)
    - "description": String (Detailed description of the document)
    
    Make sure to identify every required document (excluding annexures/formats/forms), even if it's 
    mentioned in passing or in a complex format.
    r     r;   enabled  rC   budget_tokenszrYou are an expert in analyzing tender documents. Extract the requested information accurately in JSON format only.r@   rM   rQ   rR   rS   thinkingrT   rU   K   
*************************************************
🧠 Minaions Thinking: r   open_llmrT   rQ   rU   z```json\s*(.*?)\s*```z\[\s*\{.*\}\s*\]r<   F[T]zLFailed to parse LLM response as JSON, trying to extract individual documentsu    👨‍💻 MinAIons identified z required documentsu   📄Document : nameUnknownz	 - Type: rC   source_documentz.[\w\-_.]+\.(?:pdf|doc|docx|txt|xls|xlsx|PDF)\bz  Found in: z/Error identifying required documents with LLM: )*loggergetwarning	llm_modelr_   rU   r`   rx   inforO   r   rA   openaichatcompletionsr   choicesmessageresearchDOTALLgroupsplitstrip
startswithappendendswithjoinjsonloadsJSONDecodeErrorextract_documents_from_textrW   	enumerater.   r7   findall
IGNORECASErd   errorstrsimple_document_extraction)r{   process_loggerdocument_infoscope_of_workeligibility_criteriarp   rr   response_text
json_matchjson_strlines
json_linesstartedlinerequired_docsidocpatternmatchesrs   r,   r,   r-   identify_required_documents   s   	
"-




	0r   rA   c                 C   s   g }|  d}i }|D ]E}| }td|}|r,|r$d|v r$|| d|di}qtd|}|r>|r>|d|d< qtd|}|rP|rP|d|d< qq|r\d|v r\|| |S )	z
    Helper function to extract document information from text when JSON parsing fails
    
    Args:
        text (str): The text response from Claude
        
    Returns:
        list: List of document dictionaries
    r<   z!(?:^|\s)(?:Document|Name):\s*(.+)r   r;   z,(?:^|\s)Type:\s*(STANDARD|EXPERIENCE|CUSTOM)rC   z(?:^|\s)Description:\s*(.+)description)r   r   r   r   r   r   )rA   docsr   current_docr   
name_match
type_match
desc_matchr,   r,   r-   r     s.   



r   r   c                    s   g }|  d}|D ]@}| }|sq	td|rItdd| tj}t fdddD r1tj}nt fdddD r?tj	}|
 | d	d
 q	|S )z
    Simple fallback method to extract document requirements when LLM fails
    
    Args:
        document_info (str): Document requirements text
        
    Returns:
        list: List of basic document dictionaries
    r<   u   ^(\d+\.|\-|\*|\•|\–) r:   c                 3       | ]	}|   v V  qd S Nlower.0termdoc_namer,   r-   	<genexpr>      z-simple_document_extraction.<locals>.<genexpr>)
certificateregistrationpangstmsmeisocmmizbalance sheetzincome tax returnzaudit reportc                 3   r   r   r   r   r   r,   r-   r     r   )z
work orderzcompletion certificater0   	portfolioN)r   rC   r   r   )r   r   r   matchsubr.   r6   anyr4   r5   r   )r   r   r   r   doc_typer,   r   r-   r     s*   

r   c              
   C   s  t jg t jg i}tj| d}tj|r:t|D ]}|dr9|t j 	|tj||t
tj||d qtj| d}tj|rlt|D ]}|drk|t j 	|tj||t
tj||d qLtdt|t j  dt|t j  d |S )z
    Get a list of available company documents in the repository
    
    Returns:
        dict: Dictionary mapping document types to lists of available documents
    Standard_Documents)z.pdfz.docxz.jpgz.pngz.xlsxz.csv)r   rv   r   Experience_DocumentsFound z standard documents and z experience documents)r.   r4   r5   ru   rv   r   rw   listdirr   r   get_document_descriptionr   r   rW   )company_docs_dircompany_docsstd_doc_pathfileexp_doc_pathr,   r,   r-   get_available_company_documents  s6   




r   	file_pathc                 C   s   | d }t j|r't|ddd}|  W  d   S 1 s"w   Y  t j| }t j|d }|ddd	d}|S )
z
    Get description for a document based on filename or content
    
    Args:
        file_path (str): Path to the document
        
    Returns:
        str: Document description
    z.descrrG   encodingNr   _ -)	ru   rv   rw   rV   r]   r   basenamesplitextreplace)r   	desc_pathffilenamer   r,   r,   r-   r     s   
 r   c                 C   s   | st d dS t dt|  d g d}t|ddd}tj||d	}|  ||  W d   dS 1 s9w   Y  dS )
zWrites a list of dictionaries to a CSV file.

    Args:
        data: A list of dictionaries.
        filename: The name of the CSV file to write to.
    u3   ❌ No required documents found in the RFP details.Nu   
📄 z3 documents identified to be submitted for this bid.)r   rC   r   r   rX   
isSelectedwr:   )newline)
fieldnames)r[   rW   rV   csv
DictWriterwriteheader	writerows)rJ   r   r   csvfilerl   r,   r,   r-   write_list_of_docs_to_csv*  s   "r   bid_dirr   company_infor   c                 C   s  |d u rt }	 tj| d}tj|d}tj|r8t|ddd}t|}W d    n1 s2w   Y  n
td td dS |	d	d
d }	tj| d}
tj
|
dd tjtjtjtjfD ]}tj
tj|
|dd qb|}t|tj|d t|}t d|  g g g d}g }|D ]}d }d }td| |d dkrt|||
|\}}n0|d dkrt|||
|	|\}}n|d dkrt||||	|
|}n|d dkrt||||
| |}|r|| |s|r|rtj||
nd
}|d |d |d |d |d |d |d |dd q|d |d  |d |d |d d
dd q|d  |d!t|d   |d"t|  |d#t|d   |d rh|d$d%|d   |
||fS )&Ntender_analysisztender_analysis.jsonr   rG   r   uA   ❌ Looks like this RFP has not been analysed by Minaions before.zNPlease check the RFP directory path or analyze the RFP through Minaions first.z/final/docs/directoryr~   r:   rO   
final_docsTexist_okzrequired_documents.csvz"Available company documents are: 
)preparedmissingindexzRequired Document:rC   r4   r5   r6   r7   r  r   )r   rC   rv   r  Prepared)r   rC   rv   statusr  Missingu(   📚 Bid documents preparation completedu   📃 Prepared documents: u#   📎 Referenced company documents: u   ⚠️ Missing documents: u   🔺 Missing documents: z, )r   ru   rv   r   rw   rV   r   loadr[   r   makedirsr.   r4   r5   r6   r7   r   r   r   process_standard_documentprocess_experience_documentgenerate_custom_documentprocess_annexure_formatextendrelpathr   rW   r   )r   r   r  r   r   extracted_info_pathextracted_info_filer   r{   r   final_docs_dirr   required_documentsavailable_documentsdocument_statusreferenced_company_docsreq_docdocument_pathreferenced_doc_inforel_pathr,   r,   r-   prepare_bid_documentsB  s   	







r  r  r  r  c                 C   s  |d u rt }	 |d| d   |dg }|s |d dS d| d  d| d  d	}t|d d
 D ]\}}|d|d  d|d  d|d  7 }q5|d7 }ztdkrntjjt	dddd|dgd}|j
d j }	ntdkrtjjjtdddd|dgd}|jd jj
 }	|d|	  |	 dkr|d| d   W dS td|	}
g }|
D ]4}z)t|d }d|  krt|k rn n|||  t d|| d   W q ty   Y qw |r.|dt| d | d   g }|D ],}|tj|d! |d! tjtj| d |dd"d# |d$tj|d!   qd |fW S |d%| d   W dS  tyX } z| d&t!|  W Y d }~dS d }~ww )'Nu&   🕵️ Processing standard document: r   r/   zNo standard documents availableNNz
    You are an expert in government tender bidding and document matching. I need to find the most relevant 
    company standard documents that match the following requirement:
    
    REQUIRED DOCUMENT:
    Name: 
    Description: r   ze
    
    Here are the available standard company documents (number, filename, and description):
    d   r<   r;   . r   a  
    
    Please identify ALL standard documents that are relevant to the required document. 
    Consider document types, certifications, registrations, and any keywords that indicate relevance.
    Return only the numbers of the relevant documents separated by commas, in order of relevance.
    If no documents are relevant, return "NONE".
    
    For example: "3,7,1" or "5" or "NONE"
    NOTE: PLEASE DO NOT RESPOND WITH ANY ADDITIONAL COMMENTARY OR INFORMATION OTHER THAN THE NUMBERS OF RELEVANT DOCUMENTS SEPARATED BY COMMAS.
    r     r   zhYou are an expert in analyzing tender documents and matching company standard documents to requirements.r@   rM   rP   r   rT   r   z2Minaions response for standard document matching: NONEz*No relevant standard documents found for: \d+zFound relevant document: r   z$ relevant standard document(s) for: rv   r:   r   
local_pathrC   categoryrequired_doc_namer   zReferencing standard document: z'No valid standard documents found for: z+Error finding relevant standard documents: )"r   r   r   r   r   r   r_   rU   r`   rx   rO   rA   r   r   r   r   r   r   r   upperr   r   intrW   r   
ValueErrorru   rv   r   r.   r4   rd   r   r   )r  r  r  r   standard_docsrp   r   r   rr   r   rankingsrelevant_docsrankr  referenced_docsrs   r,   r,   r-   r    s   
(


r  r   c           	      C   s*  |d u rt }	 |d| d   |rSt| |tj |}|rSg }|D ],}|tj|d |d tjtj| d |	ddd |dtj|d   q"d |fS t
| |tj }|rtj|d |d tjtj| d |	dddg}|dtj|d   d |fS |d| d   d	S )
Nu(   🕵️ Processing experience document: r   rv   r   r:   r'  z!Referencing experience document: z+No matching experience document found for: r   )r   r   $select_relevant_experience_documentsr.   r5   r   ru   rv   r   r   find_best_document_matchr   )	r  r  r  r   r   r0  r2  r   
best_matchr,   r,   r-   r  *  sH   

	r  available_docsc           	      C   s   |sdS d}d}t | d   }|D ],}t |d   |d    }||}t|tt|d }||kr@|}|}q|dkrG|S dS )a  
    Find the best matching document from available documents
    
    Args:
        req_doc (dict): Required document information
        available_docs (list): List of available documents
        
    Returns:
        dict: Best matching document, or None if no good match
    Nr   r   r   r;   g333333?)setr   r   intersectionrW   max)	r  r6  r5  
best_score	req_termsr   	doc_termscommon_termsscorer,   r,   r-   r4  g  s    $
r4  experience_docsc              
   C   s  |r|sg S d|dd  d| d  d| d  d}t |dd	 D ]\}}|d
|d  d|d  d|d  7 }q"|d7 }zptdkr[tjjtdddd|dgd}|jd j }ntdkryt	j
jjtdddd|dgd}|jd jj }td|}g }	|D ](}
zt|
d }d|  krt|k rn n|	||  W q ty   Y qw |	W S  ty } ztdt|  g W  Y d}~S d}~ww )aY  
    Select relevant experience documents based on scope of work
    
    Args:
        req_doc (dict): Required document information
        experience_docs (list): List of available experience documents
        scope_of_work (str): Scope of work for context
        
    Returns:
        list: Ranked list of relevant experience documents
    z
    You are an expert in government tender bidding. I need to select the most relevant past experience 
    documents for a tender with the following scope of work:
    
    SCOPE OF WORK:
    Nr   zJ  # Limit scope to avoid token limit issues
    
    The tender requires: r   z - r   zV
    
    Here are the available experience documents (filename and description):
       r<   r;   r#  r   a  
    
    Please rank the top 3 most relevant experience documents for this tender requirement, 
    considering relevance to the scope of work. Return only the numbers of the documents in order 
    of relevance, separated by commas. For example: "5,12,3"
    r   r$  r   z\You are an expert in analyzing tender documents and selecting relevant experience documents.r@   rM   rP   r   rT   r   r&  z/Error selecting relevant experience documents: )r   r   r_   rU   r`   rx   rO   rA   r   r   r   r   r   r   r   r   r   r,  rW   r   r-  rd   r   r   r   )r  r?  r   rp   r   r   rr   r   r/  ranked_docsr1  r  rs   r,   r,   r-   r3    s`   
(
r3  c                 C   s  |d u rt }	 |d| d   |ddd }|ddd }d| d  d| d  d	| d
  d|d d  d|d d  d|d d  d| d}ztdkrvtjjtddddddd|dgd}	|d|	jd j	  |	jd j
}
ntdkrtjjjtdddd|dgd }	|	jd jj}
tj|tj}td!d| d  d"d#}tj|| d$}tjd%d|
tjd&}
td'd|
}
t|d(d)d*}||
 W d    n1 sw   Y  |d+tj|  |W S  ty } z|d,t |  W Y d }~d S d }~ww )-Nu!   🤖 Generating custom document: r   r   r:   rO   zPayment termszY
    You are an expert in government tender document preparation. I need you to create a z 
    for a tender bid. The document should be professional, complete, and follow standard formats.
    
    Document Required: r!  r   zC
    
    Relevant Tender Information:
    
    SCOPE OF WORK:
    i  z$
    
    ELIGIBILITY CRITERIA:
    z
    
    PAYMENT TERMS:
    r   z&
    
    Bidder Company Details:
    a  
    
    Please generate the complete document text in a professional format. Include:
    - Appropriate header with company letterhead elements
    - Date and reference number
    - Professional salutation and closure
    - All necessary declarations or statements
    - Any legal language typically required for such a document
    - Proper formatting with bold, line breaks, paragraphs and tabs etc using HTML tags.
    
    
    The document should be ready to print and sign without further modifications. 

    PLEASE NOTE that:
    1. The output should be in portrait A4 size PDF compatible HTML ONLY.
    2. For tables in the output response, please use HTML table tags.
    3. For any bullet points or serial points, please put them in new lines instead of putting them in one blob of text.
    4. DO NOT make up or create imaginary details for Company's critical information like employee details, designations, previous projects etc, unless provided explicitly in 'Bidder Company Details'. 
    r   r   r;   r   r   r   =You are an expert in preparing professional tender documents.r@   rM   r   r   r   r   rT   r   [^\w\s-]r   r   .html```htmlflags```r   rG   r   zGenerated custom document: z"Error generating custom document: )!r   r   r   r   r_   rU   r`   rx   rO   r   rA   r   r   r   r   r   r   ru   rv   r   r.   r6   r   r   r   r   r   rV   r\   r   rd   r   r   )r  r{   r  r   r  r   eligibilitypayment_termsrp   rr   document_textdest_dir	safe_name	dest_pathr   rs   r,   r,   r-   r    sv   



(r  c                 C   s@   | r|   dkr
dS tdd|   }dd | D }t|S )zi
    Count words in a given text string.
    Removes extra whitespace and handles basic punctuation.
    r:   r   z\s+r   c                 S   s   g | ]}|r|qS r,   r,   )r   wordr,   r,   r-   
<listcomp>V  s    zcount_words.<locals>.<listcomp>)r   r   r   r   rW   )rA   cleaned_textwordsr,   r,   r-   count_wordsJ  s
   rS  org_doc_file_pathuser_promptc                 C   sB  t d|   zt| ddd}| }W d   n1 sw   Y  W n& ty2   d|   Y S  tyJ } zdt| W  Y d}~S d}~ww |dd	d
 }|dd	d
 }	|dd	d
 }
d}d}d}|t|7 }|t|7 }|t|7 }|t|	7 }|t|
7 }td| | d}d}d| d| d| d|	 d|
 d}zNt	dkrt
jd|gtjdddd}|j}n6t	dkrtjjtdd|d|d gd!}|jd j}nt	d"krtjjjtd#|d d|d gd$}|jd jj}W n ty } ztd%t|  W Y d}~d&S d}~ww t|jd'd'd( t  d)}t| j!}d*| d+| d,}t"j#$|t%j&}t"j#$||}|' (d-sNd.| d/}|}t)j*d0d	|t)j+d1}t)*d2d	|}z't|d3dd}|,| W d   n1 sxw   Y  W ||fS W ||fS  ty } zd4t| W  Y d}~S d}~ww )5aY  
    Regenerate a document using Claude Sonnet for bid submission purposes.
    
    Args:
        org_doc_file_path (str): Path to the original document file
        user_prompt (str): User's instructions on what needs to be changed
        company_info (str): Bidder company's information and details
        final_docs_dir (str): Directory to save the final document
        scope_of_work (str): Scope of work from the RFP documents
        eligibility_criteria (str): List of eligibility criteria for bidders
    
    Returns:
        str: Path to the generated document file or error message
    z%Regenerating the requested document: r   rG   r   Nz+Error: Original document file not found at z!Error reading original document: r}   r:   rO   r~   r   g        g{Gzt?r      zYou are an expert document writer specializing in creating professional bid submission documents for tenders and RFPs. Your task is to regenerate, recreate, or rephrase documents based on user requirements while maintaining professionalism and accuracy.z}
Please regenerate the following document based on the user's requirements and provided information:

**ORIGINAL DOCUMENT:**
z'

**USER'S REQUIREMENTS FOR CHANGES:**
z"

**BIDDER COMPANY INFORMATION:**
z

**SCOPE OF WORK FROM RFP:**
z

**ELIGIBILITY CRITERIA:**
a  

**IMPORTANT FORMATTING AND CONTENT REQUIREMENTS:**
1. The output should be in portrait A4 size PDF compatible HTML ONLY.
2. For tables in the output response, please use HTML table tags with proper styling.
3. For any bullet points or serial points, please put them in new lines instead of putting them in one blob of text.
4. DO NOT make up or create imaginary details for Company's critical information like employee details, designations, previous projects etc, unless provided explicitly in 'Bidder Company Details'.
5. Maintain professional tone and format suitable for bid submission, ensuring that the user's requirements for changes are properly addressed.
6. Ensure the document addresses relevant aspects from the scope of work and eligibility criteria where applicable.
7. Use proper HTML structure with appropriate CSS styling for A4 portrait format.

Please provide the complete regenerated document in HTML format that can be converted to PDF while maintaining A4 portrait layout.
r   gemini-2.5-flash-preview-04-17r>   皙?max_output_tokensrS   rQ   contentsconfigr   r@   rM   rP   r   rT   r   zError calling Claude API: )Nr   T)parentsr  z%Y%m%d_%H%M%Sregenerated_r   rD  z<!DOCTYPE html>a  <!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Regenerated Bid Document</title>
    <style>
        @media print {
            @page {
                size: A4 portrait;
                margin: 1in;
            }
        }
        body {
            font-family: Arial, sans-serif;
            line-height: 1.6;
            color: #333;
            max-width: 8.27in;
            margin: 0 auto;
            padding: 20px;
        }
        table {
            width: 100%;
            border-collapse: collapse;
            margin: 20px 0;
        }
        table, th, td {
            border: 1px solid #ddd;
        }
        th, td {
            padding: 12px;
            text-align: left;
        }
        th {
            background-color: #f2f2f2;
            font-weight: bold;
        }
        ul, ol {
            margin: 10px 0;
            padding-left: 30px;
        }
        li {
            margin: 5px 0;
        }
        h1, h2, h3, h4, h5, h6 {
            color: #2c3e50;
            margin-top: 30px;
            margin-bottom: 15px;
        }
        .header {
            text-align: center;
            margin-bottom: 30px;
        }
    </style>
</head>
<body>
z
</body>
</html>rE  rF  rH  r   z$Error saving re-generated document: )-r[   rV   r]   rc   rd   r   r   rS  roundr   gemini_modelgenerate_contentr   GenerateContentConfigrA   r_   rU   r`   rx   rO   r   r   r   r   r   r   r   r   r   mkdirr   nowstrftimestemru   rv   r   r.   r8   r   r   r   r   r   r\   )rT  rU  r  r  r{   r   original_documentrs   r   r   r   
regen_costbase_cost_per_wordtotal_wordssystem_promptrp   rr   rK  generated_content	timestamporiginal_filenameoutput_filenamerL  rg   html_templater,   r,   r-   regenerate_documentZ  s   



8; rr  log_textc           
      C   s~   i }d}t || t j}|D ].}|d   }t|d }t|d }t|d }tt||d }	||||	d||< q|S )aq  
    Parse the annexure scan log and extract all annexure information.
    
    Args:
        log_text: The raw log output from the annexure scanning script
        
    Returns:
        Dictionary mapping annexure names to their details:
        {
            "ANNEXURE XIV": {"start": 74, "end": 75, "page_count": 2, "pages": [74, 75]},
            ...
        }
    uF   ✅\s*(ANNEXURE\s*[\w\-]+):\s*pages\s*(\d+)-(\d+)\s*\((\d+)\s*pages?\)r   r;   rV     )startend
page_countrX   )r   r   r   r   r+  r,  listrange)
rs  	annexuresr   r   r   annexure_name
start_pageend_pagerw  rX   r,   r,   r-   parse_annexure_log#  s   r~  r{  annexure_numberannexure_formatc              
   C   s  |d u rt }	 |rd| n| }d| d| d| d}ztdkr3tjd|gtjdd	d
d}	|	j}
nFtdkr]tjj	t
ddddddd|dgd}	|d|	jd j  |	jd j}
ntdkrytjjj	tdddd|dgd}	|	jd jj}
t|
|}tjdd|tjd}tdd|}tj|tj}tdd|  d d!}tj|| d"}tj|d#d$ t |d%d&d'}|!| W d    n1 sw   Y  |d(tj"|  |W S  t#y } z|$d)t%|  W Y d }~d S d }~ww )*N	Annexure z
    You are an expert in government tender document preparation. I need to fill out an annexure format
    for a bid submission. I've extracted the exact format from the tender document.

    Required Document: z2

    Here is the exact format from the RFP:

    zc

    Please fill this format with appropriate information from this Company Information:
    
    ae  
    
    Follow EXACTLY the original layout, tables, and structure, but fill in all blank fields.
    If there are tables in the format, then please use keys like | or _ or - or tabs and spaces to represent the exact format of the table in the output.

    Again please fill in ALL fields with appropriate information for this type of tender from the company information provided above.
    Completely replace the place holder text in the format with the actual information, wherever found.
    If a field needs specific technical information not provided here, use the string '<fill_data_here>'. DO NOT make up Company's critical information like employee details, designations, previous projects etc, unless provided explicitly in Bidder Company Details.

    IMPORTANT: Maintain the EXACT formatting and layout of the original. Return ONLY the filled document.
    r   rW  r>   rX  rY  r[  r   r   r;   r   r   r   zzYou are an expert in preparing professional tender annexure documents. Fill the exact format with appropriate information.r@   rM   r   r   r   r   rT   r   rE  r:   rF  rH  rC  r   r   rD  Tr  r   rG   r   u(   ✅ Generated filled annexure document: u0   🛑 Error generating filled annexure document: )&r   r   ra  rb  r   rc  rA   r_   rU   r`   rx   r   rO   r   r   r   r   r   r   r   rz   r   r   r   ru   rv   r   r.   r7   r   r   r  rV   r\   r   rd   r   r   )r{  r  r  r  r  re   r   annexure_descrp   rr   filled_documentformatted_documentrL  rM  rN  r   rs   r,   r,   r-   generate_filled_annexureK  sx   
"r  	file_descrI  req_docsc                 C   s   | d|   d|  d | }d| d| d| d| d| d	}	ztd
kr;tjjd|	gtjdddd}
|
j}nFtdkretj	j
tddddddd|	dgd}
| d|
jd j  |
jd j}ntdkrtjjj
tdddd|	dgd}
|
jd jj}tj|tj}tdd |  d!d"}tj|| d#}tj|d$d% tjd&d |tjd'}td(d |}t|d)d*d+}| | W d,   n1 sw   Y  | d-tj!|  |W S  t"y } z|#d.t$|  W Y d,}~d,S d,}~ww )/a  
    Fallback function to generate an annexure document when the exact format can't be found

    Args:
        req_doc (dict): Required document information
        final_docs_dir (str): Directory for final documents

    Returns:
        str: Path to the generated document
    z,Using fallback method to generate annexure: r  r   z
    You are an expert in government tender document preparation. I need to create an annexure document
    for a bid submission, but I dont have the exact format for this document.

    Required Document:

    a4  

    Please create a standard professional format typically used for this type of annexure in government tenders.
    This should look like a authentic, professional government tender annexure.

    To create this document, you may please use the appropriate information from this Company Information:

    zf

    Other related information about this tender/RFP is as following:

    Eligibility Criteria:
    z

    Scope of Work:
    z8

    Documents required to be sumitted in the bid:
    a  

    The format should include:
    - Professional header with annexure title
    - All standard fields typically found in this type of annexure
    - Appropriate spaces for signatures, dates, and stamps
    - Any declarations or statements typically required
    - Proper formatting with bold, line breaks, paragraphs and tabs etc using HTML tags.

    Please create a complete and professional document ready for submission without further modifications.

    PLEASE NOTE that:
    1. The output should be in portrait A4 size PDF compatible HTML ONLY.
    2. For tables in the output response, please use HTML table tags.
    3. For any bullet points or serial points, please put them in new lines instead of putting them in one blob of text.
    4. DO NOT make up or create imaginary details for Company's critical information like employee details, designations, previous projects etc, unless provided explicitly in 'Bidder Company Information'. 
    
    r   zgemini-2.5-flashr>   rX  rY  r[  r   r   r;   r   r   r   rB  r@   rM   r   r   r   r   rT   r   rC  r:   r   r   rD  Tr  rE  rF  rH  r   rG   r   Nu(   ✅ Generated Annexure Custom document: z-Error generating fallback annexure document: )%r   r   gemini_clientmodelsrb  r   rc  rA   r_   rU   r`   rx   rO   r   r   r   r   r   r   r   ru   rv   r   r.   r7   r   r   r   r   r  r   rV   r\   r   rd   r   r   )r{  r  r  r  rI  r  r   r   r  rp   rr   rK  rL  rM  rN  r   rs   r,   r,   r-   generate_annexure_fallback  s|   +r  c              	   C   s  |d u rt }	 |d| d   | d }| dd}| dd}| dg }	|d| d	| d
|	  |ddd }
|ddd }|ddd }td| tj}|r^|dnd }|d| d|  |d|  d }d }tt	|
d}|D ]F}|d| d| d|j  |dkr||jv rtj|d}tj|d}t||	|}|r|}|d|j   n
|d|j  q|rt|||||||S |d t|||||
|||S )Nu!   🤖 Processing annexure format: r   r   r   r   zNo Description givenrX   z
Trying to find annexure z	 in file z on pages: r   r:   rO   r}   r~   zP(?:annexure|format|form|proforma|checklist|annex)\s*[-\s]*([\dIVXivx]+|[a-zA-Z])r;   z(
annexure_name and annexure_number are: z and u   
🔍 Looking for annexure z**/*.pdfz
Processing Files: z with r  ztemp_annexure.pdfu   🎉 Found annexure content in zAnnexure not found in document zfCould not find annexure format in any tender document. Generating the document as custom document now.)r   r   r   r   r   r   r   r   rx  r   globr   ru   rv   r   rt   r   r  r  )r  r{   r  r  r   r   r{  source_hintr  rX   rI  r  r   annexure_matchr  annexure_contentsource_fileall_pdf_filesrh   out_dir
annex_pathr,   r,   r-   r  *  sJ   

r  r   )Mru   shutilr   r   	anthropicloggingPyPDF2r   r   pathlibr   r   r   typingr   r   r	   r
   r   r   argparsesysr   rE   dotenvr   getenvr   r   r   r   r   google.generativeaigenerativeaigenair   r   r   basicConfigINFO	getLoggerr   r   BASE_DIRCOMPANY_REPOSITORYCLAUDE_API_KEYr   r   	Anthropicr_   GenerativeModelra  rx   claude_model_liter   r.   rt   rz   r   r   r   r   r   r   r   r  r  r  r4  r3  r  rS  rr  r~  r  r  r  r,   r,   r,   r-   <module>   s     



NT $-,',o y="$$Rm" J$(,i(v