#!/usr/bin/env python3
"""
Bid Processing Script - Extract and process government tender data
Reads PDF work orders and JSON files, uses Claude LLM for PDF extraction
"""

import argparse
import base64
import json
import logging
import os
import time
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional

import anthropic
import pdfplumber
import PyPDF2
import requests
from openai import OpenAI

from dotenv import load_dotenv
load_dotenv()
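
# Expected .env keys (names taken from the os.getenv calls in this file;
# values shown are placeholders):
#   ANTHROPIC_API_KEY=...      # Claude access
#   DEEPINFRA_API_KEY=...      # OpenAI-compatible DeepInfra endpoint (used below)
#   API_URL=http://localhost:5000
#   INTERNAL_API_KEY=...
#   MASTER_TENANT_ID=...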

# CLAUDE_MODEL = "claude-sonnet-4-20250514"
CLAUDE_MODEL = "claude-haiku-4-5"

# OpenAI-compatible client pointed at the DeepInfra endpoint.
# The token is read from the environment rather than hard-coded.
openai_client = OpenAI(
    api_key=os.getenv("DEEPINFRA_API_KEY"),
    base_url="https://api.deepinfra.com/v1/openai",
)

# LLM backend for PDF analysis: "open_llm" or "claude" (no "gemini" path is implemented)
llm_model = "open_llm"

open_llm_model = "Qwen/Qwen3-Next-80B-A3B-Instruct"

# Backend API configuration
BACKEND_API_URL = os.getenv("API_URL", "http://localhost:5000")
INTERNAL_API_KEY = os.getenv("INTERNAL_API_KEY")
MASTER_TENANT_ID = os.getenv("MASTER_TENANT_ID")
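
# Internal backend endpoints this script calls (see the helpers below):
#   POST {BACKEND_API_URL}/internal-api/storage/upload  -> upload file to S3
#   POST {BACKEND_API_URL}/internal-api/documents       -> create Document record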


# Set up logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler('bid_processing.log'),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger(__name__)

class BidProcessor:
    def __init__(self, root_directory: str, output_file: str = "processed_bids_data.json",
                 anthropic_api_key: Optional[str] = None):
        """
        Initialize the bid processor

        Args:
            root_directory: Root directory containing bid folders
            output_file: Output JSON file path
            anthropic_api_key: Anthropic API key for Claude
        """
        self.root_directory = Path(root_directory)
        self.output_file = Path(output_file)
        self.anthropic_api_key = anthropic_api_key or os.getenv("ANTHROPIC_API_KEY")

        if not self.anthropic_api_key:
            raise ValueError("Anthropic API key required. Set ANTHROPIC_API_KEY environment variable or pass it directly.")

        self.client = anthropic.Anthropic(api_key=self.anthropic_api_key)
        self.processed_data = self._load_existing_data()

    def _load_existing_data(self) -> Dict:
        """Load existing processed data if available"""
        if self.output_file.exists():
            try:
                with open(self.output_file, 'r', encoding='utf-8') as f:
                    data = json.load(f)
                logger.info(f"Loaded existing data with {len(data.get('bids', []))} bids")
                return data
            except Exception as e:
                logger.error(f"Error loading existing data: {e}")

        # Initialize empty structure
        return {
            "metadata": {
                "scrapedAt": datetime.now().isoformat(),
                "totalBids": 0,
                "totalFound": 0,
                "source": "Government Tender Processing",
                "withDetailedInfo": 0,
                "detailFetchSuccess": "0.0%"
            },
            "bids": []
        }

    def _upload_file_to_backend(self, file_path: Path, bid_number: str) -> Optional[Dict]:
        """
        Upload file to backend via API (stores in S3)

        Args:
            file_path: Path to file
            bid_number: Bid number for organizing files

        Returns:
            Dict with storage details or None if failed
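
            Example of the assumed shape (hypothetical values; only 'url',
            'key' and 'size' are consumed later in this script):
                {"url": "https://...", "key": "nexray_training/...", "size": 12345}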
        """
        try:
            if not file_path.exists():
                logger.warning(f"File not found: {file_path}")
                return None

            if not INTERNAL_API_KEY or not MASTER_TENANT_ID:
                logger.error("=" * 80)
                logger.error("CONFIGURATION ERROR: INTERNAL_API_KEY or MASTER_TENANT_ID not configured")
                logger.error("=" * 80)
                if not INTERNAL_API_KEY:
                    logger.error("❌ INTERNAL_API_KEY is missing from .env file")
                if not MASTER_TENANT_ID:
                    logger.error("❌ MASTER_TENANT_ID is missing from .env file")
                    logger.error("")
                    logger.error("To fix this:")
                    logger.error("1. Connect to MongoDB and find your tenant ID:")
                    logger.error("   mongo> use tenderdb")
                    logger.error("   mongo> db.tenants.findOne({}, {_id: 1, companyName: 1})")
                    logger.error("")
                    logger.error("2. Add the tenant _id to ai-engine/.env:")
                    logger.error("   MASTER_TENANT_ID=<your_tenant_id_here>")
                    logger.error("")
                logger.error("=" * 80)
                return None

            # Prepare file for upload
            safe_bid_number = bid_number.replace('/', '_').replace('\\', '_')
            file_name = file_path.name
            file_key = f"nexray_training/{MASTER_TENANT_ID}/{safe_bid_number}/{file_name}"

            # Determine content type based on file extension
            content_type_map = {
                '.pdf': 'application/pdf',
                '.doc': 'application/msword',
                '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
                '.xls': 'application/vnd.ms-excel',
                '.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
                '.txt': 'text/plain',
                '.json': 'application/json',
                '.zip': 'application/zip',
                '.rar': 'application/x-rar-compressed',
                '.png': 'image/png',
                '.jpg': 'image/jpeg',
                '.jpeg': 'image/jpeg'
            }
            file_ext = file_path.suffix.lower()
            content_type = content_type_map.get(file_ext, 'application/octet-stream')

            with open(file_path, 'rb') as f:
                files = {'file': (file_name, f, content_type)}
                data = {
                    'tenant_id': MASTER_TENANT_ID,
                    'key': file_key,
                    'content_type': content_type,
                    'encrypt': 'false'
                }
                headers = {
                    'x-internal-api-key': INTERNAL_API_KEY
                }

                response = requests.post(
                    f"{BACKEND_API_URL}/internal-api/storage/upload",
                    files=files,
                    data=data,
                    headers=headers,
                    timeout=60
                )

                if response.status_code == 200:
                    result = response.json()
                    if result.get('success'):
                        logger.info(f"✅ Uploaded file to S3: {file_name}")
                        return result.get('data')
                    else:
                        logger.error(f"Backend upload failed: {result.get('message')}")
                        return None
                else:
                    logger.error(f"Upload API error: {response.status_code} - {response.text}")
                    return None

        except Exception as e:
            logger.error(f"Error uploading file {file_path.name}: {e}")
            return None

    def _create_document_record(self, storage_details: Dict, file_path: Path,
                                bid_number: str, metadata: Optional[Dict] = None) -> Optional[str]:
        """
        Create Document record in database via API

        Args:
            storage_details: Storage details from S3 upload
            file_path: Original file path
            bid_number: Bid number
            metadata: Additional metadata

        Returns:
            Document ID or None if failed
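
            The backend is assumed to return JSON like
            {"success": true, "data": {"_id": "<document_id>"}} (parsed below).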
        """
        try:
            if not storage_details:
                return None

            file_name = file_path.name

            # Document type is fixed for this pipeline; the original file name
            # is preserved in the metadata below
            doc_type = "nexray_document"

            document_data = {
                'tenant': MASTER_TENANT_ID,
                'name': file_name,
                'type': doc_type,
                'category': 'nexray_training',  # Special category for training data
                'storageType': 's3',
                'storageDetails': storage_details,
                'metadata': {
                    'bidNumber': bid_number,
                    'source': 'eproc_historical',
                    'originalFileName': file_name,
                    'uploadedAt': datetime.now().isoformat(),
                    'isNexRayTraining': True,
                    **(metadata or {})
                }
            }

            headers = {
                'x-internal-api-key': INTERNAL_API_KEY,
                'Content-Type': 'application/json'
            }

            response = requests.post(
                f"{BACKEND_API_URL}/internal-api/documents",
                json=document_data,
                headers=headers,
                timeout=30
            )

            if response.status_code in [200, 201]:
                result = response.json()
                if result.get('success'):
                    document_id = result.get('data', {}).get('_id')
                    logger.info(f"✅ Created document record: {file_name} (ID: {document_id})")
                    return document_id
                else:
                    logger.error(f"Document creation failed: {result.get('message')}")
                    return None
            else:
                logger.error(f"Document API error: {response.status_code} - {response.text}")
                return None

        except Exception as e:
            logger.error(f"Error creating document record: {e}")
            return None

    def _process_and_upload_files(self, folder_path: Path, bid_number: str,
                                  existing_documents: Optional[List[Dict]] = None) -> List[Dict]:
        """
        Find all files in folder, upload to S3, and create document records

        Args:
            folder_path: Path to folder containing files
            bid_number: Bid number for organization
            existing_documents: List of already uploaded documents to avoid duplicates

        Returns:
            List of document information dicts
        """
        documents = []

        # If documents already exist for this bid, return them without re-uploading
        if existing_documents:
            logger.info(f"Documents already exist for bid {bid_number}, skipping upload")
            return existing_documents

        try:
            # Define allowed file extensions
            allowed_extensions = {'.pdf', '.doc', '.docx', '.xls', '.xlsx'}

            # Find all files in the folder with allowed extensions
            all_files = []
            for file in folder_path.iterdir():
                if file.is_file() and file.suffix.lower() in allowed_extensions:
                    all_files.append(file)

            if not all_files:
                logger.warning(f"No uploadable files (pdf, doc, docx, xls, xlsx) found in {folder_path}")
                return documents

            logger.info(f"Found {len(all_files)} uploadable file(s) in {folder_path.name}")

            for file in all_files:
                try:
                    # Upload file to S3
                    storage_details = self._upload_file_to_backend(file, bid_number)

                    if storage_details:
                        # Create document record
                        document_id = self._create_document_record(
                            storage_details,
                            file,
                            bid_number,
                            metadata={'folderName': folder_path.name}
                        )

                        if document_id:
                            documents.append({
                                'documentId': document_id,
                                'fileName': file.name,
                                'storageUrl': storage_details.get('url'),
                                'storageKey': storage_details.get('key'),
                                'fileSize': storage_details.get('size'),
                                'uploadedAt': datetime.now().isoformat()
                            })
                        else:
                            logger.warning(f"Failed to create document record for {file.name}")
                    else:
                        logger.warning(f"Failed to upload {file.name}")

                except Exception as e:
                    logger.error(f"Error processing file {file.name}: {e}")
                    continue

        except Exception as e:
            logger.error(f"Error processing files in folder: {e}")

        return documents

    def _save_data(self):
        """Save processed data to disk"""
        try:
            # Update metadata
            self.processed_data["metadata"]["totalBids"] = len(self.processed_data["bids"])
            self.processed_data["metadata"]["withDetailedInfo"] = sum(
                1 for bid in self.processed_data["bids"] if "detailedInfo" in bid
            )
            self.processed_data["metadata"]["detailFetchSuccess"] = (
                f"{self.processed_data['metadata']['withDetailedInfo'] / max(1, self.processed_data['metadata']['totalBids']) * 100:.1f}%"
            )

            # Save to file
            with open(self.output_file, 'w', encoding='utf-8') as f:
                json.dump(self.processed_data, f, indent=2, ensure_ascii=False)

            logger.info(f"Data saved to {self.output_file}")

        except Exception as e:
            logger.error(f"Error saving data: {e}")

    def _extract_text_from_pdf(self, pdf_path: Path) -> str:
        """Extract text from PDF file"""
        try:
            text = ""

            # Try pdfplumber first (better for complex layouts)
            try:
                with pdfplumber.open(pdf_path) as pdf:
                    for page in pdf.pages:
                        page_text = page.extract_text()
                        if page_text:
                            text += page_text + "\n"

                if text.strip():
                    logger.info(f"Successfully extracted text using pdfplumber from {pdf_path.name}")
                    return text

            except Exception as e:
                logger.warning(f"pdfplumber failed for {pdf_path.name}: {e}")

            # Fallback to PyPDF2
            try:
                with open(pdf_path, 'rb') as file:
                    pdf_reader = PyPDF2.PdfReader(file)
                    for page in pdf_reader.pages:
                        text += page.extract_text() + "\n"

                if text.strip():
                    logger.info(f"Successfully extracted text using PyPDF2 from {pdf_path.name}")
                    return text

            except Exception as e:
                logger.warning(f"PyPDF2 failed for {pdf_path.name}: {e}")

            # If both fail, return empty string (might be scanned PDF)
            logger.warning(f"Could not extract text from {pdf_path.name} - might be scanned")
            return ""

        except Exception as e:
            logger.error(f"Error processing PDF {pdf_path.name}: {e}")
            return ""

    def _analyze_pdf_with_llm(self, pdf_path: Path, pdf_text: str) -> Dict:
        """Use Claude to analyze PDF content and extract structured information"""
        try:
            prompt = f"""
            Analyze this government work order/purchase order document and extract the following information in JSON format:

            {pdf_text}

            Please extract and return ONLY a JSON object with this structure:
            {{
                "seller_details": {{
                    "name": "Company name",
                    "address": "Full address",
                    "contact": "Phone/email if available"
                }},
                "work_order_details": {{
                    "order_number": "Work order number",
                    "date": "Order date",
                    "total_amount": "Total contract value",
                    "completion_period": "Work completion period"
                }},
                "scope_of_work": "Complete description of work/items to be supplied",
                "items": [
                    {{
                        "description": "Item description",
                        "quantity": "Quantity",
                        "unit_rate": "Rate per unit",
                        "total_amount": "Total for this item"
                    }}
                ],
                "terms_and_conditions": "Key terms and conditions if any"
            }}

            Extract only the information that is clearly present in the document. Use null for missing information.
            """
            # If we have text, use it; otherwise, we'll need to convert PDF to base64 for Claude
            if pdf_text.strip():
                if llm_model == 'claude':
                    message = self.client.messages.create(
                        model=CLAUDE_MODEL,
                        max_tokens=4000,
                        messages=[{"role": "user", "content": prompt}]
                    )
                    response_text = message.content[0].text.strip()
                elif llm_model == 'open_llm':
                    response = openai_client.chat.completions.create(
                        model=open_llm_model,
                        messages=[
                            {"role": "user", "content": prompt},
                        ],
                    )
                    response_text = response.choices[0].message.content.strip()
                else:
                    raise ValueError(f"Unsupported llm_model: {llm_model!r}")

            else:
                # For scanned PDFs, fall back to Claude's native PDF support
                # regardless of llm_model (the open-LLM path is text-only)
                logger.info(f"PDF {pdf_path.name} appears to be scanned - using document analysis")

                # Read PDF file as binary and convert to base64
                with open(pdf_path, 'rb') as pdf_file:
                    pdf_data = pdf_file.read()
                    pdf_base64 = base64.b64encode(pdf_data).decode('utf-8')

                message = self.client.messages.create(
                    model=CLAUDE_MODEL,
                    max_tokens=4000,
                    messages=[
                        {
                            "role": "user",
                            "content": [
                                {
                                    "type": "document",
                                    "source": {
                                        "type": "base64",
                                        "media_type": "application/pdf",
                                        "data": pdf_base64
                                    }
                                },
                                {
                                    "type": "text",
                                    "text": prompt
                                }
                            ]
                        }
                    ]
                )

                # Parse Claude's response for the scanned-document path
                response_text = message.content[0].text.strip()

            # Extract JSON from response (strip markdown code fences if the
            # model wrapped its output in them; tolerate a missing closing fence)
            if "```json" in response_text:
                json_start = response_text.find("```json") + 7
                json_end = response_text.find("```", json_start)
                if json_end == -1:
                    json_end = len(response_text)
                response_text = response_text[json_start:json_end].strip()
            elif "```" in response_text:
                json_start = response_text.find("```") + 3
                json_end = response_text.rfind("```")
                if json_end <= json_start:
                    json_end = len(response_text)
                response_text = response_text[json_start:json_end].strip()

            try:
                extracted_data = json.loads(response_text)
                logger.info(f"Successfully analyzed PDF {pdf_path.name} with Claude")
                return extracted_data
            except json.JSONDecodeError as e:
                logger.error(f"Failed to parse JSON from Claude response for {pdf_path.name}: {e}")
                logger.debug(f"Claude response was: {response_text}")
                return {}

        except Exception as e:
            logger.error(f"Error analyzing PDF with Claude: {e}")
            return {}

    def _load_json_data(self, json_path: Path) -> Dict:
        """Load and parse JSON data"""
        try:
            with open(json_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
            logger.info(f"Successfully loaded {json_path.name}")
            return data
        except Exception as e:
            logger.error(f"Error loading JSON {json_path.name}: {e}")
            return {}
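
    # Assumed shape of stage_summary_data.json, inferred from the fields this
    # script reads (illustrative values only):
    # {
    #   "headerInfo": {"tenderID": "...", "tenderTitle": "...",
    #                  "organisationChain": "Organisation||Department||Office"},
    #   "bidsListSection": [{"Bidder Name": "...", "Status": "Accepted (MSE)"}],
    #   "financeBidOpeningSummary": "...",
    #   "awardedBids": [{"Bidder Name": "...", "Awarded Value": "1,23,456"}]
    # }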

    def _process_folder(self, folder_path: Path, existing_bid_data: Optional[Dict] = None) -> Optional[Dict]:
        """Process a single folder containing bid documents"""
        logger.info(f"Processing folder: {folder_path.name}")

        # Find required files
        pdf_file = None
        json_file = None

        for file in folder_path.iterdir():
            if file.suffix.lower() == '.pdf':
                pdf_file = file
            elif file.name == 'stage_summary_data.json':
                json_file = file

        if not pdf_file or not json_file:
            logger.warning(f"Missing required files in {folder_path.name}")
            return None

        # Load JSON data first to get bid number
        json_data = self._load_json_data(json_file)

        if not json_data:
            logger.warning(f"Could not load JSON data from {folder_path.name}")
            return None

        # Get bid number for organizing documents
        bid_number = json_data.get('headerInfo', {}).get('tenderID', '') or folder_path.name

        # Check if documents already exist for this bid
        existing_documents = None
        if existing_bid_data:
            existing_documents = existing_bid_data.get('nexray_documents', [])
            if existing_documents:
                logger.info(f"Found {len(existing_documents)} existing documents for bid {bid_number}")

        # Upload files to S3 and create document records (will skip if documents exist)
        logger.info(f"📤 Checking/Uploading files for bid {bid_number}...")
        uploaded_documents = self._process_and_upload_files(folder_path, bid_number, existing_documents)

        if uploaded_documents:
            logger.info(f"✅ Successfully uploaded {len(uploaded_documents)} document(s) to S3")
        else:
            logger.warning(f"⚠️ No documents were uploaded for {folder_path.name}")

        # Extract raw text from the PDF. The LLM analysis step is currently
        # disabled; a placeholder structure stands in for its output.
        pdf_text = self._extract_text_from_pdf(pdf_file)
        # pdf_analysis = self._analyze_pdf_with_llm(pdf_file, pdf_text)
        pdf_analysis = {
            'scope_of_work': "",
            'items': [],
            'seller_details': {},
            'terms_and_conditions': ""
        }

        # Process and structure the data with document information
        return self._structure_bid_data(folder_path.name, pdf_analysis, json_data, pdf_file, uploaded_documents)

    def _structure_bid_data(self, folder_name: str, pdf_analysis: Dict,
                            json_data: Dict, pdf_file: Path,
                            uploaded_documents: Optional[List[Dict]] = None) -> Dict:
        """Structure the extracted data according to the target format"""

        if uploaded_documents is None:
            uploaded_documents = []

        # Extract basic info from JSON
        header_info = json_data.get('headerInfo', {})
        org_chain = header_info.get('organisationChain', '')
        bids_list = json_data.get('bidsListSection', [])
        finance_summary = json_data.get('financeBidOpeningSummary', '')
        awarded_bids = json_data.get('awardedBids', [])

        # Create basic bid structure
        tender_title = header_info.get('tenderTitle', '')
        bid_data = {
            "b_id": f"folder_{folder_name}",
            "b_bid_number": [header_info.get('tenderID', '')],
            "b_category_name": [tender_title[:100] + '...' if len(tender_title) > 100 else tender_title],
            "bd_category_name": [tender_title],
            "b_scope_work": pdf_analysis.get('scope_of_work', ''),
            "is_high_value": [False],  # Default; could be derived from the awarded amount
            "ba_is_single_packet": [1 if len(bids_list) == 1 else 0],
            "nexray_documents": uploaded_documents  # References to uploaded S3 documents
        }

        # Add title if available
        if 'tenderTitle' in header_info:
            bid_data["bbt_title"] = [header_info['tenderTitle']]

        # Create detailed info
        detailed_info = {
            "bidId": header_info.get('tenderID', folder_name),
            "isSinglePacket": len(bids_list) == 1,
            "nexray_documents": uploaded_documents,  # Add document references for backend
            "sections": {
                "bidDetails": {
                    "bidNumber": header_info.get('tenderID', ''),
                    "bidStatus": "Active",  # Default
                    "quantity": str(len(pdf_analysis.get('items', []))),
                    "bidValidity": "30 ( Days)",  # Default
                    "buyerDetails": {
                        "address": header_info.get('organisationChain', '').replace('||', ', '),
                        "ministry": "Ministry",  # Could be extracted from org chain
                        "department": "Department",
                        "organisation": header_info.get('organisationChain', '').split('||')[0] if '||' in header_info.get('organisationChain', '') else '',
                        "office": header_info.get('organisationChain', '').split('||')[-1] if '||' in header_info.get('organisationChain', '') else ''
                    }
                },
                "evaluation": {
                    "sellers": [],
                    "offeredItems": bid_data["b_category_name"][0] if bid_data["b_category_name"] else "",
                    "summary": {
                        "totalSellers": len(bids_list),
                        "lowestPrice": 0,
                        "highestPrice": 0,
                        "priceRange": 0,
                        "l1Winner": "",
                        "l1Winner_details": pdf_analysis.get('seller_details', {})
                    }
                }
            }
        }

        # Process sellers from bids list
        seller_prices = []
        for i, bid in enumerate(bids_list, 1):
            seller = {
                "sellerName": bid.get('Bidder Name', ''),
                "status": bid.get('Status', 'Unknown'),
                "statusType": "MSE" if "MSE" in bid.get('Status', '') else "",
            }

            # NOTE: per-bidder prices in financeBidOpeningSummary are not parsed
            # here (the format varies); prices are taken from awardedBids below.

            # Check if this is the winner
            for awarded in awarded_bids:
                if awarded.get('Bidder Name') == bid.get('Bidder Name'):
                    seller["totalPrice"] = awarded.get('Awarded Value', '0')
                    seller["rank"] = "L1"
                    detailed_info["sections"]["evaluation"]["summary"]["l1Winner"] = bid.get('Bidder Name', '')
                    # Add more detailed winner info
                    if pdf_analysis.get('seller_details'):
                        detailed_info["sections"]["evaluation"]["summary"]["l1Winner_details"] = pdf_analysis.get('seller_details')
                    try:
                        seller_prices.append(float(str(awarded.get('Awarded Value', '0')).replace(',', '')))
                    except (ValueError, TypeError):
                        logger.warning(f"Could not parse awarded value: {awarded.get('Awarded Value')!r}")
                    break

            detailed_info["sections"]["evaluation"]["sellers"].append(seller)

        # Update price summary
        if seller_prices:
            detailed_info["sections"]["evaluation"]["summary"]["lowestPrice"] = min(seller_prices)
            detailed_info["sections"]["evaluation"]["summary"]["highestPrice"] = max(seller_prices)
            detailed_info["sections"]["evaluation"]["summary"]["priceRange"] = max(seller_prices) - min(seller_prices)

        # Add terms and conditions to scope of work if available
        if pdf_analysis.get('terms_and_conditions'):
            if bid_data["b_scope_work"]:
                bid_data["b_scope_work"] += "\n\nTerms & Conditions:\n" + pdf_analysis.get('terms_and_conditions')
            else:
                bid_data["b_scope_work"] = "Terms & Conditions:\n" + pdf_analysis.get('terms_and_conditions')

        bid_data["detailedInfo"] = detailed_info

        return bid_data
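
    # Sketch of the bid structure produced above (illustrative values only):
    # {
    #   "b_id": "folder_<name>",
    #   "b_bid_number": ["<tenderID>"],
    #   "b_category_name": ["<title truncated to 100 chars>"],
    #   "b_scope_work": "...",
    #   "nexray_documents": [...],
    #   "detailedInfo": {"bidId": "...", "sections": {"bidDetails": {...}, "evaluation": {...}}}
    # }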

    def process_all_folders(self):
        """Process all folders in the root directory"""
        logger.info(f"Starting to process folders in {self.root_directory}")

        folders = [f for f in self.root_directory.iterdir() if f.is_dir()]
        total_folders = len(folders)

        logger.info(f"Found {total_folders} folders to process")

        processed_count = 0
        for folder in folders:
            try:
                # Check if already processed
                folder_id = f"folder_{folder.name}"
                existing_bid_data = None
                existing_bid_index = None

                # Find existing bid data if it exists
                for idx, bid in enumerate(self.processed_data['bids']):
                    if bid.get('b_id', '') == folder_id:
                        existing_bid_data = bid
                        existing_bid_index = idx
                        break

                if existing_bid_data:
                    # Check if it has detailed info and documents
                    has_detailed_info = 'detailedInfo' in existing_bid_data
                    has_documents = bool(existing_bid_data.get('nexray_documents'))

                    if has_detailed_info and has_documents:
                        logger.info(f"Folder {folder.name} already fully processed with documents, skipping...")
                        continue
                    else:
                        logger.info(f"Folder {folder.name} exists but incomplete, re-processing...")

                # Process folder (pass existing data to avoid re-uploading documents)
                bid_data = self._process_folder(folder, existing_bid_data)

                if bid_data:
                    if existing_bid_index is not None:
                        # Update existing bid data
                        self.processed_data['bids'][existing_bid_index] = bid_data
                        logger.info(f"Updated existing bid data for {folder.name}")
                    else:
                        # Add new bid data
                        self.processed_data['bids'].append(bid_data)

                    processed_count += 1

                    # Save data after each successful processing
                    self._save_data()

                    logger.info(f"Processed {processed_count}/{total_folders} folders")

                    # Add a small delay to avoid overwhelming the API
                    time.sleep(1)

            except Exception as e:
                logger.error(f"Error processing folder {folder.name}: {e}")
                continue

        logger.info(f"Processing complete. Processed {processed_count} new folders.")
        return self.processed_data

def main():
    """Main function to run the bid processor"""

    # Parse command line arguments
    parser = argparse.ArgumentParser(description='Process bid folders')
    parser.add_argument(
        'root_directory',
        help='Root directory containing bid folders to process'
    )
    parser.add_argument(
        '--output',
        default='processed_bids_data.json',
        help='Output file name (default: processed_bids_data.json)'
    )

    args = parser.parse_args()

    # Configuration
    ROOT_DIRECTORY = args.root_directory
    OUTPUT_FILE = args.output
    # Claude API Configuration
    ANTHROPIC_API_KEY = os.environ.get("ANTHROPIC_API_KEY")

    if not os.path.exists(ROOT_DIRECTORY):
        print(f"Error: Directory {ROOT_DIRECTORY} does not exist!")
        return

    try:
        # Initialize and run processor
        processor = BidProcessor(ROOT_DIRECTORY, OUTPUT_FILE, ANTHROPIC_API_KEY)
        result = processor.process_all_folders()

        print(f"\nProcessing complete!")
        print(f"Total bids processed: {len(result['bids'])}")
        print(f"Results saved to: {OUTPUT_FILE}")

    except Exception as e:
        logger.error(f"Error in main processing: {e}")
        print(f"Error: {e}")

if __name__ == "__main__":
    main()
