import json
import re

def clean_seller_name(name):
    """Normalise a raw seller name.

    Non-breaking spaces are turned into ordinary spaces, the marker
    "Under PMA" (matched case-insensitively, as whole words) is removed and
    runs of whitespace are collapsed to a single space.

    Args:
        name: Raw seller name. May be empty, ``None`` or the placeholder "-".

    Returns:
        The cleaned name, or "" when the input is empty or is only the
        "-" placeholder (with or without surrounding whitespace).
    """
    if not name or name == "-":
        return ""

    # Replace NBSP (0xa0) with a regular space
    name = name.replace('\xa0', ' ')
    # Remove only the marker itself and leave the surrounding whitespace in
    # place, so the neighbouring words are not glued together
    # ("ABC Under PMA Ltd" -> "ABC Ltd"). The word boundaries keep names such
    # as "Thunder PMA" intact.
    name = re.sub(r'\bUnder\s+PMA\b', '', name, flags=re.IGNORECASE)
    # Collapse whitespace runs and trim both ends
    name = ' '.join(name.split())

    # A placeholder that was padded with whitespace still means "no name"
    return "" if name == "-" else name

def extract_common_items(sellers_list):
    """Collect the item description fields shared by a list of sellers.

    For every seller the keys "offeredItem", "offeredItems" and
    "itemCategories" are checked in that order, and only the first one
    holding a non-empty, non-"-" value is considered for that seller.
    The first value seen for a key is kept; later sellers never overwrite it.

    Args:
        sellers_list: Iterable of seller dicts.

    Returns:
        Dict mapping each captured key to the first usable value found.
    """
    item_keys = ("offeredItem", "offeredItems", "itemCategories")
    common_items = {}

    for seller in sellers_list:
        for key in item_keys:
            value = seller.get(key)
            if value and value != "-":
                # Keep the first value found for this key; setdefault leaves
                # an already captured value untouched.
                common_items.setdefault(key, value)
                break  # Only the first usable key of each seller counts

    return common_items

def clean_seller_data(seller, keys_to_remove):
    """Return a filtered copy of one seller record.

    Fields listed in ``keys_to_remove`` and fields whose value is the
    placeholder "-" are dropped. The "sellerName" field is passed through
    ``clean_seller_name`` and is kept only when the cleaned name is
    non-empty. Every other field is copied unchanged.

    Args:
        seller: Mapping of field name to value for a single seller.
        keys_to_remove: Collection of field names to discard.

    Returns:
        A new dict; the input mapping is not modified.
    """
    result = {}

    for field, raw in seller.items():
        # Unwanted fields and "-" placeholders never reach the output
        if field in keys_to_remove or raw == "-":
            continue

        if field != "sellerName":
            result[field] = raw
            continue

        # The seller name is normalised and dropped when nothing is left
        tidy_name = clean_seller_name(raw)
        if tidy_name:
            result[field] = tidy_name

    return result

def merge_sellers(tech_sellers, fin_sellers, is_single_packet):
    """Combine technical and financial seller lists into one list per bid.

    Each seller is cleaned with ``clean_seller_data`` and keyed by its
    cleaned "sellerName"; records without a name are ignored. Technical
    records are stored first, then financial records are merged on top, so
    financial fields win when both lists mention the same seller.

    Args:
        tech_sellers: Seller dicts from the technical evaluation.
        fin_sellers: Seller dicts from the financial evaluation.
        is_single_packet: When truthy, the technical list is ignored
            entirely (the original author notes it only holds meaningless
            entries with empty seller names in that case).

    Returns:
        Tuple ``(merged_sellers, common_items)``: the merged seller dicts and
        the result of ``extract_common_items`` over the raw sellers used.
    """
    # Fields stripped from every seller entry
    dropped_fields = {
        "serialNo", "participatedOn", "totalPriceNumeric", "mseCategory",
        "offeredItem", "offeredItems", "itemCategories"
    }

    # Single-packet bids (ba_is_single_packet = 1): technical data is discarded
    technical = [] if is_single_packet else tech_sellers

    by_name = {}

    # Technical entries go in first
    for raw in technical:
        entry = clean_seller_data(raw, dropped_fields)
        name = entry.get("sellerName")
        if name:
            by_name[name] = entry

    # Financial entries are layered on top and override matching fields
    for raw in fin_sellers:
        entry = clean_seller_data(raw, dropped_fields)
        name = entry.get("sellerName")
        if not name:
            continue
        if name in by_name:
            by_name[name].update(entry)
        else:
            by_name[name] = entry

    # Item fields are read from the raw records, since cleaning removes them
    common_items = extract_common_items(technical + fin_sellers)

    # Keep only entries that still carry a seller name
    merged_sellers = [entry for entry in by_name.values() if entry.get("sellerName")]

    return merged_sellers, common_items

def process_detailed_info(detailed_info, is_single_packet):
    """Replace the two evaluation sections with one merged "evaluation".

    Sellers are read from ``sections["technicalEvaluation"]["sellers"]`` and
    ``sections["financialEvaluation"]["qualifiedSellers"]`` and merged with
    ``merge_sellers``. When that yields sellers or common items, a new
    "evaluation" section is written containing "sellers", the common item
    fields and, if present, the financial "summary". Both original
    evaluation sections are removed in every case.

    Args:
        detailed_info: The bid's detailedInfo mapping. Returned unchanged
            when it is falsy or has no "sections" key.
        is_single_packet: Forwarded to ``merge_sellers``.

    Returns:
        A shallow copy of ``detailed_info`` whose "sections" entry is a new,
        rewritten dict; the input mapping itself is not modified.
    """
    if not detailed_info or "sections" not in detailed_info:
        return detailed_info

    result = detailed_info.copy()
    sections = detailed_info["sections"].copy()

    # Seller lists default to empty when a section or its list is missing
    tech_sellers = (
        sections["technicalEvaluation"]["sellers"]
        if "technicalEvaluation" in sections and "sellers" in sections["technicalEvaluation"]
        else []
    )
    fin_sellers = (
        sections["financialEvaluation"]["qualifiedSellers"]
        if "financialEvaluation" in sections and "qualifiedSellers" in sections["financialEvaluation"]
        else []
    )

    merged_sellers, common_items = merge_sellers(tech_sellers, fin_sellers, is_single_packet)

    # NOTE(review): when no sellers and no common items survive, no
    # "evaluation" section is written and the financial summary is dropped
    # together with its section -- confirm this is intended.
    if merged_sellers or common_items:
        evaluation = {"sellers": merged_sellers}
        evaluation.update(common_items)

        # Carry the financial summary over into the merged section
        if "financialEvaluation" in sections and "summary" in sections["financialEvaluation"]:
            evaluation["summary"] = sections["financialEvaluation"]["summary"]

        sections["evaluation"] = evaluation

    # The original evaluation sections are superseded
    for obsolete in ("technicalEvaluation", "financialEvaluation"):
        sections.pop(obsolete, None)

    result["sections"] = sections
    return result

def process_bids_data(data):
    """Build a cleaned copy of the bids payload.

    For every bid:
      * fields listed in ``keys_to_remove`` are dropped;
      * "b_id" becomes a document URL built from "b_id_parent" when that
        field is present, otherwise from the bid's own id;
      * "b_bid_number" gets " (Parent: ...)" appended when
        "b_bid_number_parent" is present;
      * "b_scope_work" is placed right after "bd_category_name", keeping the
        bid's own value when it has one and using "" otherwise;
      * "detailedInfo" is rewritten by ``process_detailed_info``;
      * the parent fields themselves are not copied;
      * every other field is copied unchanged.

    The number of input bids is printed to stdout.

    Args:
        data: Dict with a "metadata" entry and a "bids" list of bid dicts.
            "b_id", "b_bid_number" and the parent fields are indexed with
            ``[0]``, i.e. they are expected to be non-empty sequences.

    Returns:
        Dict with the original "metadata" object and the processed "bids".

    Raises:
        KeyError: If "metadata" or "bids" is missing from ``data``.
    """
    # Keep metadata as is
    processed_data = {
        "metadata": data["metadata"],
        "bids": []
    }

    # Keys to remove from bid level
    keys_to_remove = {
        "id", "b_status", "b_bid_type", "b_is_bunch", "b_type",
        "b_bid_to_ra", "b_buyer_status", "b_eval_type",
        "final_start_date_sort", "final_end_date_sort", "b_is_inactive",
        "b_cat_id", "is_rc_bid", "bd_details_is_boq", "bd_details_new_boq",
        "b_total_quantity", "ba_official_details_minName", "ba_official_details_deptName",
        "b_is_custom_item", "b_ra_to_bid", "ra_b_status", "b.b_created_by", "ra_b_buyer_status"
    }
    # Parent fields are folded into b_id / b_bid_number and never copied
    parent_keys = {"b_bid_number_parent", "b_id_parent"}

    print("Total number of bids:", len(data["bids"]))
    for bid in data["bids"]:
        processed_bid = {}

        # A missing, null or empty flag counts as "not single packet"
        packet_flag = bid.get("ba_is_single_packet") or [0]
        is_single_packet = packet_flag[0] == 1

        for key, value in bid.items():
            if key in keys_to_remove or key in parent_keys:
                continue

            if key == "b_id":
                # Replace with URL, preferring the parent bid id when present
                bid_id = bid["b_id_parent"][0] if "b_id_parent" in bid else value[0]
                processed_bid["b_id"] = f"https://bidplus.gem.gov.in/showbidDocument/{bid_id}"

            elif key == "b_bid_number":
                # Append parent bid number if available
                bid_number = value[0]
                if "b_bid_number_parent" in bid:
                    parent_number = bid["b_bid_number_parent"][0]
                    bid_number = f"{bid_number} (Parent: {parent_number})"
                processed_bid[key] = [bid_number]

            elif key == "bd_category_name":
                processed_bid[key] = value
                # Add b_scope_work after bd_category_name without wiping a
                # value the bid already provides
                processed_bid["b_scope_work"] = bid.get("b_scope_work", "")

            elif key == "detailedInfo":
                processed_bid[key] = process_detailed_info(value, is_single_packet)

            else:
                # Copy other fields as is
                processed_bid[key] = value

        processed_data["bids"].append(processed_bid)

    return processed_data

def main(input_path='/content/gem_bids_complete.json',
         output_path='processed_bids_data.json'):
    """Read the raw bids JSON, process it and write the cleaned JSON.

    Progress, errors and a short summary are printed to stdout. The function
    returns early (after printing an error) when the input file is missing,
    is not valid JSON, or the output cannot be written.

    Args:
        input_path: Path of the JSON file to read.
        output_path: Path of the JSON file to write.

    Returns:
        None.
    """
    # Read the JSON file
    try:
        with open(input_path, 'r', encoding='utf-8') as file:
            data = json.load(file)
    except FileNotFoundError:
        # Report the path that was actually opened
        print(f"Error: {input_path} file not found!")
        return
    except json.JSONDecodeError as e:
        print(f"Error parsing JSON: {e}")
        return

    # Process the data
    print("Processing bids data...")
    processed_data = process_bids_data(data)

    # Save the processed data; only the write itself is guarded so that a
    # later problem is never reported as a save failure
    try:
        with open(output_path, 'w', encoding='utf-8') as file:
            json.dump(processed_data, file, indent=2, ensure_ascii=False)
    except (OSError, TypeError, ValueError) as e:
        print(f"Error saving processed data: {e}")
        return

    print("✅ Data processing completed successfully!")
    print(f"📄 Output saved to '{output_path}'")
    print(f"📊 Processed {len(processed_data['bids'])} bids")

    # Summary: add up the merged seller count of every bid. Bids whose
    # detailedInfo / sections / evaluation is missing or not a dict
    # (e.g. null in the input) contribute nothing.
    total_sellers = 0
    for bid in processed_data['bids']:
        detailed_info = bid.get('detailedInfo')
        if not isinstance(detailed_info, dict):
            continue
        sections = detailed_info.get('sections')
        if not isinstance(sections, dict):
            continue
        evaluation = sections.get('evaluation')
        if isinstance(evaluation, dict):
            total_sellers += len(evaluation.get('sellers', []))

    # Sellers are de-duplicated per bid only, so a seller taking part in
    # several bids is counted once for each of them
    print(f"👥 Total sellers across all bids (unique per bid): {total_sellers}")

# Run the processing pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()