import json
from collections import defaultdict
import re

def format_indian_currency(amount):
    """
    Format a number using Indian digit grouping: ₹xx,xx,xx,xxx.zz

    The last three integer digits form one group and every group to the
    left of it has two digits (e.g. 1234567.891 -> "₹12,34,567.89").

    Args:
        amount (int or float): Value to format; rounded to 2 decimal places.

    Returns:
        str: The formatted amount. Negative values carry the minus sign
        directly after the rupee symbol (e.g. "₹-1,234.00").
    """
    if amount == 0:
        return "₹0.00"

    # Group the magnitude only. Grouping the signed string would treat '-'
    # as a digit and yield output such as "₹-,123.00".
    integer_part, decimal_part = f"{abs(amount):.2f}".split('.')

    # A tiny negative value (e.g. -0.001) rounds to zero; do not show "-0.00".
    rounds_to_zero = integer_part == '0' and decimal_part == '00'
    sign = '-' if amount < 0 and not rounds_to_zero else ''

    # The rightmost three digits stay together; everything to the left is
    # peeled off two digits at a time, working right to left.
    head, tail = integer_part[:-3], integer_part[-3:]
    groups = [tail]
    while head:
        groups.insert(0, head[-2:])
        head = head[:-2]

    return f"₹{sign}{','.join(groups)}.{decimal_part}"

def parse_price(price_str):
    """
    Parse a price value and return it as a non-negative float.

    The first numeric token found in the text is used, so currency
    prefixes and suffixes such as "Rs. 500", "₹1,23,456.78" or
    "Rs.1,250.75/-" are handled. Commas and whitespace inside the number
    are treated as grouping separators. Any sign is discarded.

    Args:
        price_str: A price as text, or an int/float.

    Returns:
        float: The parsed amount, or 0.0 when the input is empty, is not
        finite, or contains no number.
    """
    # bool is a subclass of int; it is never a meaningful price.
    if price_str is None or isinstance(price_str, bool):
        return 0.0

    # Real numbers must not take a round trip through str(): exponent
    # notation such as "1e-05" would be cleaned into "105".
    if isinstance(price_str, (int, float)):
        try:
            value = abs(float(price_str))
        except OverflowError:
            return 0.0
        if value != value or value == float('inf'):  # NaN or infinity
            return 0.0
        return value

    # Either a digit-led number (with optional separators and fraction) or a
    # bare fraction like ".5". The lookbehind stops the period of an
    # abbreviation ("Rs.500") from being read as a decimal point.
    match = re.search(r'\d[\d,\s]*(?:\.\d+)?|(?<!\w)\.\d+', str(price_str))
    if match is None:
        return 0.0

    try:
        return float(re.sub(r'[,\s]', '', match.group()))
    except ValueError:
        return 0.0

def extract_location_from_bid(bid):
    """
    Best-effort lookup of a location name in a bid's buyer address.

    Reads detailedInfo -> sections -> bidDetails (optionally nested under a
    'parent' key) -> buyerDetails -> address, and returns the last
    whitespace-separated token of the address that does not start with '*',
    title-cased. Returns None when any part of that path is missing, the
    address is not a string, or anything unexpected is encountered.
    """
    try:
        if 'detailedInfo' not in bid:
            return None
        detailed = bid['detailedInfo']
        if 'sections' not in detailed:
            return None
        section_map = detailed['sections']
        if 'bidDetails' not in section_map:
            return None

        # bidDetails is either flat or wrapped in a parent/child structure
        details_node = section_map['bidDetails']
        if not isinstance(details_node, dict):
            buyer = {}
        elif 'parent' in details_node:
            buyer = details_node['parent'].get('buyerDetails', {})
        else:
            buyer = details_node.get('buyerDetails', {})

        if not buyer or 'address' not in buyer:
            return None

        raw_address = buyer['address']
        if not isinstance(raw_address, str):
            return None

        # Addresses typically look like "*********** CITY_NAME", so scan
        # from the end for the first token that is not masked.
        visible = next(
            (token for token in reversed(raw_address.split())
             if token and not token.startswith('*')),
            None,
        )
        if visible is None:
            return None
        return visible.strip().title()
    except Exception:
        return None

def check_keyword_match_with_details(bd_category_list, keywords, search_mode="any"):
    """
    Check which keywords occur in the given texts (case-insensitive substring match).

    Args:
        bd_category_list (list): Strings to search. A single string is
            treated as a one-item list; None entries are ignored.
        keywords (str or list): Single keyword string or list of keywords.
            Blank and non-string keywords are ignored; duplicates (after
            trimming and lower-casing) are collapsed.
        search_mode (str): "all" requires every keyword to be found (AND
            logic); any other value means at least one must be found (OR logic).

    Returns:
        tuple: (bool: match_found, list: matched_keywords) where
        matched_keywords holds the lower-cased keywords that were found,
        in the order they were supplied.
    """
    if not bd_category_list or not keywords:
        return False, []

    # A bare string would otherwise be iterated (and joined) per character.
    if isinstance(bd_category_list, str):
        bd_category_list = [bd_category_list]
    if isinstance(keywords, str):
        keywords = [keywords]

    # Normalize, drop blanks (a blank keyword is a substring of everything)
    # and de-duplicate while preserving the caller's order.
    keyword_list = list(dict.fromkeys(
        kw.strip().lower() for kw in keywords
        if isinstance(kw, str) and kw.strip()
    ))
    if not keyword_list:
        return False, []

    # Combine all texts into one haystack; JSON nulls are skipped rather
    # than crashing str.join.
    category_text = ' '.join(
        str(item) for item in bd_category_list if item is not None
    ).lower()

    matched_keywords = [kw for kw in keyword_list if kw in category_text]

    if str(search_mode).lower() == "all":
        # AND logic: all keywords must be found
        match_found = len(matched_keywords) == len(keyword_list)
    else:
        # OR logic (default): any keyword found
        match_found = bool(matched_keywords)

    return match_found, matched_keywords

def _as_text(value):
    """Return ``value`` as a string, mapping None (JSON null) to ''."""
    return '' if value is None else str(value)


def _as_text_list(value):
    """Normalize a field that may be null, a scalar, or a list into a list of strings."""
    if value is None:
        return []
    if isinstance(value, (list, tuple)):
        return [_as_text(item) for item in value]
    return [_as_text(value)]


def _bid_search_texts(bid):
    """Collect the strings of a bid that keyword filtering searches."""
    metadata = bid.get('metadata')
    api_data = metadata.get('apiData') if isinstance(metadata, dict) else None
    if not isinstance(api_data, dict):
        api_data = {}
    return (
        [_as_text(bid.get('title')), _as_text(bid.get('description'))]
        + _as_text_list(api_data.get('bd_category_name'))
        + _as_text_list(api_data.get('bbt_title'))
    )


def _evaluation_section(bid):
    """Return the bid's evaluation section if it has a 'sellers' entry, else None."""
    detailed_info = bid.get('detailedInfo')
    sections = detailed_info.get('sections') if isinstance(detailed_info, dict) else None
    evaluation = sections.get('evaluation') if isinstance(sections, dict) else None
    if isinstance(evaluation, dict) and 'sellers' in evaluation:
        return evaluation
    return None


def _new_seller_stats():
    """Create the zeroed accumulator used for each newly seen seller."""
    return {
        'seller_name': '',
        'total_bids_applied': 0,
        'total_bids_qualified': 0,
        'total_bids_disqualified': 0,
        'total_bids_won_l1': 0,
        'total_bids_l2': 0,
        'total_amount_won_l1': 0.0,
        'locations': set(),          # unique buyer locations
        'matched_keywords': set(),   # filter keywords that matched this seller's bids
        'seller_details': {},        # contact details taken from an L1 win
        'special_status': set(),
    }


def _l1_contact_details(l1_winner_details):
    """Extract the non-blank 'address' and 'contact' strings from L1 winner details."""
    if not isinstance(l1_winner_details, dict):
        return {}
    details = {}
    for key in ('address', 'contact'):
        value = l1_winner_details.get(key)
        if isinstance(value, str) and value.strip():
            details[key] = value.strip()
    return details


def _special_statuses(seller):
    """Return the set of special status labels (MSE / MII / Under PMA) of a seller entry."""
    found = set()

    # mseStatus is recorded verbatim unless it is empty or the 'N/A' placeholder
    mse_status = seller.get('mseStatus')
    if isinstance(mse_status, str) and mse_status not in ('N/A', ''):
        found.add(mse_status)

    main_status = _as_text(seller.get('status'))
    status_type = _as_text(seller.get('statusType'))
    for marker in ('MSE', 'MII'):
        if marker in main_status or marker in status_type:
            found.add(marker)
    if 'Under PMA' in status_type:
        found.add('Under PMA')

    return found


def _print_seller_summary(position, seller):
    """Print the console summary lines for one seller record."""
    print(f"{position}. {seller['seller_name']}: {seller['total_bids_applied']} bids")
    print(f"   - Qualified: {seller['total_bids_qualified']}, Disqualified: {seller['total_bids_disqualified']}")
    print(f"   - L1 wins: {seller['total_bids_won_l1']}, L2: {seller['total_bids_l2']}")
    print(f"   - Amount won: {seller['total_amount_won_l1_formatted']}")
    print(f"   - Locations: {', '.join(seller['seller_locations']) if seller['seller_locations'] else 'N/A'}")
    print(f"   - Keywords: {', '.join(seller['keyword_matches']) if seller['keyword_matches'] else 'N/A'}")

    details = seller.get('seller_details')
    if details:
        if 'address' in details:
            print(f"   - Address: {details['address']}")
        if 'contact' in details:
            print(f"   - Contact: {details['contact']}")


def analyze_bidders(input_file_path, output_file_path, search_keywords=None, search_mode="any"):
    """
    Analyze bid data to extract seller statistics and save to output JSON file.

    For every bid (optionally restricted to bids whose title, description,
    bd_category_name or bbt_title match the keywords) each seller listed in
    the evaluation section is counted as applied, then as qualified (has a
    rank) or disqualified (no rank). L1 and L2 ranks are counted separately
    and the parsed totalPrice of L1 entries is summed. The result is written
    as JSON, sorted by number of bids applied (descending), and a summary is
    printed to stdout.

    Null or missing values in the source JSON (seller name, rank, status
    fields, title, category lists, metadata) are treated as empty rather
    than raising.

    Args:
        input_file_path (str): Path to input JSON file
        output_file_path (str): Path to output JSON file
        search_keywords (str or list, optional): Keywords to filter bids by
        search_mode (str): "any" for OR logic, "all" for AND logic (default: "any")

    Raises:
        FileNotFoundError: If the input file does not exist.
        json.JSONDecodeError: If the input file is not valid JSON.
        KeyError: If the input has no top-level 'bids' entry.
    """
    with open(input_file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)

    seller_stats = defaultdict(_new_seller_stats)

    # Counters used for the filter report
    total_bids_processed = 0
    filtered_bids_count = 0
    bids_not_matched = []

    for bid in data['bids']:
        total_bids_processed += 1

        # Apply keyword filtering if search_keywords is provided
        matched_keywords = []
        if search_keywords:
            match_found, matched_keywords = check_keyword_match_with_details(
                _bid_search_texts(bid), search_keywords, search_mode
            )
            if not match_found:
                bids_not_matched.append(bid.get('bidNumber'))
                continue

        filtered_bids_count += 1
        bid_location = extract_location_from_bid(bid)

        evaluation = _evaluation_section(bid)
        if evaluation is None:
            continue

        # Contact details of the L1 winner, when the evaluation summary has them
        summary = evaluation.get('summary')
        l1_winner_details = summary.get('l1Winner_details') if isinstance(summary, dict) else None

        for seller in evaluation.get('sellers') or []:
            if not isinstance(seller, dict):
                continue
            seller_name = _as_text(seller.get('sellerName')).strip()
            if not seller_name:
                continue

            stats = seller_stats[seller_name]
            stats['seller_name'] = seller_name
            if bid_location:
                stats['locations'].add(bid_location)
            stats['matched_keywords'].update(matched_keywords)
            stats['total_bids_applied'] += 1

            # A seller with any rank is qualified; no rank means disqualified
            rank = _as_text(seller.get('rank')).strip().upper()
            if not rank:
                stats['total_bids_disqualified'] += 1
            else:
                stats['total_bids_qualified'] += 1
                if rank == 'L1':
                    stats['total_bids_won_l1'] += 1
                    stats['total_amount_won_l1'] += parse_price(seller.get('totalPrice', '0'))

                    # Keep the first non-empty set of contact details only
                    if l1_winner_details and not stats['seller_details']:
                        details = _l1_contact_details(l1_winner_details)
                        if details:
                            stats['seller_details'] = details
                elif rank == 'L2':
                    stats['total_bids_l2'] += 1

            stats['special_status'].update(_special_statuses(seller))

    # Build one JSON-serializable record per seller (sets become sorted lists)
    sellers_list = []
    for stats in seller_stats.values():
        special_status = sorted(stats['special_status'])
        seller_entry = {
            'seller_name': stats['seller_name'],
            'total_bids_applied': stats['total_bids_applied'],
            'total_bids_qualified': stats['total_bids_qualified'],
            'total_bids_disqualified': stats['total_bids_disqualified'],
            'total_bids_won_l1': stats['total_bids_won_l1'],
            'total_bids_l2': stats['total_bids_l2'],
            'total_amount_won_l1_formatted': format_indian_currency(stats['total_amount_won_l1']),
            'total_amount_won_l1_raw': stats['total_amount_won_l1'],
            'seller_locations': sorted(stats['locations']),
            'keyword_matches': sorted(stats['matched_keywords']),
            'special_status': ', '.join(special_status) if special_status else 'None',
        }
        if stats['seller_details']:
            seller_entry['seller_details'] = stats['seller_details']
        sellers_list.append(seller_entry)

    # Most active sellers first; the sort is stable, so ties keep first-seen order
    sellers_list.sort(key=lambda entry: entry['total_bids_applied'], reverse=True)

    # Describe the filter (if any) for the output metadata
    if search_keywords:
        filter_info = {
            'keywords': [search_keywords] if isinstance(search_keywords, str) else list(search_keywords),
            'search_mode': search_mode,
            'total_bids_in_source': total_bids_processed,
            'filtered_bids_count': filtered_bids_count,
        }
    else:
        filter_info = None

    # Source metadata is only echoed into the report, so tolerate its absence
    # instead of discarding the finished analysis with a KeyError.
    source_metadata = data.get('metadata')
    if not isinstance(source_metadata, dict):
        source_metadata = {}

    total_amount_formatted = format_indian_currency(
        sum(seller['total_amount_won_l1_raw'] for seller in sellers_list)
    )

    output_data = {
        'metadata': {
            'total_unique_sellers': len(sellers_list),
            'analysis_date': source_metadata.get('scrapedAt'),
            'source_total_bids': source_metadata.get('totalBids'),
            'total_amount_won_all_l1_bids': total_amount_formatted,
            'filter_applied': filter_info,
            'bids_not_matched': bids_not_matched
        },
        'sellers': []
    }

    # Public records: serial number first, formatted amount only (no raw float)
    for serial_number, seller in enumerate(sellers_list, 1):
        public_entry = {
            'serial_number': serial_number,
            'seller_name': seller['seller_name'],
            'total_bids_applied': seller['total_bids_applied'],
            'total_bids_qualified': seller['total_bids_qualified'],
            'total_bids_disqualified': seller['total_bids_disqualified'],
            'total_bids_won_l1': seller['total_bids_won_l1'],
            'total_bids_l2': seller['total_bids_l2'],
            'total_amount_won_l1': seller['total_amount_won_l1_formatted'],
            'seller_locations': seller['seller_locations'],
            'keyword_matches': seller['keyword_matches'],
            'special_status': seller['special_status']
        }
        if seller.get('seller_details'):
            public_entry['seller_details'] = seller['seller_details']
        output_data['sellers'].append(public_entry)

    with open(output_file_path, 'w', encoding='utf-8') as file:
        json.dump(output_data, file, indent=2, ensure_ascii=False)

    print(f"Analysis complete! Found {len(sellers_list)} unique sellers.")

    if search_keywords:
        keywords_str = ', '.join(filter_info['keywords'])
        print(f"Filter applied: Keywords '{keywords_str}' with '{search_mode}' logic")
        print(f"Processed {filtered_bids_count} out of {total_bids_processed} total bids")
    else:
        print("No keyword filter applied - analyzed all bids")

    print(f"Results saved to: {output_file_path}")

    print(f"\nTotal amount won by all L1 bidders: {total_amount_formatted}")
    print("\nTop 5 sellers by bid participation:")
    for position, seller in enumerate(sellers_list[:5], 1):
        _print_seller_summary(position, seller)

    # Sanity check: the counters of every seller must be mutually consistent
    validation_issues = [
        seller for seller in sellers_list
        if (seller['total_bids_qualified'] + seller['total_bids_disqualified'] != seller['total_bids_applied'])
        or (seller['total_bids_won_l1'] + seller['total_bids_l2'] > seller['total_bids_qualified'])
    ]

    if validation_issues:
        print(f"\nWarning: Found {len(validation_issues)} sellers with data validation issues.")
    else:
        print("\n✓ All seller data passed validation checks.")

# Helper functions for common use cases
def analyze_single_keyword(input_file, output_file, keyword):
    """Run analyze_bidders filtered on one keyword (mode "any")."""
    return analyze_bidders(
        input_file,
        output_file,
        search_keywords=keyword,
        search_mode="any",
    )

def analyze_any_keywords(input_file, output_file, keywords_list):
    """Run analyze_bidders keeping bids that match at least one keyword (OR logic)."""
    return analyze_bidders(
        input_file,
        output_file,
        search_keywords=keywords_list,
        search_mode="any",
    )

def analyze_all_keywords(input_file, output_file, keywords_list):
    """Run analyze_bidders keeping only bids that match every keyword (AND logic)."""
    return analyze_bidders(
        input_file,
        output_file,
        search_keywords=keywords_list,
        search_mode="all",
    )

# Example usage
if __name__ == "__main__":
    # Replace these with your actual file paths
    input_file = "/content/processed_bids_data (1).json"
    output_file = "seller_analysis_output.json"

    # Other ways to run the analysis (swap in for the active call below):
    #   Single keyword:
    #       analyze_single_keyword(input_file, "stationery_analysis.json", "paper")
    #   AND logic (all keywords must match):
    #       analyze_all_keywords(input_file, "spices_analysis.json", ["spices", "chilli"])
    #   No filter (analyze all bids; locations shown, no keyword matches):
    #       analyze_bidders(input_file, output_file)

    try:
        # Multiple keywords with OR logic (any keyword match)
        analyze_any_keywords(input_file, "exxon_resellers.json", ["lubricant", "grease", "Mobil", "gear oil", "hydraulic oil", "engine oil"])
    except FileNotFoundError as exc:
        # Exit with a readable message and a non-zero status instead of a traceback
        raise SystemExit(f"Error: file '{exc.filename}' not found.")
    except json.JSONDecodeError as exc:
        raise SystemExit(f"Error: Invalid JSON format in '{input_file}': {exc}")