o
    -phV                     @   s  d dl Z d dlZd dlmZ d dlmZ d dlZd dlZd dlZd dl	m
Z
 d dlZd dlZd dlZd dlmZmZ ejejdd eeZd)d
dZdd Zdd Z			d*dedededededeeeef fddZdd Zdd Zdd Z d d! Z!d"d# Z"d$d% Z#d+d'd(Z$dS ),    N)canvas)letter)Image)UnionTuplez4%(asctime)s - %(name)s - %(levelname)s - %(message)s)levelformatmerged_document.pdfTc                 C   s  dd t t| D }|std dS g }d}|D ]_}tj| |}z9t|d*}t	|}	t
|	j}
|tj|d |||
 d |d	 ||
7 }W d   n1 sWw   Y  W q tyz } ztd
| d|  W Y d}~qd}~ww |std dS t|}t }t	|}||jd  d}|D ][}z:t|d d)}t	|}	|	jD ]}|rt||}|| n|| |d7 }qW d   n1 sw   Y  W q ty } ztd|d  d|  W Y d}~qd}~ww tj| |}zpt|d}|| W d   n	1 sw   Y  td|  tdt
|j  tdt
|  td |D ]-}|d |d krSd|d  }nd|d  d|d  }td|d  d|  q@W dS  ty } ztd|  W Y d}~dS d}~ww )a:  
    Merge all PDF files in a directory with an index table at the beginning.
    
    Args:
        directory_path (str): Path to directory containing PDF files
        output_filename (str): Name of the output merged PDF file
        add_page_numbers (bool): Whether to add visible page numbers to each page
    c                 S      g | ]}|  d r|qS .pdflowerendswith.0f r   5/var/www/html/minaions-tender/ai-engine/pdf_merger.py
<listcomp>       z)merge_pdfs_with_index.<locals>.<listcomp>z$No PDF files found in the directory.N   rbr      )name
start_pageend_page	file_pathzError reading : z&No valid PDF files could be processed.r   zError processing r   wbu%   ✅ Merged PDF created successfully: u   📄 Total pages: u   📑 Documents merged: u   
📋 Document Summary:r   r   zPage zPages -u     • zError saving merged PDF: )sortedoslistdirloggerinfopathjoinopenPyPDF2	PdfReaderlenpagesappendsplitext	Exceptioncreate_index_page	PdfWriteradd_pageadd_page_number_to_pagewrite)directory_pathoutput_filenameadd_page_numbers	pdf_filesdocument_infocurrent_pagepdf_filepdf_pathfile
pdf_reader	num_pagese	index_pdf
pdf_writerindex_readerpage_numberdoc_infopagenumbered_pageoutput_pathoutput_filedoc
pages_textr   r   r   merge_pdfs_with_index   s   












rL   c              	   C   sj  t  }tj|td}t\}}|dd |ddd}||| d |d d |ddd |	d |
d	|d
 |d	 |d
  |dd |ddd |d|d d ||d |d d |	d |
d	|d |d	 |d  |dd |d }t| D ]\}}|d
k r|d|d  nr|d dkr|ddd |jd	|d |d dddd |ddd tdd|d }	t|	dkr|	dd  d! }	|d||	 |d" |d# krt|d" }
n|d"  d$|d#  }
||
dd}||d | ||
 |d%8 }q|dd& |ddd |ddd't|   ||d
 dd( |  |d |S ))z Create a table of contents page.pagesizezHelvetica-Bold   zTable of Contentsr   <   333333?r   2   P      r   n   zDocument Name   Pagesg      ?x   	Helvetica      z... (additional documents)gffffff?   d   )fillstrokez^\d+_ r   (   N%   ...r   r   r          z#Generated index - Total documents: zPage 1)ioBytesIOr   Canvasr   setFontstringWidth
drawStringsetStrokeColorRGBsetLineWidthlinesetFillColorRGB	enumeraterectresubr+   strsaveseek)r9   buffercwidthheighttitle_width
y_positionirJ   doc_name	page_text
page_widthr   r   r   r0   u   sT   



r0   c                 C   s   t  }tj|td}t| jj}t| jj}|	dd d| d}|
|dd}|| d }d}	|ddd |||	| |  |d	 t|}
|
jd	 }| | | S )
z Add a page number to a PDF page.rM   rY   
   z- z -r      rQ   r   )rf   rg   r   rh   r   floatmediaboxry   rz   ri   rj   ro   rk   ru   rv   r)   r*   r,   
merge_page)rF   rD   packetcanr   page_heightr   
text_width
x_positionr|   page_number_pdfpage_number_pager   r   r   r3      s"   



r3   rR   r   
input_pathrH   target_size_percentmax_attemptspreserve_qualityreturnc              
   C   s2  t j| sddddifS |dvrddddifS t j| }||d  }|du r8t j| d	 }| d
| d}td tdt|  tdt| d| d zzt	| }|||d	d	d	d	dt
|d	}	t|||||||	\}
}}|
rt j|}||	d< || | d |	d< || d |	d< ||	d< td tdt|  td|	d dd td|	d dd d||	fW W dt v r|  S S dd|	fW W dt v r|  S S  ty } zddddt| ifW  Y d}~W dt v r|  S S d}~ww dt v r|  w w ) aI  
    Compress a PDF file to achieve a target file size reduction.
    
    Args:
        input_path (str): Path to the input PDF file
        output_path (str): Path for the compressed PDF (if None, adds '_compressed' to original name)
        target_size_percent (int): Target size as percentage of original (25, 50, or 75)
        max_attempts (int): Maximum compression attempts to reach target size
        preserve_quality (bool): Whether to prioritize quality over exact size target
    
    Returns:
        Tuple[bool, str, dict]: (success, output_path, compression_stats)
    Fr`   errorzInput file does not exist)   rR   K   z$Target size must be 25%, 50%, or 75%r]   Nr   _compressed_percent.pdfu$   🗜️  Starting PDF compression...u   📄 Original size: u   🎯 Target size:  (%))	original_sizetarget_sizetarget_percentattempts
final_sizecompression_ratioachieved_percentmethod_usedpages_processedr   r   r   r   u   ✅ Compression successful!u   📊 Final size: u   📉 Compression: .1fz% reductionu   🎯 Achieved: z% of original sizeTrJ   zCompression failed: )r"   r&   existsgetsizer.   r$   r%   format_file_sizefitzr(   r+   attempt_compression_strategieslocalscloser/   rt   )r   rH   r   r   r   r   r   	base_namerJ   statssuccess
final_pathmethodr   r@   r   r   r   compress_pdf   sp   







$

r   c                    s  | }d fddfd fddfd fddfd fd	dfg}|D ]\}	}
zt d
|	dd  d |
|}|rtj|rtj|}|d  d7  < t dt| d||d  d dd ||ksn|s| krxt	
|  || kr|  d |	fW   S |r||d kr| krt	
|  || kr|  d |	fW   S || kr|  t|}tj|r|	dkrt	
|  d |	fW   S W q% ty } zt d|	 dt|  W Y d}~q%d}~ww || kr|  dS )z<Try different compression strategies to achieve target size.basic_compressionc                    s
   t |  S N)basic_pdf_compressiond)rH   r   r   <lambda>@  s   
 z0attempt_compression_strategies.<locals>.<lambda>image_optimizationc                       t |  S r   ) compress_with_image_optimizationr   rH   r   r   r   r   A      aggressive_compressionc                    r   r   )aggressive_pdf_compressionr   r   r   r   r   B  r   hybrid_compressionc                    r   r   )hybrid_compression_approachr   r   r   r   r   C  r   u   🔄 Trying _ rc   r   r   u      📏 Size achieved: r   r   r]   r   r   Tg?u      ❌ z	 failed: N)Fr`   no_method_successful)r$   r%   replacetitler"   r&   r   r   r   shutilmover   r   r(   r/   rt   )rJ   rH   r   r   r   r   r   current_doc
strategiesstrategy_namestrategy_functemp_outputcurrent_sizer@   r   r   r   r   9  sR   *
r   c                 C   sj   t  }tt| D ]}| | }|j|jj|jjd}||j| | q
|j	|dddddd |
  |S )z7Basic PDF compression using PyMuPDF's built-in methods.ry   rz      Tgarbagecleandeflatedeflate_imagesdeflate_fonts)r   r(   ranger+   new_pagerq   ry   rz   show_pdf_pageru   r   )rJ   rH   new_docpage_numrF   r   r   r   r   r   u  s   r   c                    s  t  }|dkrd}d}n|dkrd}d}nd}d}tt| D ]}| | }|j|jj|jjd}| }	|	rt	|	D ]~\}
}z]|d	 }| 
|}|d
 }tt|}t|j|krw|t|j  t fdd|jD }||tjj}t }|jdv r|d}|j|d|dd |d	 | ||  W q; ty } ztd|
 d|  W Y d}~q;d}~ww ||j| | q |j|dddddd |  |S )z,Compress PDF by optimizing images within it.r   r   i   rR   i  F   i@  r   r   imagec                 3   s    | ]	}t |  V  qd S r   )int)r   dimratior   r   	<genexpr>      z3compress_with_image_optimization.<locals>.<genexpr>RGBALARGBJPEGTr   qualityoptimizeu#      ⚠️  Could not process image r   Nr   r   )r   r(   r   r+   r   rq   ry   rz   
get_imagesrp   extract_imager   rf   rg   maxsizetupleresize
ResamplingLANCZOSmodeconvertru   rv   _replace_imagegetvaluer/   r$   r%   r   r   )rJ   rH   r   r   img_qualitymax_dimensionr   rF   r   
image_list	img_indeximgxref
base_imageimage_bytes	pil_imagenew_size
img_bufferr@   r   r   r   r     sZ   



r   c                 C   s  t  }|dkrd}d}n|dkrd}d}nd}d}tt| D ]T}| | }t |d	 |d	 }|j|d
}	|	d}
tt	|
}t	 }|j
dv rQ|d}|j|d|dd |d |j|jj|jjd}|j|j| d q |j|dddd |  |S )z=Aggressive compression that may reduce quality significantly.r   r]      rR   rV   #      7   H   )matrixpngr   r   r   Tr   r   r   )streamr   )r   r   r   )r   r(   r   r+   Matrix
get_pixmaptobytesr   rf   rg   r   r   ru   rv   r   rq   ry   rz   insert_imager   r   )rJ   rH   r   r   dpir   r   rF   matpiximg_datar   r   r   r   r   r   r     s2   



r   c                 C   s   | dd}t| | tj|}tj| j|d  }||kr(t|| |S t	|}t
||| |  tj|rBt| |S )z:Hybrid approach combining multiple compression techniques.r   z	_temp.pdfr]   )r   r   r"   r&   r   r   r   r   r   r(   r   r   r   remove)rJ   rH   r   	temp_file
basic_sizer   doc_tempr   r   r   r     s   


r   c                 C   sR   | dk r	|  dS | dk r| d ddS | dk r!| d ddS | d ddS )	z'Convert bytes to human readable format.i   z Bi   r   z KBi   @z MBz GBr   )
size_bytesr   r   r   r     s   
r   _compressedc              	   C   s  dd t | D }|std dS tdt| d g }t|dD ]D\}}t j| |}t j|d }t j| | | d	| d
}	td| dt| d|  t	||	|\}
}}|
||
|d q%tdd  td td  tdd |D }tdd |D }tdd |D }td| dt|  tdt||   td|| | d dd dS )z&Compress all PDF files in a directory.c                 S   r
   r   r   r   r   r   r   r     r   z'batch_compress_pdfs.<locals>.<listcomp>z No PDF files found in directory.Nu   🗂️  Found z PDF files to compressr   r   r   r   u   
📄 Processing /r   )r=   r   r   
z2==================================================u   🎯 Batch Compression Summaryc                 s   s    | ]	}|d  rdV  qdS )r   r   Nr   r   rr   r   r   r   ;  r   z&batch_compress_pdfs.<locals>.<genexpr>c                 s   (    | ]}|d  r|d  ddV  qdS )r   r   r   r   Ngetr  r   r   r   r   <     & c                 s   r  )r   r   r   r   Nr  r  r   r   r   r   =  r  u   ✅ Successful: u   💾 Total size reduction: u   📊 Overall compression: r]   r   %)r"   r#   r$   r%   r+   rp   r&   r'   r.   r   r-   sumr   )r5   r   output_suffixr8   resultsr}   r;   r   r   rH   r   r   r   
successfultotal_original_sizetotal_final_sizer   r   r   batch_compress_pdfs  s4   
 

$r%  )r	   T)NrR   r   T)rR   r  )%r"   r)   reportlab.pdfgenr   reportlab.lib.pagesizesr   rf   rr   r   PILr   tempfiler   loggingtypingr   r   basicConfigINFO	getLogger__name__r$   rL   r0   r3   rt   r   booldictr   r   r   r   r   r   r   r%  r   r   r   r   <module>   s@    

cJ)
Q<D*