
    i?1i                        d dl Z d dlZd dlmZmZmZmZmZmZm	Z	 d dl
mZ ddlmZ ddlmZmZmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ  ej8                  e      Z ed      Z ed      Z  G d d      Z!y)    N)BinaryIO	ContainerDictIteratorListOptionalTuple)Rect   )settings)PDFDocumentPDFTextExtractionNotAllowedPDFNoPageLabels)	PDFParser)PDFObjectNotFound)
dict_value)	int_value)
list_value)resolve1)LITPagePagesc                       e Zd ZdZdedededee   ddf
dZdefd	Z	h d
Z
ededed    fd       Ze	 	 	 	 	 ddedeee      dededededed    fd       Zy)PDFPageak  An object that holds the information about a page.

    A PDFPage object is merely a convenience class that has a set
    of keys and values, which describe the properties of a page
    and point to its contents.

    Attributes:
      doc: a PDFDocument object.
      pageid: any Python object that can uniquely identify the page.
      attrs: a dictionary of page attributes.
      contents: a list of PDFStream objects that represents the page content.
      lastmod: the last modified time of the page.
      resources: a dictionary of resources used by the page.
      mediabox: the physical size of the page.
      cropbox: the crop rectangle of the page.
      rotate: the page rotation (in degree).
      annots: the page annotations.
      beads: a chain that represents natural reading order.
      label: the page's label (typically, the logical page number).
    docpageidattrslabelreturnNc                    || _         || _        t        |      | _        || _        t        | j                  j                  d            | _        t        | j                  j                  dt                           | _	        t        | j                  d         | _
        d| j                  v rt        | j                  d         | _        n| j                  | _        t        | j                  j                  dd            dz   dz  | _        | j                  j                  d      | _        | j                  j                  d	      | _        d
| j                  v rt        | j                  d
         }ng }t!        |t"              s|g}|| _        y)zInitialize a page object.

        doc: a PDFDocument object.
        pageid: any Python object that can uniquely identify the page.
        attrs: a dictionary of page attributes.
        label: page label string.
        LastModified	ResourcesMediaBoxCropBoxRotater   ih  AnnotsBContentsN)r   r   r   r   r   r   getlastmoddict	resourcesmediaboxcropboxr   rotateannotsbeads
isinstancelistcontents)selfr   r   r   r   r4   s         Z/home/www/therecruiter.miabetepe.com/venv/lib/python3.12/site-packages/pdfminer/pdfpage.py__init__zPDFPage.__init__-   s*    &



~ >?/7JJNN;/0
 'tzz*'=>

"!)$**Y*?!@DL==DL !!<=CsJjjnnX.ZZ^^C(
#

: 67HH(D) zH&.    c                 N    dj                  | j                  | j                        S )Nz(<PDFPage: Resources={!r}, MediaBox={!r}>)formatr,   r-   )r5   s    r6   __repr__zPDFPage.__repr__O   s"    9@@NNDMM
 	
r8   >   r%   r$   r#   r"   documentc              #      	K   dt         dt        t        t         f   dt        t        t
        t        t         t        t         t         f   f   f      f 	fd		 j                         }d}dj                  v rB 	j                  d   j                        }|D ]  \  }}  ||t        |             d} |suj                  D ]f  }|j                         D ]Q  }	 j                  |      }t        |t               r-|j#                  d      t$        u r  ||t        |             S h y # t        $ r t        j                  d       }Y w xY w# t&        $ r Y w xY ww)	Nobjparentr   c              3   z  K   t        | t              r+| }t        	j                  |            j	                         }n%| j
                  }t        |       j	                         }|j                         D ]  \  }}|j                  v s||vs|||<     |j                  d      }|!t        j                  s|j                  d      }|t        u rBd|v r>t        j                  d|d          t        |d         D ]  } 
||      E d {     y |t        u rt        j                  d|       ||f y y 7 -w)NTypetypeKidszPages: Kids=%rzPage: %r)r2   intr   getobjcopyobjiditemsINHERITABLE_ATTRSr)   r   STRICTLITERAL_PAGESlogdebugr   LITERAL_PAGE)r>   r?   rG   treekv	tree_typecclsr<   searchs           r6   rU   z$PDFPage.create_pages.<locals>.searchX   s#     #s#!(//%"89>>@ 		!#++- ,,.  A---!4-DG  (I  HHV,	M)fn		*DL9#DL1 /A%a../l*		*d+dm# + /s   BD;	D;A=D;D9.D;Fr   TrA   )objectr   strr   r	   rD   get_page_labelsr   	itertoolsrepeatcatalognextxrefs
get_objidsrE   r2   r+   r)   rN   r   )
rT   r<   page_labelspagesobjectsrG   rO   xrefr>   rU   s
   ``       @r6   create_pageszPDFPage.create_pagesV   sk    	$	$!%c6k!2	$eCfd66>.B&B!CCDE	$6	13;3K3K3MK h&&&X--g68H8HIG!( (E4k1BCC   !__. E&ooe4%c40SWWV_5T"%hsD<M"NN	 	'  	1#**40K	1" - sP   AE)D6 ,A6E)#AE1E)6EE)EE)	E&#E)%E&&E)fppagenosmaxpagespasswordcachingcheck_extractablec              #   "  K   t        |      }t        |||      }|j                  s,|rd|z  }	t        |	      d|z  }
t        j                  |
       t        | j                  |            D ]  \  }}|r||vr| |s||dz   k  s y  y w)N)rg   rh   z"Text extraction is not allowed: %rzThe PDF %r contains a metadata field indicating that it should not allow text extraction. Ignoring this field and proceeding. Use the check_extractable if you want to raise an error in this caser   )r   r   is_extractabler   rL   warning	enumeraterc   )rT   rd   re   rf   rg   rh   ri   parserr   	error_msgwarning_msgpagenopages                r6   	get_pageszPDFPage.get_pages   s      2&8WE !! @2E	1)<<A DF	F  K('(8(8(=> 	NVTF'1JH
2	 	s   A?BBB)Nr    TF)__name__
__module____qualname____doc__r   rV   r   rW   r7   r;   rI   classmethodr   rc   r   r   rD   boolrs    r8   r6   r   r      s    * / /(. /7= /FNsm /	 /D
# 

 G1K 1HY4G 1 1f  -1"'## )C.)# 	#
 # #  # 
)	# #r8   r   )"rY   loggingtypingr   r   r   r   r   r   r	   pdfminer.utilsr
   rt   r   pdfdocumentr   r   r   	pdfparserr   pdftypesr   r   r   r   r   psparserr   	getLoggerru   rL   rN   rK   r   r{   r8   r6   <module>r      sg      M M M   R R   '       g! 6{GW Wr8   