o
    cJ                     @   s  d dl Z d dlmZ d dlmZmZmZmZmZ ddl	m
Z
mZmZmZ ddlmZmZmZmZ ddlmZ ddlmZmZ dd	lmZmZmZmZmZmZmZ e  d
Z!e " Z#e#$e %d 								d%de&de'de'de(deee)  deee)  de*de*de(defddZ+								d%dede'de'de(deee)  deee)  de*de*de(defdd Z,								d%d!d"de'de'de(deee)  deee)  de*de*de(defd#d$Z-dS )&    N)PathLike)AnyBinaryIOListOptionalSet   )coherence_ratioencoding_languagesmb_encoding_languagesmerge_coherence_ratios)IANA_SUPPORTEDTOO_BIG_SEQUENCETOO_SMALL_SEQUENCETRACE)
mess_ratio)CharsetMatchCharsetMatches)any_specified_encodingcut_sequence_chunks	iana_nameidentify_sig_or_bomis_cp_similaris_multi_byte_encodingshould_strip_sig_or_bomcharset_normalizerz)%(asctime)s | %(levelname)s | %(message)s      皙?TF皙?	sequencessteps
chunk_size	thresholdcp_isolationcp_exclusionpreemptive_behaviourexplainlanguage_thresholdreturnc	           .      C   s  	 t | ttfstdt| |r tj}	tt	 t
t t| }
|
dkrHtd |r<tt	 t
|	p:tj tt| dddg dgS |d ur^ttdd	| d
d |D }ng }|d urvttdd	| dd |D }ng }|
|| krttd|||
 d}|
}|dkr|
| |k rt|
| }t| tk }t| tk}|rttd|
 n|rttd|
 g }|rt| nd }|d ur|| ttd| t }g }g }d }d }d }t }t| \}}|d ur|| ttdt|| |d d|vr|d |t D ]}|r"||vr"q|r,||v r,q||v r3q|| d }||k}|oDt|}|dv rV|sVttd| q|dv rg|sgttd| qzt|}W n t t!fy   ttd| Y qw z9|r|du rt"|du r| d td n	| t|td |d nt"|du r| n| t|d  |d}W n+ t#t$fy } zt |t$sttd|t"| || W Y d }~qd }~ww d}|D ]}t%||rd} nq|rttd|| qt&|sdnt||
t|
| } |o$|d uo$t||
k }!|!r/ttd| tt| d  }"t'|"d!}"d}#d}$g }%g }&zLt(| || ||||||	D ]=}'|%|' |&t)|'||du opdt|  kond!kn   |&d" |kr~|#d7 }#|#|"ks|r|du r nqQW n! t#y } zttd#|t"| |"}#d}$W Y d }~nd }~ww |$s|r|sz| td$d  j*|d%d& W n# t#y } zttd'|t"| || W Y d }~qd }~ww |&rt+|&t|& nd}(|(|ks|#|"krA|| ttd(||#t,|(d) d*d+ |dd|fv r?|$s?t| ||dg |})||kr5|)}n
|dkr=|)}n|)}qttd,|t,|(d) d*d+ |sWt-|}*nt.|}*|*rjttd-|t"|* g }+|dkr|%D ]}'t/|'||*rd.|*nd },|+|, qst0|+}-|-rttd/|-| |t| ||(||-| ||ddfv r|(d0k rtd1| |rtt	 t
|	 t|| g  S ||krtd2| |rtt	 t
|	 t|| g  S qt|dkrK|s|s|r	ttd3 |rtd4|j1 || n2|r!|d u s3|r.|r.|j2|j2ks3|d ur>td5 || n|rKtd6 || |r]td7|3 j1t|d  ntd8 |rott	 t
|	 |S )9Nz4Expected object of type bytes or bytearray, got: {0}r   z<Encoding detection on empty bytes, assuming utf_8 intention.utf_8g        F z`cp_isolation is set. use this flag for debugging purpose. limited list of encoding allowed : %s.z, c                 S      g | ]}t |d qS Fr   .0cp r2   ZG:\Development\the-witcher-3-mod-manager\.venv\lib\site-packages\charset_normalizer\api.py
<listcomp>Z       zfrom_bytes.<locals>.<listcomp>zacp_exclusion is set. use this flag for debugging purpose. limited list of encoding excluded : %s.c                 S   r,   r-   r.   r/   r2   r2   r3   r4   e   r5   z^override steps (%i) and chunk_size (%i) as content does not fit (%i byte(s) given) parameters.r   z>Trying to detect encoding from a tiny portion of ({}) byte(s).zIUsing lazy str decoding because the payload is quite large, ({}) byte(s).z@Detected declarative mark in sequence. Priority +1 given for %s.zIDetected a SIG or BOM mark on first %i byte(s). Priority +1 given for %s.ascii>   utf_32utf_16z\Encoding %s won't be tested as-is because it require a BOM. Will try some sub-encoder LE/BE.>   utf_7zREncoding %s won't be tested as-is because detection is unreliable without BOM/SIG.z2Encoding %s does not provide an IncrementalDecoderg    A)encodingz9Code page %s does not fit given bytes sequence at ALL. %sTzW%s is deemed too similar to code page %s and was consider unsuited already. Continuing!zpCode page %s is a multi byte encoding table and it appear that at least one character was encoded using n-bytes.      zaLazyStr Loading: After MD chunk decode, code page %s does not fit given bytes sequence at ALL. %sg     j@strict)errorsz^LazyStr Loading: After final lookup, code page %s does not fit given bytes sequence at ALL. %szc%s was excluded because of initial chaos probing. Gave up %i time(s). Computed mean chaos is %f %%.d      )ndigitsz=%s passed initial chaos probing. Mean measured chaos is %f %%z&{} should target any language(s) of {},z We detected language {} using {}r   z.Encoding detection: %s is most likely the one.zoEncoding detection: %s is most likely the one as we detected a BOM or SIG within the beginning of the sequence.zONothing got out of the detection process. Using ASCII/UTF-8/Specified fallback.z7Encoding detection: %s will be used as a fallback matchz:Encoding detection: utf_8 will be used as a fallback matchz:Encoding detection: ascii will be used as a fallback matchz]Encoding detection: Found %s as plausible (best-candidate) for content. With %i alternatives.z=Encoding detection: Unable to determine any suitable charset.)4
isinstance	bytearraybytes	TypeErrorformattypeloggerlevel
addHandlerexplain_handlersetLevelr   lendebugremoveHandlerloggingWARNINGr   r   logjoinintr   r   r   appendsetr   r   addr   r   ModuleNotFoundErrorImportErrorstrUnicodeDecodeErrorLookupErrorr   rangemaxr   r   decodesumroundr
   r   r	   r   r:   fingerprintbest).r    r!   r"   r#   r$   r%   r&   r'   r(   Zprevious_logger_levellengthZis_too_small_sequenceZis_too_large_sequenceZprioritized_encodingsspecified_encodingZtestedZtested_but_hard_failureZtested_but_soft_failureZfallback_asciiZfallback_u8Zfallback_specifiedresultsZsig_encodingsig_payloadencoding_ianadecoded_payloadbom_or_sig_availablestrip_sig_or_bomis_multi_byte_decodereZsimilar_soft_failure_testZencoding_soft_failedZr_Zmulti_byte_bonusZmax_chunk_gave_upZearly_stop_countZlazy_str_hard_failureZ	md_chunksZ	md_ratioschunkmean_mess_ratioZfallback_entryZtarget_languagesZ	cd_ratiosZchunk_languagesZcd_ratios_mergedr2   r2   r3   
from_bytes!   s  


















&
























rr   fpc	           	   
   C   s   	 t |  ||||||||	S )N)rr   read)	rs   r!   r"   r#   r$   r%   r&   r'   r(   r2   r2   r3   from_fp  s   ru   pathzPathLike[Any]c	           
      C   sH   	 t | d}	t|	||||||||	W  d    S 1 sw   Y  d S )Nrb)openru   )
rv   r!   r"   r#   r$   r%   r&   r'   r(   rs   r2   r2   r3   	from_path  s   $ry   )r   r   r   NNTFr   ).rR   osr   typingr   r   r   r   r   cdr	   r
   r   r   constantr   r   r   r   mdr   modelsr   r   utilsr   r   r   r   r   r   r   	getLoggerrJ   StreamHandlerrM   setFormatter	FormatterrF   rV   floatr\   boolrr   ru   ry   r2   r2   r2   r3   <module>   s    $


	

   W

	



	
