o
    c2                  
   @   s  d dl Z d dlmZ d dlmZ d dlmZ d dlmZm	Z	m
Z
mZmZ ddlmZ ddlmZmZmZmZ dd	lmZ dd
lmZ ddlmZmZmZmZmZ dede
e fddZdede
e fddZ e dede
e fddZ!e dede
e fddZ"eeddedee#e#f fddZ$	d1de
e de#de
e fddZ%ded e
e de&fd!d"Z'd#ede
e fd$d%Z(d&e
e defd'd(Z)d&edefd)d*Z*ed+d	d2d#ed-e&d.ee defd/d0Z+dS )3    N)IncrementalDecoder)Counter)	lru_cache)r   DictListOptionalTuple   )FREQUENCIES)KO_NAMESLANGUAGE_SUPPORTED_COUNTTOO_SMALL_SEQUENCEZH_NAMES) is_suspiciously_successive_range)CoherenceMatches)is_accentuatedis_latinis_multi_byte_encodingis_unicode_range_secondaryunicode_range	iana_namereturnc                    s   	 t | r	tdtd| j}|dd}i d tddD ]/}|t|g}|rOt	|}|d u r5q t
|du rO|vrCd|< |  d	7  <  d	7  q t fd
dD S )Nz.Function not supported on multi-byte code pagezencodings.{}ignore)errorsr   @      Fr	   c                    s    g | ]}|   d kr|qS )g333333? ).0character_rangecharacter_countZseen_rangesr   YG:\Development\the-witcher-3-mod-manager\.venv\lib\site-packages\charset_normalizer\cd.py
<listcomp>3   s
    z*encoding_unicode_range.<locals>.<listcomp>)r   IOError	importlibimport_moduleformatr   rangedecodebytesr   r   sorted)r   decoderpichunkr   r   r   r!   encoding_unicode_range   s8   
r/   primary_rangec                 C   s@   	 g }t  D ]\}}|D ]}t|| kr||  nqq|S N)r
   itemsr   append)r0   	languageslanguage
characters	characterr   r   r!   unicode_range_languages;   s   
r8   c                 C   s>   	 t | }d }|D ]
}d|vr|} nq	|d u rdgS t|S )NLatinLatin Based)r/   r8   )r   Zunicode_rangesr0   Zspecified_ranger   r   r!   encoding_languagesJ   s   r;   c                 C   sb   	 |  ds|  ds|  ds| dkrdgS |  ds | tv r#dgS |  ds,| tv r/d	gS g S )
NZshift_
iso2022_jpZeuc_jcp932JapanesegbChinese
iso2022_krKorean)
startswithr   r   )r   r   r   r!   mb_encoding_languages^   s   rD   )maxsizer5   c                 C   sD   	 d}d}t |  D ]}|st|rd}|rt|du rd}q	||fS )NFT)r
   r   r   )r5   target_have_accentstarget_pure_latinr7   r   r   r!   get_target_featuress   s   rH   Fr6   ignore_non_latinc                    s   	 g }t dd  D }t D ]6\}}t|\}}|r!|du r!q|du r(|r(qt|}t fdd|D }	|	| }
|
dkrF|||
f qt|dd d	d
}dd |D S )Nc                 s   s    | ]}t |V  qd S r1   )r   )r   r7   r   r   r!   	<genexpr>       z%alphabet_languages.<locals>.<genexpr>Fc                    s   g | ]}| v r|qS r   r   )r   cr6   r   r!   r"      s    z&alphabet_languages.<locals>.<listcomp>g?c                 S      | d S Nr	   r   xr   r   r!   <lambda>       z$alphabet_languages.<locals>.<lambda>Tkeyreversec                 S   s   g | ]}|d  qS )r   r   )r   Zcompatible_languager   r   r!   r"          )anyr
   r2   rH   lenr3   r*   )r6   rI   r4   Zsource_have_accentsr5   Zlanguage_charactersrF   rG   r    character_match_countratior   rM   r!   alphabet_languages   s&   r\   ordered_charactersc                 C   s  	 | t vrtd| d}tt |  }t|}tt |  }|dk}t|td|D ]\}}||vr3q*t |  |}	|| }
t||
 }|du rQt	||	 dkrQq*|du rdt	||	 |d k rd|d7 }q*t |  d|	 }t |  |	d  }|d| }||d  }tt|t|@ }tt|t|@ }t|dkr|dkr|d7 }q*t|dkr|dkr|d7 }q*|t| d	ks|t| d	kr|d7 }q*q*|t| S )
Nz{} not availabler      F   T   r	   g?)
r
   
ValueErrorr&   setrY   zipr'   indexintabs)r5   r]   Zcharacter_approved_countZFREQUENCIES_language_setZordered_characters_countZ target_language_characters_countZlarge_alphabetr7   Zcharacter_rankZcharacter_rank_in_languageZexpected_projection_ratioZcharacter_rank_projectionZcharacters_before_sourceZcharacters_after_sourceZcharacters_beforeZcharacters_afterZbefore_match_countZafter_match_countr   r   r!   characters_popularity_compare   sh   

rg   decoded_sequencec                 C   s   	 i }| D ]?}|  du rqt|}|d u rqd }|D ]}t||du r(|} nq|d u r/|}||vr:| ||< q||  | 7  < qt| S )NF)isalphar   r   lowerlistvalues)rh   Zlayersr7   r   Zlayer_target_rangeZdiscovered_ranger   r   r!   alpha_unicode_split   s.   rm   resultsc                    sh   	 i  | D ]}|D ]}|\}}| vr|g |< q	 |  | q	q fdd D }t|dd ddS )Nc                    s.   g | ]}|t t | t |  d fqS )r_   )roundsumrY   )r   r5   Zper_language_ratiosr   r!   r"   -  s    z*merge_coherence_ratios.<locals>.<listcomp>c                 S   rN   rO   r   rP   r   r   r!   rR   8  rS   z(merge_coherence_ratios.<locals>.<lambda>TrT   )r3   r*   )rn   resultZ
sub_resultr5   r[   merger   rq   r!   merge_coherence_ratios  s   

rt   c                    s   	 t   | D ]}|\}}|dd}| vrg  |<  | | qt fdd D rAg } D ]}||t | f q1|S | S )Nu   — c                 3   s     | ]}t  | d kV  qdS )r	   N)rY   )r   eZindex_resultsr   r!   rJ   K  s    z/filter_alt_coherence_matches.<locals>.<genexpr>)dictreplacer3   rX   max)rn   rr   r5   r[   Z
no_em_nameZfiltered_resultsr   rw   r!   filter_alt_coherence_matches;  s   r{   i   皙?	thresholdlg_inclusionc                 C   s   	 g }d}d}|d ur| dng }d|v rd}|d t| D ]K}t|}| }	tdd |	D }
|
tkr9q!dd	 |	D }|pFt||D ]$}t||}||k rSqG|d
kr[|d7 }|	|t
|df |dkrk nqGq!tt|dd ddS )NFr   ,r:   Tc                 s   s    | ]\}}|V  qd S r1   r   r   rL   or   r   r!   rJ   m  rK   z"coherence_ratio.<locals>.<genexpr>c                 S   s   g | ]\}}|qS r   r   r   r   r   r!   r"   r  rW   z#coherence_ratio.<locals>.<listcomp>g?r	   r_   r`   c                 S   rN   rO   r   rP   r   r   r!   rR     rS   z!coherence_ratio.<locals>.<lambda>rT   )splitremoverm   r   most_commonrp   r   r\   rg   r3   ro   r*   r{   )rh   r}   r~   rn   rI   Zsufficient_match_countZlg_inclusion_listZlayerZsequence_frequenciesr   r    Zpopular_character_orderedr5   r[   r   r   r!   coherence_ratioV  sB   
r   )F)r|   N),r$   codecsr   collectionsr   	functoolsr   typingTypeCounterr   r   r   r   assetsr
   constantr   r   r   r   mdr   modelsr   utilsr   r   r   r   r   strr/   r8   r;   rD   boolrH   r\   floatrg   rm   rt   r{   r   r   r   r   r!   <module>   s^    	'
$
P'