del: venv
This commit is contained in:
parent
0064d2649e
commit
8099bfa97c
@ -1,247 +0,0 @@
|
||||
<#
|
||||
.Synopsis
|
||||
Activate a Python virtual environment for the current PowerShell session.
|
||||
|
||||
.Description
|
||||
Pushes the python executable for a virtual environment to the front of the
|
||||
$Env:PATH environment variable and sets the prompt to signify that you are
|
||||
in a Python virtual environment. Makes use of the command line switches as
|
||||
well as the `pyvenv.cfg` file values present in the virtual environment.
|
||||
|
||||
.Parameter VenvDir
|
||||
Path to the directory that contains the virtual environment to activate. The
|
||||
default value for this is the parent of the directory that the Activate.ps1
|
||||
script is located within.
|
||||
|
||||
.Parameter Prompt
|
||||
The prompt prefix to display when this virtual environment is activated. By
|
||||
default, this prompt is the name of the virtual environment folder (VenvDir)
|
||||
surrounded by parentheses and followed by a single space (i.e. '(.venv) ').
|
||||
|
||||
.Example
|
||||
Activate.ps1
|
||||
Activates the Python virtual environment that contains the Activate.ps1 script.
|
||||
|
||||
.Example
|
||||
Activate.ps1 -Verbose
|
||||
Activates the Python virtual environment that contains the Activate.ps1 script,
|
||||
and shows extra information about the activation as it executes.
|
||||
|
||||
.Example
|
||||
Activate.ps1 -VenvDir C:\Users\MyUser\Common\.venv
|
||||
Activates the Python virtual environment located in the specified location.
|
||||
|
||||
.Example
|
||||
Activate.ps1 -Prompt "MyPython"
|
||||
Activates the Python virtual environment that contains the Activate.ps1 script,
|
||||
and prefixes the current prompt with the specified string (surrounded in
|
||||
parentheses) while the virtual environment is active.
|
||||
|
||||
.Notes
|
||||
On Windows, it may be required to enable this Activate.ps1 script by setting the
|
||||
execution policy for the user. You can do this by issuing the following PowerShell
|
||||
command:
|
||||
|
||||
PS C:\> Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope CurrentUser
|
||||
|
||||
For more information on Execution Policies:
|
||||
https://go.microsoft.com/fwlink/?LinkID=135170
|
||||
|
||||
#>
|
||||
Param(
    # Directory of the virtual environment to activate. When empty, the script
    # body falls back to the parent of the directory holding this script.
    [Parameter(Mandatory = $false)][String]$VenvDir,

    # Prompt prefix to show while the environment is active. When empty, the
    # script body falls back to the pyvenv.cfg `prompt` value, then to the
    # leaf name of the environment directory.
    [Parameter(Mandatory = $false)][String]$Prompt
)
|
||||
|
||||
<# Function declarations --------------------------------------------------- #>
|
||||
|
||||
<#
|
||||
.Synopsis
|
||||
Remove all shell session elements added by the Activate script, including the
|
||||
virtual environment's Python executable that was added to the beginning of
|
||||
the PATH variable.
|
||||
|
||||
.Parameter NonDestructive
|
||||
If present, do not remove this function from the global namespace for the
|
||||
session.
|
||||
|
||||
#>
|
||||
function global:deactivate ([switch]$NonDestructive) {
    # Undo everything the Activate script put into the session.

    # Items that were saved under an _OLD_VIRTUAL_* name: copy each one back
    # over the live item, then drop the saved copy. Order: prompt function,
    # PYTHONHOME, PATH.
    $savedItems = @(
        @{ Saved = 'Function:_OLD_VIRTUAL_PROMPT'; Live = 'Function:prompt' },
        @{ Saved = 'Env:_OLD_VIRTUAL_PYTHONHOME'; Live = 'Env:PYTHONHOME' },
        @{ Saved = 'Env:_OLD_VIRTUAL_PATH'; Live = 'Env:PATH' }
    )
    foreach ($item in $savedItems) {
        if (Test-Path -Path $item.Saved) {
            Copy-Item -Path $item.Saved -Destination $item.Live
            Remove-Item -Path $item.Saved
        }
    }

    # Environment variables that only exist while a venv is active are simply
    # removed: VIRTUAL_ENV first, then VIRTUAL_ENV_PROMPT.
    foreach ($envItem in @('Env:VIRTUAL_ENV', 'Env:VIRTUAL_ENV_PROMPT')) {
        if (Test-Path -Path $envItem) {
            Remove-Item -Path $envItem
        }
    }

    # The prompt prefix variable is created ReadOnly by the activation code,
    # hence -Force when removing it from the global scope.
    $prefixVariable = Get-Variable -Name "_PYTHON_VENV_PROMPT_PREFIX" -ErrorAction SilentlyContinue
    if ($prefixVariable) {
        Remove-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Scope Global -Force
    }

    # Unless asked to stay, remove this function itself from the session.
    if ($NonDestructive) {
        return
    }
    Remove-Item -Path function:deactivate
}
|
||||
|
||||
<#
|
||||
.Description
|
||||
Get-PyVenvConfig parses the values from the pyvenv.cfg file located in the
|
||||
given folder, and returns them in a map.
|
||||
|
||||
For each line in the pyvenv.cfg file, if that line can be parsed into exactly
|
||||
two strings separated by `=` (with any amount of whitespace surrounding the =)
|
||||
then it is considered a `key = value` line. The left hand string is the key,
|
||||
the right hand is the value.
|
||||
|
||||
If the value starts with a `'` or a `"` then the first and last character is
|
||||
stripped from the value before being captured.
|
||||
|
||||
.Parameter ConfigDir
|
||||
Path to the directory that contains the `pyvenv.cfg` file.
|
||||
#>
|
||||
function Get-PyVenvConfig(
    [String]
    $ConfigDir
) {
    # Parses <ConfigDir>/pyvenv.cfg into a hashtable of key -> value strings.
    # Returns an empty hashtable when the file cannot be resolved.
    Write-Verbose "Given ConfigDir=$ConfigDir, obtain values in pyvenv.cfg"

    # -Resolve makes Join-Path report an error when the file is missing;
    # -ErrorAction Continue lets the function carry on with an empty path.
    $pyvenvConfigPath = Join-Path -Resolve -Path $ConfigDir -ChildPath 'pyvenv.cfg' -ErrorAction Continue

    # An empty map will be returned if no config file is found.
    $pyvenvConfig = @{ }

    if ($pyvenvConfigPath) {

        Write-Verbose "File exists, parse `key = value` lines"
        $pyvenvConfigContent = Get-Content -Path $pyvenvConfigPath

        $pyvenvConfigContent | ForEach-Object {
            # Split on the first '=' only, swallowing whitespace around it.
            $keyval = $PSItem -split "\s*=\s*", 2
            if ($keyval[0] -and $keyval[1]) {
                $val = $keyval[1]

                # Remove extraneous quotations around a string value.
                # The length guard matters: for a value that is a single quote
                # character, Substring(1, Length - 2) would be called with a
                # length of -1 and raise an error.
                if ($val.Length -ge 2 -and "'""".Contains($val.Substring(0, 1))) {
                    $val = $val.Substring(1, $val.Length - 2)
                }

                $pyvenvConfig[$keyval[0]] = $val
                Write-Verbose "Adding Key: '$($keyval[0])'='$val'"
            }
        }
    }
    return $pyvenvConfig
}
|
||||
|
||||
|
||||
<# Begin Activate script --------------------------------------------------- #>
|
||||
|
||||
# Determine the containing directory of this script
$VenvExecPath = Split-Path -Parent $MyInvocation.MyCommand.Definition
$VenvExecDir = Get-Item -Path $VenvExecPath

Write-Verbose "Activation script is located in path: '$VenvExecPath'"
# Both messages below previously opened a single quote without closing it.
Write-Verbose "VenvExecDir Fullname: '$($VenvExecDir.FullName)'"
Write-Verbose "VenvExecDir Name: '$($VenvExecDir.Name)'"

# Set values required in priority: CmdLine, ConfigFile, Default
# First, get the location of the virtual environment, it might not be
# VenvExecDir if specified on the command line.
if ($VenvDir) {
    Write-Verbose "VenvDir given as parameter, using '$VenvDir' to determine values"
}
else {
    Write-Verbose "VenvDir not given as a parameter, using parent directory name as VenvDir."
    # Default: the parent of the script's directory, without trailing separators.
    $VenvDir = $VenvExecDir.Parent.FullName.TrimEnd("\\/")
    Write-Verbose "VenvDir=$VenvDir"
}

# Next, read the `pyvenv.cfg` file to determine any required value such
# as `prompt`.
$pyvenvCfg = Get-PyVenvConfig -ConfigDir $VenvDir

# Next, set the prompt from the command line, or the config file, or
# just use the name of the virtual environment folder.
if ($Prompt) {
    Write-Verbose "Prompt specified as argument, using '$Prompt'"
}
else {
    Write-Verbose "Prompt not specified as argument to script, checking pyvenv.cfg value"
    if ($pyvenvCfg -and $pyvenvCfg['prompt']) {
        Write-Verbose " Setting based on value in pyvenv.cfg='$($pyvenvCfg['prompt'])'"
        $Prompt = $pyvenvCfg['prompt'];
    }
    else {
        Write-Verbose " Setting prompt based on parent's directory's name. (Is the directory name passed to venv module when creating the virtual environment)"
        Write-Verbose " Got leaf-name of $VenvDir='$(Split-Path -Path $venvDir -Leaf)'"
        $Prompt = Split-Path -Path $venvDir -Leaf
    }
}

Write-Verbose "Prompt = '$Prompt'"
Write-Verbose "VenvDir='$VenvDir'"

# Deactivate any currently active virtual environment, but leave the
# deactivate function in place.
deactivate -nondestructive

# Now set the environment variable VIRTUAL_ENV, used by many tools to determine
# that there is an activated venv.
$env:VIRTUAL_ENV = $VenvDir

if (-not $Env:VIRTUAL_ENV_DISABLE_PROMPT) {

    Write-Verbose "Setting prompt to '$Prompt'"

    # Set the prompt to include the env name
    # Make sure _OLD_VIRTUAL_PROMPT is global: declare a global placeholder
    # first, then overwrite it with a copy of the current prompt function.
    function global:_OLD_VIRTUAL_PROMPT { "" }
    Copy-Item -Path function:prompt -Destination function:_OLD_VIRTUAL_PROMPT
    New-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Description "Python virtual environment prompt prefix" -Scope Global -Option ReadOnly -Visibility Public -Value $Prompt

    # New prompt: green "(prefix) " followed by whatever the saved prompt prints.
    function global:prompt {
        Write-Host -NoNewline -ForegroundColor Green "($_PYTHON_VENV_PROMPT_PREFIX) "
        _OLD_VIRTUAL_PROMPT
    }
    $env:VIRTUAL_ENV_PROMPT = $Prompt
}

# Clear PYTHONHOME, keeping a copy so deactivate can restore it.
if (Test-Path -Path Env:PYTHONHOME) {
    Copy-Item -Path Env:PYTHONHOME -Destination Env:_OLD_VIRTUAL_PYTHONHOME
    Remove-Item -Path Env:PYTHONHOME
}

# Add the venv to the PATH. Note that the directory prepended is the one
# holding this script ($VenvExecDir), not one derived from $VenvDir.
Copy-Item -Path Env:PATH -Destination Env:_OLD_VIRTUAL_PATH
$Env:PATH = "$VenvExecDir$([System.IO.Path]::PathSeparator)$Env:PATH"
|
@ -1,69 +0,0 @@
|
||||
# This file must be used with "source bin/activate" *from bash*
|
||||
# you cannot run it directly
|
||||
|
||||
# Undo the changes made by this activate script.
# Globals:   PATH, PYTHONHOME, PS1 (restored from their _OLD_VIRTUAL_* copies,
#            which are then unset); VIRTUAL_ENV, VIRTUAL_ENV_PROMPT (unset)
# Arguments: $1 - pass "nondestructive" to keep this function defined
deactivate () {
    # reset old environment variables
    if [ -n "${_OLD_VIRTUAL_PATH:-}" ] ; then
        PATH="${_OLD_VIRTUAL_PATH:-}"
        export PATH
        unset _OLD_VIRTUAL_PATH
    fi
    if [ -n "${_OLD_VIRTUAL_PYTHONHOME:-}" ] ; then
        PYTHONHOME="${_OLD_VIRTUAL_PYTHONHOME:-}"
        export PYTHONHOME
        unset _OLD_VIRTUAL_PYTHONHOME
    fi

    # This should detect bash and zsh, which have a hash command that must
    # be called to get it to forget past commands.  Without forgetting
    # past commands the $PATH changes we made may not be respected.
    # Two separate tests joined with || replace the obsolescent `-o` operator.
    if [ -n "${BASH:-}" ] || [ -n "${ZSH_VERSION:-}" ] ; then
        hash -r 2> /dev/null
    fi

    if [ -n "${_OLD_VIRTUAL_PS1:-}" ] ; then
        PS1="${_OLD_VIRTUAL_PS1:-}"
        export PS1
        unset _OLD_VIRTUAL_PS1
    fi

    unset VIRTUAL_ENV
    unset VIRTUAL_ENV_PROMPT
    if [ ! "${1:-}" = "nondestructive" ] ; then
        # Self destruct!
        unset -f deactivate
    fi
}
|
||||
|
||||
# unset irrelevant variables
|
||||
# Clear any previously activated environment, keeping the deactivate
# function itself defined.
deactivate nondestructive

VIRTUAL_ENV="/home/risen/PythonProjects/Calc3D_by_Risen/venv"
export VIRTUAL_ENV

# Remember the current PATH so deactivate can restore it, then put the
# environment's bin directory first.
_OLD_VIRTUAL_PATH="$PATH"
PATH="$VIRTUAL_ENV/bin:$PATH"
export PATH

# unset PYTHONHOME if set
# this will fail if PYTHONHOME is set to the empty string (which is bad anyway)
# could use `if (set -u; : $PYTHONHOME) ;` in bash
if [ -n "${PYTHONHOME:-}" ] ; then
    _OLD_VIRTUAL_PYTHONHOME="${PYTHONHOME:-}"
    unset PYTHONHOME
fi

# Prefix the prompt unless the user opted out via VIRTUAL_ENV_DISABLE_PROMPT.
if [ -z "${VIRTUAL_ENV_DISABLE_PROMPT:-}" ] ; then
    _OLD_VIRTUAL_PS1="${PS1:-}"
    PS1="(venv) ${PS1:-}"
    export PS1
    VIRTUAL_ENV_PROMPT="(venv) "
    export VIRTUAL_ENV_PROMPT
fi

# This should detect bash and zsh, which have a hash command that must
# be called to get it to forget past commands.  Without forgetting
# past commands the $PATH changes we made may not be respected.
# Two separate tests joined with || replace the obsolescent `-o` operator.
if [ -n "${BASH:-}" ] || [ -n "${ZSH_VERSION:-}" ] ; then
    hash -r 2> /dev/null
fi
|
@ -1,26 +0,0 @@
|
||||
# This file must be used with "source bin/activate.csh" *from csh*.
|
||||
# You cannot run it directly.
|
||||
# Created by Davide Di Blasi <davidedb@gmail.com>.
|
||||
# Ported to Python 3.3 venv by Andrew Svetlov <andrew.svetlov@gmail.com>
|
||||
|
||||
# deactivate: restore PATH and prompt from their _OLD_VIRTUAL_* copies (when
# those exist), unset VIRTUAL_ENV and VIRTUAL_ENV_PROMPT, and remove this
# alias unless the argument is "nondestructive".
alias deactivate 'test $?_OLD_VIRTUAL_PATH != 0 && setenv PATH "$_OLD_VIRTUAL_PATH" && unset _OLD_VIRTUAL_PATH; rehash; test $?_OLD_VIRTUAL_PROMPT != 0 && set prompt="$_OLD_VIRTUAL_PROMPT" && unset _OLD_VIRTUAL_PROMPT; unsetenv VIRTUAL_ENV; unsetenv VIRTUAL_ENV_PROMPT; test "\!:*" != "nondestructive" && unalias deactivate'

# Unset irrelevant variables.
deactivate nondestructive

setenv VIRTUAL_ENV "/home/risen/PythonProjects/Calc3D_by_Risen/venv"

# Remember the current PATH, then put the environment's bin directory first.
set _OLD_VIRTUAL_PATH="$PATH"
setenv PATH "$VIRTUAL_ENV/bin:$PATH"

# $prompt is only defined in interactive shells; expanding it while undefined
# is an error that would abort sourcing this file, so guard every read of it.
if ( $?prompt ) then
    set _OLD_VIRTUAL_PROMPT="$prompt"
endif

if (! "$?VIRTUAL_ENV_DISABLE_PROMPT") then
    if ( $?prompt ) then
        set prompt = "(venv) $prompt"
    endif
    setenv VIRTUAL_ENV_PROMPT "(venv) "
endif

alias pydoc python -m pydoc

# Rebuild the command hash table so the new PATH takes effect.
rehash
|
@ -1,69 +0,0 @@
|
||||
# This file must be used with "source <venv>/bin/activate.fish" *from fish*
|
||||
# (https://fishshell.com/); you cannot run it directly.
|
||||
|
||||
function deactivate -d "Exit virtual environment and return to normal shell environment"
    # Put PATH back the way it was before activation.
    if [ -n "$_OLD_VIRTUAL_PATH" ]
        set -gx PATH $_OLD_VIRTUAL_PATH
        set -e _OLD_VIRTUAL_PATH
    end

    # Same for PYTHONHOME, if activation had to move it out of the way.
    if [ -n "$_OLD_VIRTUAL_PYTHONHOME" ]
        set -gx PYTHONHOME $_OLD_VIRTUAL_PYTHONHOME
        set -e _OLD_VIRTUAL_PYTHONHOME
    end

    # Undo the prompt override installed by activation.
    if [ -n "$_OLD_FISH_PROMPT_OVERRIDE" ]
        set -e _OLD_FISH_PROMPT_OVERRIDE
        # prevents error when using nested fish instances (Issue #93858)
        if functions -q _old_fish_prompt
            # Swap the saved prompt function back in under its real name.
            functions -e fish_prompt
            functions -c _old_fish_prompt fish_prompt
            functions -e _old_fish_prompt
        end
    end

    set -e VIRTUAL_ENV
    set -e VIRTUAL_ENV_PROMPT

    # Anything other than "nondestructive" as first argument removes this
    # function as well.
    if not test "$argv[1]" = "nondestructive"
        # Self-destruct!
        functions -e deactivate
    end
end
|
||||
|
||||
# Unset irrelevant variables.
|
||||
# Clear any previously activated environment, keeping the deactivate
# function itself defined.
deactivate nondestructive

set -gx VIRTUAL_ENV "/home/risen/PythonProjects/Calc3D_by_Risen/venv"

# Remember the current PATH, then put the environment's bin directory first.
set -gx _OLD_VIRTUAL_PATH $PATH
set -gx PATH "$VIRTUAL_ENV/bin" $PATH

# Unset PYTHONHOME if set.
if set -q PYTHONHOME
    set -gx _OLD_VIRTUAL_PYTHONHOME $PYTHONHOME
    set -e PYTHONHOME
end

if test -z "$VIRTUAL_ENV_DISABLE_PROMPT"
    # fish uses a function instead of an env var to generate the prompt.

    # Save the current fish_prompt function as the function _old_fish_prompt.
    functions -c fish_prompt _old_fish_prompt

    # With the original prompt function renamed, we can override with our own.
    function fish_prompt
        # Save the return status of the last command.
        set -l old_status $status

        # Output the venv prompt; color taken from the blue of the Python logo.
        printf "%s%s%s" (set_color 4B8BBE) "(venv) " (set_color normal)

        # Restore the return status of the previous command.
        # `source` replaces the deprecated `.` alias; with no file argument
        # it reads the commands from the pipe.
        echo "exit $old_status" | source
        # Output the original/"old" prompt.
        _old_fish_prompt
    end

    # Markers that deactivate checks/clears when undoing the prompt override.
    set -gx _OLD_FISH_PROMPT_OVERRIDE "$VIRTUAL_ENV"
    set -gx VIRTUAL_ENV_PROMPT "(venv) "
end
|
@ -1,8 +0,0 @@
|
||||
#!/home/risen/PythonProjects/Calc3D_by_Risen/venv/bin/python3
# -*- coding: utf-8 -*-
"""Launcher script: runs ``charset_normalizer.cli.cli_detect`` and exits with its result."""
import re
import sys

from charset_normalizer.cli import cli_detect

if __name__ == '__main__':
    # Drop a trailing "-script.pyw" or ".exe" from the program name.
    launcher_suffix = re.compile(r'(-script\.pyw|\.exe)?$')
    sys.argv[0] = launcher_suffix.sub('', sys.argv[0])
    sys.exit(cli_detect())
|
@ -1,8 +0,0 @@
|
||||
#!/home/risen/PythonProjects/Calc3D_by_Risen/venv/bin/python3
# -*- coding: utf-8 -*-
"""Launcher script: runs ``pip._internal.cli.main.main`` and exits with its result."""
import re
import sys

from pip._internal.cli.main import main

if __name__ == '__main__':
    # Drop a trailing "-script.pyw" or ".exe" from the program name.
    launcher_suffix = re.compile(r'(-script\.pyw|\.exe)?$')
    sys.argv[0] = launcher_suffix.sub('', sys.argv[0])
    sys.exit(main())
|
@ -1,8 +0,0 @@
|
||||
#!/home/risen/PythonProjects/Calc3D_by_Risen/venv/bin/python3
# -*- coding: utf-8 -*-
"""Launcher script: runs ``pip._internal.cli.main.main`` and exits with its result."""
import re
import sys

from pip._internal.cli.main import main

if __name__ == '__main__':
    # Drop a trailing "-script.pyw" or ".exe" from the program name.
    launcher_suffix = re.compile(r'(-script\.pyw|\.exe)?$')
    sys.argv[0] = launcher_suffix.sub('', sys.argv[0])
    sys.exit(main())
|
@ -1,8 +0,0 @@
|
||||
#!/home/risen/PythonProjects/Calc3D_by_Risen/venv/bin/python3
# -*- coding: utf-8 -*-
"""Launcher script: runs ``pip._internal.cli.main.main`` and exits with its result."""
import re
import sys

from pip._internal.cli.main import main

if __name__ == '__main__':
    # Drop a trailing "-script.pyw" or ".exe" from the program name.
    launcher_suffix = re.compile(r'(-script\.pyw|\.exe)?$')
    sys.argv[0] = launcher_suffix.sub('', sys.argv[0])
    sys.exit(main())
|
@ -1,8 +0,0 @@
|
||||
#!/home/risen/PythonProjects/Calc3D_by_Risen/venv/bin/python3
# -*- coding: utf-8 -*-
"""Launcher script: runs ``PySimpleGUI.PySimpleGUI.main_sdk_help`` and exits with its result."""
import re
import sys

from PySimpleGUI.PySimpleGUI import main_sdk_help

if __name__ == '__main__':
    # Drop a trailing "-script.pyw" or ".exe" from the program name.
    launcher_suffix = re.compile(r'(-script\.pyw|\.exe)?$')
    sys.argv[0] = launcher_suffix.sub('', sys.argv[0])
    sys.exit(main_sdk_help())
|
@ -1,8 +0,0 @@
|
||||
#!/home/risen/PythonProjects/Calc3D_by_Risen/venv/bin/python3
# -*- coding: utf-8 -*-
"""Launcher script: runs ``PySimpleGUI.PySimpleGUI._main_entry_point`` and exits with its result."""
import re
import sys

from PySimpleGUI.PySimpleGUI import _main_entry_point

if __name__ == '__main__':
    # Drop a trailing "-script.pyw" or ".exe" from the program name.
    launcher_suffix = re.compile(r'(-script\.pyw|\.exe)?$')
    sys.argv[0] = launcher_suffix.sub('', sys.argv[0])
    sys.exit(_main_entry_point())
|
@ -1,8 +0,0 @@
|
||||
#!/home/risen/PythonProjects/Calc3D_by_Risen/venv/bin/python3
# -*- coding: utf-8 -*-
"""Launcher script: runs ``PySimpleGUI.PySimpleGUI._main_entry_point`` and exits with its result."""
import re
import sys

from PySimpleGUI.PySimpleGUI import _main_entry_point

if __name__ == '__main__':
    # Drop a trailing "-script.pyw" or ".exe" from the program name.
    launcher_suffix = re.compile(r'(-script\.pyw|\.exe)?$')
    sys.argv[0] = launcher_suffix.sub('', sys.argv[0])
    sys.exit(_main_entry_point())
|
@ -1,8 +0,0 @@
|
||||
#!/home/risen/PythonProjects/Calc3D_by_Risen/venv/bin/python3
# -*- coding: utf-8 -*-
"""Launcher script: runs ``PySimpleGUI.PySimpleGUI._upgrade_entry_point`` and exits with its result."""
import re
import sys

from PySimpleGUI.PySimpleGUI import _upgrade_entry_point

if __name__ == '__main__':
    # Drop a trailing "-script.pyw" or ".exe" from the program name.
    launcher_suffix = re.compile(r'(-script\.pyw|\.exe)?$')
    sys.argv[0] = launcher_suffix.sub('', sys.argv[0])
    sys.exit(_upgrade_entry_point())
|
@ -1,8 +0,0 @@
|
||||
#!/home/risen/PythonProjects/Calc3D_by_Risen/venv/bin/python3
# -*- coding: utf-8 -*-
"""Launcher script: runs ``PySimpleGUI.PySimpleGUI.main_get_debug_data`` and exits with its result."""
import re
import sys

from PySimpleGUI.PySimpleGUI import main_get_debug_data

if __name__ == '__main__':
    # Drop a trailing "-script.pyw" or ".exe" from the program name.
    launcher_suffix = re.compile(r'(-script\.pyw|\.exe)?$')
    sys.argv[0] = launcher_suffix.sub('', sys.argv[0])
    sys.exit(main_get_debug_data())
|
@ -1,8 +0,0 @@
|
||||
#!/home/risen/PythonProjects/Calc3D_by_Risen/venv/bin/python3
# -*- coding: utf-8 -*-
"""Launcher script: runs ``PySimpleGUI.PySimpleGUI.main_watermark_off`` and exits with its result."""
import re
import sys

from PySimpleGUI.PySimpleGUI import main_watermark_off

if __name__ == '__main__':
    # Drop a trailing "-script.pyw" or ".exe" from the program name.
    launcher_suffix = re.compile(r'(-script\.pyw|\.exe)?$')
    sys.argv[0] = launcher_suffix.sub('', sys.argv[0])
    sys.exit(main_watermark_off())
|
@ -1,8 +0,0 @@
|
||||
#!/home/risen/PythonProjects/Calc3D_by_Risen/venv/bin/python3
# -*- coding: utf-8 -*-
"""Launcher script: runs ``PySimpleGUI.PySimpleGUI.main_watermark_on`` and exits with its result."""
import re
import sys

from PySimpleGUI.PySimpleGUI import main_watermark_on

if __name__ == '__main__':
    # Drop a trailing "-script.pyw" or ".exe" from the program name.
    launcher_suffix = re.compile(r'(-script\.pyw|\.exe)?$')
    sys.argv[0] = launcher_suffix.sub('', sys.argv[0])
    sys.exit(main_watermark_on())
|
@ -1,8 +0,0 @@
|
||||
#!/home/risen/PythonProjects/Calc3D_by_Risen/venv/bin/python3
# -*- coding: utf-8 -*-
"""Launcher script: runs ``rsa.cli.decrypt`` and exits with its result."""
import re
import sys

from rsa.cli import decrypt

if __name__ == '__main__':
    # Drop a trailing "-script.pyw" or ".exe" from the program name.
    launcher_suffix = re.compile(r'(-script\.pyw|\.exe)?$')
    sys.argv[0] = launcher_suffix.sub('', sys.argv[0])
    sys.exit(decrypt())
|
@ -1,8 +0,0 @@
|
||||
#!/home/risen/PythonProjects/Calc3D_by_Risen/venv/bin/python3
# -*- coding: utf-8 -*-
"""Launcher script: runs ``rsa.cli.encrypt`` and exits with its result."""
import re
import sys

from rsa.cli import encrypt

if __name__ == '__main__':
    # Drop a trailing "-script.pyw" or ".exe" from the program name.
    launcher_suffix = re.compile(r'(-script\.pyw|\.exe)?$')
    sys.argv[0] = launcher_suffix.sub('', sys.argv[0])
    sys.exit(encrypt())
|
@ -1,8 +0,0 @@
|
||||
#!/home/risen/PythonProjects/Calc3D_by_Risen/venv/bin/python3
# -*- coding: utf-8 -*-
"""Launcher script: runs ``rsa.cli.keygen`` and exits with its result."""
import re
import sys

from rsa.cli import keygen

if __name__ == '__main__':
    # Drop a trailing "-script.pyw" or ".exe" from the program name.
    launcher_suffix = re.compile(r'(-script\.pyw|\.exe)?$')
    sys.argv[0] = launcher_suffix.sub('', sys.argv[0])
    sys.exit(keygen())
|
@ -1,8 +0,0 @@
|
||||
#!/home/risen/PythonProjects/Calc3D_by_Risen/venv/bin/python3
# -*- coding: utf-8 -*-
"""Launcher script: runs ``rsa.util.private_to_public`` and exits with its result."""
import re
import sys

from rsa.util import private_to_public

if __name__ == '__main__':
    # Drop a trailing "-script.pyw" or ".exe" from the program name.
    launcher_suffix = re.compile(r'(-script\.pyw|\.exe)?$')
    sys.argv[0] = launcher_suffix.sub('', sys.argv[0])
    sys.exit(private_to_public())
|
@ -1,8 +0,0 @@
|
||||
#!/home/risen/PythonProjects/Calc3D_by_Risen/venv/bin/python3
# -*- coding: utf-8 -*-
"""Launcher script: runs ``rsa.cli.sign`` and exits with its result."""
import re
import sys

from rsa.cli import sign

if __name__ == '__main__':
    # Drop a trailing "-script.pyw" or ".exe" from the program name.
    launcher_suffix = re.compile(r'(-script\.pyw|\.exe)?$')
    sys.argv[0] = launcher_suffix.sub('', sys.argv[0])
    sys.exit(sign())
|
@ -1,8 +0,0 @@
|
||||
#!/home/risen/PythonProjects/Calc3D_by_Risen/venv/bin/python3
# -*- coding: utf-8 -*-
"""Launcher script: runs ``rsa.cli.verify`` and exits with its result."""
import re
import sys

from rsa.cli import verify

if __name__ == '__main__':
    # Drop a trailing "-script.pyw" or ".exe" from the program name.
    launcher_suffix = re.compile(r'(-script\.pyw|\.exe)?$')
    sys.argv[0] = launcher_suffix.sub('', sys.argv[0])
    sys.exit(verify())
|
@ -1 +0,0 @@
|
||||
python3
|
@ -1 +0,0 @@
|
||||
/usr/bin/python3
|
@ -1 +0,0 @@
|
||||
python3
|
@ -1 +0,0 @@
|
||||
pip
|
@ -1,588 +0,0 @@
|
||||
PySimpleGUI License Agreement
|
||||
|
||||
Version 1.0, Last updated: January 17, 2024
|
||||
|
||||
This PySimpleGUI License Agreement (the "Agreement") governs the use,
|
||||
reproduction, distribution, modification and all other exploitation of
|
||||
PySimpleGUI. The Agreement is made by and between PySimpleSoft, Inc.
|
||||
("Licensor") and the person or legal entity using PySimpleGUI hereunder
|
||||
("Licensee" and, together with Licensor, the "Parties").
|
||||
|
||||
If you are using PySimpleGUI on behalf of a legal entity such as an employer,
|
||||
then "Licensee" means that legal entity, and you represent and warrant that you
|
||||
have the authority and capacity to enter into this Agreement on behalf of
|
||||
Licensee.
|
||||
|
||||
"PySimpleGUI" consists of the following materials:
|
||||
* the PySimpleGUI software library, version 5.0 or later (the "Library");
|
||||
* the PySimpleGUI Library documentation (the "Documentation");
|
||||
* sample programs demonstrating use of the Library (the "Demo Programs"); and
|
||||
* utility programs relating to PySimpleGUI (the "Utilities").
|
||||
|
||||
PySimpleGUI may require you to obtain and use third-party software which is
|
||||
distributed under separate license terms. Any such software is not considered
|
||||
"PySimpleGUI" hereunder and is subject solely to such separate license terms.
|
||||
|
||||
PySimpleGUI is made available to Licensee pursuant to this Agreement for the
|
||||
purpose of (1) pursuant to Section 1.2, enabling Authorized Developers to use
|
||||
the Library in connection with developing Licensee Applications, and to use the
|
||||
Documentation, the Demo Programs and the Utilities in connection therewith; and
|
||||
(2) pursuant to Section 1.3, enabling End Users of the Licensee Applications to
|
||||
execute the Library as a dependency of the Licensee Applications; each as
|
||||
defined and more fully set forth herein and subject to the limitations set
|
||||
forth herein.
|
||||
|
||||
Licensor agrees to license PySimpleGUI to Licensee only in accordance with the
|
||||
terms of this Agreement. By using PySimpleGUI, Licensee agrees to be bound by
|
||||
the terms of this Agreement. If you do not agree to the terms of this
|
||||
Agreement, you may not copy, use, distribute, modify or otherwise attempt to
|
||||
exploit PySimpleGUI.
|
||||
|
||||
Licensee acknowledges that Licensor may from time to time update or modify this
|
||||
Agreement, by publishing a new version of this Agreement on Licensor's website.
|
||||
Licensee may continue to use the version of PySimpleGUI that it previously
|
||||
obtained under the prior version of this Agreement, but any version of
|
||||
PySimpleGUI received or used thereafter shall be subject to the updated version
|
||||
of this Agreement.
|
||||
|
||||
Accordingly, in consideration of the mutual covenants set forth herein, the
|
||||
receipt and sufficiency of which is hereby acknowledged, the Parties agree as
|
||||
follows.
|
||||
|
||||
1. Authorized Developers; License Grants; Limitations.
|
||||
|
||||
1.1. Definitions. As used herein:
|
||||
|
||||
* "Authorized Developer" means any individual person who has registered on
|
||||
Licensor's site at https://PySimpleGUI.com (the "Site") to develop one or
|
||||
more of Licensee's own applications which make use of the Library as a
|
||||
dependency in accordance with Section 1.5 (collectively, "Licensee
|
||||
Applications") and is either (1) a Hobbyist Developer; or (2) a Commercial
|
||||
Developer who has purchased an active PySimpleGUI paid license hereunder, in
|
||||
effect at the time of development, which is fully paid up pursuant to Section
|
||||
3.
|
||||
|
||||
* "Hobbyist Developer" means any individual who uses PySimpleGUI for
|
||||
development purposes solely for either or both of the following: (1) personal
|
||||
(e.g., not on behalf of an employer or other third party), Non-Commercial
|
||||
purposes; or (2) Non-Commercial educational or learning purposes (1 and 2
|
||||
together, the "Permitted No-cost Purposes").
|
||||
|
||||
* "Commercial Developer" means any individual who uses PySimpleGUI for
|
||||
development purposes who is not a Hobbyist Developer.
|
||||
|
||||
As used in this Section 1, "Non-Commercial" means use which is both (1) not on
|
||||
behalf or for the benefit of any company or other organization; and (2) not
|
||||
involving the receipt of any commercial advantage or monetary compensation. If
|
||||
you have questions about whether your contemplated use is "Non-Commercial,"
|
||||
please contact us at license@pysimplegui.com.
|
||||
|
||||
For the avoidance of doubt:
|
||||
|
||||
* Only Authorized Developers (e.g., Hobbyist Developers and Commercial
|
||||
Developers who satisfy the requirements for Authorized Developers) may use
|
||||
PySimpleGUI for development purposes.
|
||||
|
||||
* A Hobbyist Developer may not use PySimpleGUI for any development purpose
|
||||
other than the Permitted No-cost Purposes.
|
||||
|
||||
* Only Commercial Developers may use PySimpleGUI to develop Licensee
|
||||
Applications for any commercial purpose; for the benefit of, on behalf of or
|
||||
on computer hardware belonging to an employing company or other organization;
|
||||
or for commercial educational purposes, such as the development of a paid
|
||||
training course.
|
||||
|
||||
If you have questions about whether your contemplated Licensee Application
|
||||
would be a Permitted No-cost Purpose subject to a Hobbyist Developer license,
|
||||
please contact us at license@pysimplegui.com.
|
||||
|
||||
1.2. Development License Grants. Subject to the terms and conditions of this
|
||||
Agreement:
|
||||
|
||||
1.2.1. Library. Licensor grants Licensee a limited, personal, revocable,
|
||||
non-exclusive, non-sublicensable, non-transferable license during the Term (1)
|
||||
for its Authorized Developers to internally install, use, reproduce and modify
|
||||
the Library to develop Licensee Applications; and (2) to redistribute the
|
||||
Library to recipients of its Licensee Applications ("End Users"); provided,
|
||||
that such redistribution may not include publishing the source code of the
|
||||
Library (in modified or unmodified form) in a publicly accessible website or
|
||||
repository or in other publicly accessible form.
|
||||
|
||||
1.2.2. Documentation. Licensor grants Licensee a limited, personal, revocable,
|
||||
non-exclusive, non-sublicensable, non-transferable license during the Term for
|
||||
its Authorized Developers to internally access, use, and reproduce a reasonable
|
||||
number of copies of the Documentation for the sole purpose of facilitating the
|
||||
use of the Library by Licensee Applications in accordance with this Agreement.
|
||||
For the avoidance of doubt, Licensee may not modify or redistribute the
|
||||
Documentation.
|
||||
|
||||
1.2.3. Demo Programs. Licensor grants Licensee a limited, personal, revocable,
|
||||
non-exclusive, non-sublicensable, non-transferable license during the Term to
|
||||
install, use, execute, reproduce and modify the Demo Programs, and to
|
||||
incorporate modified portions of the Demo Programs into the Licensee
|
||||
Applications; provided, that (1) the Demo Programs may not be used for any
|
||||
purposes other than in connection with the use of the Library; and (2) the Demo
|
||||
Programs may not be (individually or as a whole) redistributed in unmodified
|
||||
form or as a program with substantially similar functionality to the Demo
|
||||
Programs.
|
||||
|
||||
1.2.4. Utilities. Licensor grants Licensee a limited, personal, revocable,
|
||||
non-exclusive, non-sublicensable, non-transferable license during the Term to
|
||||
install, use, execute, reproduce and modify the Utilities, but not to
|
||||
distribute or publish the Utilities or any modified version.
|
||||
|
||||
1.2.5. Developer Key Required. The licenses granted in this Section 1.2 may
|
||||
only be exercised by Authorized Developers within the period of time during
|
||||
which each such Authorized Developer has a then-active Developer Key pursuant
|
||||
to Section 3. Licensor may in its discretion permit recipients of PySimpleGUI
|
||||
to make limited use of it for a limited trial period without a Developer Key.
|
||||
|
||||
1.2.6. Limitations for Hobbyist Developers. For Hobbyist Developers, the
|
||||
licenses granted in this Section 1.2 may only be exercised for the Permitted
|
||||
No-cost Purposes.
|
||||
|
||||
1.2.7. Limitations on Modification of the Library. Licensee's right to modify
|
||||
the Library pursuant to this Section 1.2 is further limited as follows: (a)
|
||||
Licensee may not modify or extend the Library or take any other action which
|
||||
has the effect of enabling bypass of the Library's protection mechanisms
|
||||
requiring the use of valid Developer Keys or Distribution Keys. (b) Licensee
|
||||
explicitly acknowledges and agrees that Licensor's digital signature of the
|
||||
Library is only applicable to the unmodified Library as made available by
|
||||
Licensor, and that any modifications to the Library will result in Licensor's
|
||||
digital signature no longer applying to the modified version.
|
||||
|
||||
1.2.8. Limitations on Distribution of the Library. Licensee's right to
|
||||
distribute the Library (in modified or unmodified form) pursuant to this
|
||||
Section 1.2 is subject to Licensee (a) including the applicable proprietary
|
||||
notices set forth in Section 2.2; and (b) including the PySimpleGUI Flow-Down
|
||||
License Terms set forth in Exhibit A in the license terms that Licensee uses to
|
||||
distribute the Licensee Application.
|
||||
|
||||
1.2.9. Distribution Keys. Commercial Developers may obtain from Licensor a
|
||||
PySimpleGUI distribution key ("Distribution Key") through the Authorized
|
||||
Developer's Site account and utilizing the Distribution Key through the
|
||||
protection mechanism made available in the Library to permit distribution to
|
||||
End Users. The Commercial Developer may use its Distribution Key to enable End
|
||||
Users to install and execute the Licensee Applications, including the Library
|
||||
incorporated therein, without requiring each recipient to obtain a Developer
|
||||
Key or be limited to a trial period as described in Section 1.2.5. Licensee
|
||||
shall be responsible for all activities occurring under Distribution Keys
|
||||
obtained by its Authorized Developers and for the compliance with this
|
||||
Agreement of all Licensee Applications using such Distribution Keys.
|
||||
|
||||
1.3. Run-time End User License Grant. Subject to the terms and conditions of
|
||||
this Agreement, Licensor grants Licensee a limited, personal, revocable,
|
||||
non-exclusive, non-sublicensable, non-transferable license during the Term to
|
||||
install and execute the Library solely for it and its employee End Users to
|
||||
internally use the corresponding Licensee Applications with which the Library
|
||||
is distributed. For the avoidance of doubt, the license set forth in this
|
||||
Section 1.3 does not permit modification, external redistribution, integration
|
||||
of the Library with other software, or any other use of the Library (for
|
||||
development purposes or otherwise) except solely as distributed with the
|
||||
unmodified Licensee Applications; any such activities are permitted only by
|
||||
Authorized Developers and only to the extent permitted by Section 1.2. If the
|
||||
Licensee Application does not include a valid Distribution Key from a
|
||||
Commercial Developer, then the period of use of the Library within the Licensee
|
||||
Application will be limited to a trial period for any End User who does not
|
||||
register as an Authorized Developer hereunder.
|
||||
|
||||
1.4. License Restrictions. The licenses granted to Licensee hereunder are
|
||||
expressly made subject to the following limitations: except as expressly
|
||||
permitted herein, Licensee may not (and shall not permit any third party to):
|
||||
(a) copy all or any portion of PySimpleGUI; (b) modify or translate
|
||||
PySimpleGUI; (c) reverse engineer, decompile or disassemble the Software, in
|
||||
whole or in part, except solely to the extent permitted under applicable law;
|
||||
(d) create derivative works based on PySimpleGUI; (e) publicly display or
|
||||
publish PySimpleGUI; (f) rent, lease, sublicense, sell, distribute, assign,
|
||||
transfer, or otherwise permit access to PySimpleGUI to any third party; (g)
|
||||
bypass or work around any requirements for license keys, limitations on access,
|
||||
or obfuscation or security mechanisms incorporated into PySimpleGUI; (h) use
|
||||
PySimpleGUI for illegal or otherwise harmful purposes, including without
|
||||
limitation harassment, defamation, creation or delivery of unsolicited emails
|
||||
or spam, infringement of third party intellectual property rights or other
|
||||
third party rights, or distribution of viruses, worms, malware or other harmful
|
||||
or destructive software; (i) incorporate PySimpleGUI or any portion thereof
|
||||
into any software that purports to subject it to open source software or
|
||||
similar license terms, including any prior version of PySimpleGUI (modified or
|
||||
unmodified) which was previously distributed under such licenses; or (j)
|
||||
exercise any other right to PySimpleGUI not expressly granted in this
|
||||
Agreement.
|
||||
|
||||
1.5. Licensee Application Prohibitions. Notwithstanding anything else in
|
||||
this Agreement, Licensee shall ensure that Licensee Applications (a) do not
|
||||
have the purpose, intent or functionality of enabling End Users to make further
|
||||
use of PySimpleGUI for their own development purposes or to carry out any
|
||||
activities otherwise restricted or prohibited hereunder; (b) do not have a
|
||||
substantially similar purpose to PySimpleGUI; (c) do not enable End Users to
|
||||
interact, integrate or otherwise develop user interfaces via direct or indirect
|
||||
access to PySimpleGUI's functionality; and (d) are not intended or designed for
|
||||
use in high-risk use cases that could reasonably result in death, severe bodily
|
||||
injury, or other physical property or environmental damage.
|
||||
|
||||
1.6. No Use with Earlier Versions of PySimpleGUI. For the avoidance of
|
||||
doubt, no portions of PySimpleGUI distributed under this Agreement may be used
|
||||
in connection with, or in any way incorporated with or into, any versions of
|
||||
the PySimpleGUI library prior to version 5.0 that have been distributed under
|
||||
the GNU Lesser General Public License.
|
||||
|
||||
1.7. Additional Grant to Python Software Foundation. With regards to
|
||||
portions of PySimpleGUI that Licensor uploads to PyPI, Python Software
|
||||
Foundation ("PSF") may copy and redistribute such portions unmodified on PyPI
|
||||
in the form provided by Licensor, with no further action required by PSF.
|
||||
|
||||
1.8. Prohibition on Training Artificial Intelligence. As used herein,
|
||||
"Artificial Intelligence" means a system or model that is intended to generate
|
||||
or identify patterns in code or data, produce insights or correlations, or make
|
||||
predictions, recommendations, or decisions; in each case, where the system or
|
||||
model operates using machine learning, neural networks, large language models,
|
||||
or other approaches designed to approximate cognitive abilities. Licensee shall
|
||||
not (and shall not directly or indirectly permit or assist anyone else to) use
|
||||
PySimpleGUI, or any part thereof, to train an Artificial Intelligence that is
|
||||
offered to third parties on a commercial basis or as part of a larger
|
||||
commercial offering. The preceding sentence does not prohibit use of
|
||||
PySimpleGUI in conjunction with an Artificial Intelligence in other ways, such
|
||||
as developing a front-end user interface.
|
||||
|
||||
2. Intellectual Property Ownership; Notices.
|
||||
|
||||
2.1. Licensor Ownership. PySimpleGUI is not sold to Licensee, and all rights
|
||||
not expressly granted herein are reserved to Licensor. As between the parties,
|
||||
Licensor and its licensors own all right, title and interest in and to
|
||||
PySimpleGUI and any part thereof, including, without limitation, all
|
||||
copyrights, patents, trademarks, trade secrets or other intellectual property
|
||||
or proprietary rights.
|
||||
|
||||
2.2. Proprietary Notices. Licensee shall not modify or remove any copyright
|
||||
or patent notices or other proprietary notices or markings from any portion of
|
||||
PySimpleGUI (whether modified or unmodified) without Licensor's explicit
|
||||
written permission. Licensor shall ensure that any Licensee Applications that
|
||||
use the Library include a notice in the following form within the Licensee
|
||||
Application as well as any corresponding Licensee documentation or materials:
|
||||
|
||||
For unmodified versions of PySimpleGUI:
|
||||
|
||||
This product includes PySimpleGUI (https://PySimpleGUI.com). PySimpleGUI
|
||||
is Copyright (c) PySimpleSoft, Inc. and/or its licensors. Use of
|
||||
PySimpleGUI is subject to the license terms available at
|
||||
https://PySimpleGUI.com/eula
|
||||
|
||||
PYSIMPLEGUI IS PROVIDED "AS IS," WITHOUT ANY WARRANTIES, WHETHER EXPRESS OR
|
||||
IMPLIED. PYSIMPLESOFT DISCLAIMS ALL IMPLIED WARRANTIES, INCLUDING WITHOUT
|
||||
LIMITATION THE IMPLIED WARRANTIES OF NONINFRINGEMENT, TITLE,
|
||||
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
|
||||
For modified versions of PySimpleGUI:
|
||||
|
||||
This product includes a modified version of PySimpleGUI
|
||||
(https://PySimpleGUI.com). PySimpleGUI is Copyright (c) PySimpleSoft, Inc.
|
||||
and/or its licensors. Use of PySimpleGUI is subject to the license terms
|
||||
available at https://PySimpleGUI.com/eula
|
||||
|
||||
PYSIMPLEGUI IS PROVIDED "AS IS," WITHOUT ANY WARRANTIES, WHETHER EXPRESS OR
|
||||
IMPLIED. PYSIMPLESOFT DISCLAIMS ALL IMPLIED WARRANTIES, INCLUDING WITHOUT
|
||||
LIMITATION THE IMPLIED WARRANTIES OF NONINFRINGEMENT, TITLE,
|
||||
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
|
||||
If the Licensee Application or the corresponding Licensee documentation or
|
||||
materials include Licensee's copyright notices or other third parties' notices,
|
||||
then Licensee shall include the above notice together with such notices.
|
||||
|
||||
2.3. Licensor Marks. As between the parties hereto, all of Licensor's
|
||||
trademarks and service marks applicable to Licensor or PySimpleGUI
|
||||
(collectively, the "Licensor Marks") are the sole property of Licensor and/or
|
||||
its licensors. Subject to the terms and conditions of this Agreement, Licensor
|
||||
grants Licensee a limited, personal, revocable, non-exclusive,
|
||||
non-sublicensable, non-transferable license to use the Licensor Mark
|
||||
"PySimpleGUI" in connection with Licensee's permitted distribution of the
|
||||
Library hereunder. The license set forth in this Section 2.3 is explicitly
|
||||
conditioned on (a) Licensee's agreement not to challenge Licensor's ownership
|
||||
of the Licensor Marks at any time during the Term or thereafter; (b) Licensee
|
||||
ensuring that any modified version of the Library is clearly and prominently
|
||||
noted as such; (c) Licensee complying with all trademark usage guidelines and
|
||||
requirements that Licensor may publish from time to time; and (d) Licensee
|
||||
immediately correcting incorrect usage of the Licensor Marks upon request from
|
||||
Licensor. Licensee shall immediately cease usage of the Licensor Marks upon
|
||||
written notice thereof from Licensor. All goodwill arising from use of the
|
||||
Licensor Marks shall inure to the benefit of Licensor.
|
||||
|
||||
3. Developer Keys; Fees and Payments.
|
||||
|
||||
3.1. Developer Keys. In order to develop Licensee Applications pursuant to
|
||||
Section 1.2 (and subject to any limited trial period usage as may be permitted
|
||||
by Licensor from time to time), each Authorized Developer shall obtain a
|
||||
PySimpleGUI developer license key ("Developer Key") by registering on the Site
|
||||
as set forth therein. Each Developer Key is personal to the specific Authorized
|
||||
Developer, and Licensee shall not permit Authorized Developers to disclose,
|
||||
share or reuse Developer Keys. For the avoidance of doubt, any disclosure,
|
||||
sharing or reuse of a Developer Key by Licensee's Authorized Developers,
|
||||
whether or not authorized by Licensee, shall be a material breach permitting
|
||||
termination of this Agreement pursuant to Section 8.3. Developer Keys are
|
||||
Licensor's Confidential Information pursuant to Section 5. Developer Keys are
|
||||
limited to a specified time period (which shall be annual from the start date
|
||||
of the Developer Key, unless otherwise explicitly stated by Licensor). Upon the
|
||||
expiration of a Developer Key, the corresponding Authorized Developer may no
|
||||
longer use the Developer Key and must obtain a new Developer Key from the Site
|
||||
in order to continue using PySimpleGUI for development purposes pursuant to
|
||||
Section 1.2.
|
||||
|
||||
3.2. Fees for Commercial Developer Keys; Taxes. Before obtaining each
|
||||
Developer Key for a Commercial Developer, Licensee shall pay to Licensor the
|
||||
corresponding fees as stated on the Site and using the payment mechanism made
|
||||
available on the Site. All payments shall be made in United States dollars. All
|
||||
amounts payable by Licensee hereunder are exclusive of taxes and similar
|
||||
assessments, and Licensee is responsible for all sales, use, and excise taxes,
|
||||
and any other similar taxes of any kind imposed by any federal, state, or local
|
||||
governmental or regulatory authority on any amounts payable by Licensee
|
||||
hereunder, excluding any taxes imposed on Licensor's income.
|
||||
|
||||
3.3. Accuracy of Registration Details. Licensee represents and warrants that
|
||||
(a) all information provided by it and its Authorized Developers when
|
||||
registering for Developer Keys shall be truthful, accurate, complete and not
|
||||
misleading, and (b) it and its Authorized Developers shall not misrepresent
|
||||
their use of PySimpleGUI as qualifying for a Hobbyist Developer Key if their
|
||||
use does not satisfy the Permitted No-cost Purposes.
|
||||
|
||||
4. Support and Updates.
|
||||
|
||||
4.1. Support. Licensor has no obligation hereunder to provide support to
|
||||
Licensee or its Authorized Developers. Authorized Developers may submit
|
||||
Feedback (as defined in Section 5.4) consisting of issues and bug reports to
|
||||
the PySimpleGUI software repository as described on the Site or in the
|
||||
Documentation. Licensor may in its sole discretion address such issues or bug
|
||||
reports in current or future versions of PySimpleGUI, but has no obligation to
|
||||
do so.
|
||||
|
||||
4.2. Updates. Licensor has no obligation hereunder to make available updated
|
||||
versions of PySimpleGUI. In the event that Licensor elects to make available an
|
||||
updated version of PySimpleGUI, then Authorized Developers with a then-active
|
||||
Developer Key may download and use the updated version, and the updated version
|
||||
shall be included in the definition of "PySimpleGUI" thereafter for purposes of
|
||||
this Agreement.
|
||||
|
||||
5. Confidentiality; Feedback.
|
||||
|
||||
5.1. Confidential Information. Licensee acknowledges that portions of
|
||||
PySimpleGUI and certain other materials are confidential as provided herein.
|
||||
"Confidential Information" means any and all information, whether provided in
|
||||
writing, orally, visually, electronically or by other means, related to
|
||||
Licensor's or its licensors' services and/or business that, whether it
|
||||
constitutes a Trade Secret or not, is treated as confidential or secret by
|
||||
Licensor (that is, it is the subject of efforts by Licensor that are reasonable
|
||||
under the circumstances to maintain its secrecy), including, but not limited
|
||||
to, (i) Trade Secrets as defined below; (ii) any and all other information
|
||||
which is disclosed by Licensor to Licensee orally, electronically, visually, or
|
||||
in a document or other tangible form which is either identified as or should be
|
||||
reasonably understood to be confidential and/or proprietary; and, (iii) any
|
||||
notes, extracts, analysis, or materials prepared by Licensee which are copies
|
||||
of or derivative works of Licensor's or its licensors' proprietary or
|
||||
confidential information from which the substance of Confidential Information
|
||||
can be inferred or otherwise understood. Confidential Information shall not
|
||||
include information which Licensee can clearly establish by written evidence:
|
||||
(a) already is lawfully known to or independently developed by Licensee without
|
||||
access to the Confidential Information or Trade Secrets, (b) is disclosed by
|
||||
Licensor in non-confidential published materials, (c) is generally known to the
|
||||
public, or (d) is rightfully obtained from any third party without any
|
||||
obligation of confidentiality.
|
||||
|
||||
5.2. Trade Secrets. As used herein, "Trade Secrets" means all non-public
|
||||
information whether tangible or intangible related to Licensor's and its
|
||||
licensors' services or business that (i) derives economic value, actual or
|
||||
potential, from not being generally known to or readily ascertainable by other
|
||||
persons who can obtain economic value from its disclosure or use; and (ii) is
|
||||
the subject of efforts that are reasonable under the circumstances to maintain
|
||||
its secrecy, which may include, without limitation, (a) marking any information
|
||||
reduced to tangible form clearly and conspicuously with a legend identifying
|
||||
its confidential or trade secret nature; (b) identifying any oral communication
|
||||
as confidential or secret immediately before, during, or after such oral
|
||||
communication; or (c) otherwise treating such information as confidential.
|
||||
|
||||
5.3. Licensee Obligations. Licensee agrees not to disclose Confidential
|
||||
Information or Trade Secrets to any third party and will protect and treat all
|
||||
Confidential Information and Trade Secrets with the highest degree of care.
|
||||
Except as otherwise expressly provided in this Agreement, Licensee will not use
|
||||
or make any copies of Confidential Information or Trade Secrets, in whole or in
|
||||
part, without the prior written authorization of Licensor. Licensee may
|
||||
disclose Confidential Information or Trade Secrets if required by statute,
|
||||
regulation, or order of a court of competent jurisdiction, provided that
|
||||
Licensee provides Licensor with prior notice, discloses only the minimum
|
||||
Confidential Information or Trade Secrets required to be disclosed, and
|
||||
cooperates with Licensor in taking appropriate protective measures. These
|
||||
obligations shall continue for three (3) years following termination or
|
||||
expiration of this Agreement with respect to Confidential Information that does
|
||||
not rise to the level of a Trade Secret and shall continue for Trade Secrets so
|
||||
long as they remain Trade Secrets.
|
||||
|
||||
5.4. Feedback. As used herein, "Feedback" means any comments, questions,
|
||||
suggestions, issues, bug reports, or related feedback provided by Licensee to
|
||||
Licensor relating to PySimpleGUI, including, without limitation, suggesting or
|
||||
recommending changes to any part of PySimpleGUI, or new features or
|
||||
functionality relating thereto. All Feedback is, and will be treated as,
|
||||
non-confidential and non-proprietary, regardless of any markings Licensee may
|
||||
apply to it. Licensee hereby assigns to Licensor all right, title, and interest
|
||||
in, and Licensor is free to use without any attribution or compensation to
|
||||
Licensee, any ideas, know-how, concepts, techniques, or other intellectual
|
||||
property and proprietary rights contained in the Feedback, whether or not
|
||||
patentable, for any purpose whatsoever, including but not limited to,
|
||||
developing, manufacturing, having manufactured, licensing, marketing, and
|
||||
selling, directly or indirectly, products and services using such Feedback. To
|
||||
the extent the foregoing assignment of rights, title and interest in and to
|
||||
Feedback is prohibited by applicable law, Licensee hereby grants Licensor a
|
||||
non-exclusive, perpetual, irrevocable, royalty-free, fully paid-up, worldwide
|
||||
license (including the right to sublicense through multiple tiers) to (a) fully
|
||||
use, practice and exploit those non-assignable rights, title and interest,
|
||||
including, but not limited to, the right to use, reproduce, adapt, publicly
|
||||
perform, publicly display, modify, prepare derivative works, publish, transmit
|
||||
and distribute Feedback, or any portion thereof, in any form, medium or
|
||||
distribution method now known or hereafter existing, known or developed, for
|
||||
any purpose, and to develop, manufacture, have manufactured, license, market,
|
||||
and sell, directly or indirectly, products and services using Feedback; and (b)
|
||||
authorize any such use by others of Feedback, or any portion thereof, in the
|
||||
same manner.
|
||||
|
||||
6. NO LICENSOR WARRANTIES; LIABILITY.
|
||||
|
||||
6.1. DISCLAIMER OF WARRANTIES. PYSIMPLEGUI IS PROVIDED TO LICENSEE "AS IS".
|
||||
LICENSOR DOES NOT MAKE ANY, AND HEREBY SPECIFICALLY DISCLAIMS ANY,
|
||||
REPRESENTATIONS, ENDORSEMENTS, GUARANTEES, OR WARRANTIES, EXPRESS OR IMPLIED,
|
||||
RELATED TO PYSIMPLEGUI INCLUDING, BUT NOT LIMITED TO, ANY IMPLIED WARRANTY OF
|
||||
MERCHANTABILITY, TITLE, FITNESS FOR A PARTICULAR PURPOSE OR NONINFRINGEMENT OF
|
||||
INTELLECTUAL PROPERTY RIGHTS. Licensee acknowledges that Licensor does not
|
||||
guarantee compatibility between PySimpleGUI and any future versions thereof,
|
||||
and that Licensor makes no commitments as to future development, availability,
|
||||
release or licensing of any current or future versions of PySimpleGUI. Licensee
|
||||
will have sole responsibility for the adequate protection and backup of
|
||||
Licensee's software, data and equipment used with PySimpleGUI. The entire risk
|
||||
as to the quality and performance of PySimpleGUI and any obligation with
|
||||
respect to service and support is borne by Licensee. Licensee understands that
|
||||
Software hosted by Licensor for evaluation purposes may not be secure or
|
||||
stable. Licensee waives any claim against Licensor which may arise as a result
|
||||
of Licensee's breach of the foregoing. This Agreement does not grant Licensee
|
||||
any right to any maintenance, services, including without limitation, any
|
||||
support, enhancement, modification, bug fix or update to the Software, and
|
||||
Licensor is under no obligation to provide or inform Licensee of any such
|
||||
maintenance or services.
|
||||
|
||||
6.2. DISCLAIMER OF LIABILITY. LICENSEE EXPLICITLY AGREES THAT, TO THE
|
||||
MAXIMUM EXTENT PERMITTED BY LAW, LICENSOR SHALL NOT BE LIABLE UNDER ANY LEGAL
|
||||
THEORY FOR ANY DAMAGES SUFFERED IN CONNECTION WITH THE USE OF THE SOFTWARE,
|
||||
INCLUDING BUT NOT LIMITED TO ANY LOST PROFITS, LOST SAVINGS OR ANY DIRECT,
|
||||
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, PUNITIVE OR CONSEQUENTIAL DAMAGES,
|
||||
WHETHER RESULTING FROM IMPAIRED OR LOST DATA, SOFTWARE OR COMPUTER FAILURE, THE
|
||||
LICENSEE APPLICATIONS, OR ANY OTHER CAUSE, BY LICENSEE OR ANY OTHER THIRD
|
||||
PARTY, EVEN IF IT HAS BEEN INFORMED OF THE POSSIBILITY OF SUCH DAMAGES.
|
||||
LICENSEE HEREBY EXPRESSLY RELEASES LICENSOR FROM ANY AND ALL LIABILITY OR
|
||||
RESPONSIBILITY TO ANY DAMAGE CAUSED, DIRECTLY OR INDIRECTLY, TO LICENSEE OR ANY
|
||||
THIRD PARTY AS A RESULT OF THE USE OF THE SOFTWARE OR THE INSTALLATION THEREOF
|
||||
INTO LICENSEE'S COMPUTER ENVIRONMENT. IN THE EVENT THAT THE DISCLAIMERS OF
|
||||
LIABILITY SET FORTH HEREIN ARE HELD TO BE UNENFORCEABLE, THE PARTIES AGREE THAT
|
||||
UNDER NO CIRCUMSTANCES SHALL LICENSOR'S AGGREGATE LIABILITY HEREUNDER OR IN
|
||||
CONNECTION WITH THIS AGREEMENT EXCEED THE AMOUNTS PAID BY LICENSEE TO LICENSOR
|
||||
IN THE 12 MONTHS PRECEDING THE DATE THAT A CLAIM FIRST ACCRUES. LICENSEE SHALL
|
||||
BRING ANY CLAIM AGAINST LICENSOR WITHIN 12 MONTHS OF THE DATE THAT THE CLAIM
|
||||
FIRST ACCRUES, AND HEREBY WAIVES ANY CLAIMS THAT IT DOES NOT BRING WITHIN SUCH
|
||||
TIME PERIOD.
|
||||
|
||||
6.3. Essential Terms. THIS SECTION 6 IS AN ESSENTIAL BASIS OF LICENSOR'S
|
||||
DECISION TO OFFER PYSIMPLEGUI, AND SHALL APPLY REGARDLESS OF THE LEGAL THEORY
|
||||
UPON WHICH DAMAGES MAY BE CLAIMED; REGARDLESS OF WHETHER A PARTY KNEW OR SHOULD
|
||||
HAVE KNOWN OF THE POSSIBILITY OF SUCH DAMAGES; AND REGARDLESS OF WHETHER THE
|
||||
FOREGOING LIMITATIONS OF LIABILITY CAUSE ANY REMEDY TO FAIL IN ITS ESSENTIAL
|
||||
PURPOSE.
|
||||
|
||||
7. Indemnification. Licensee agrees to defend, indemnify and hold Licensor
|
||||
and its directors, officers, employees and representatives harmless for any
|
||||
claims, expenses, losses, costs, fees (including attorneys' fees) or damages of
|
||||
any sort resulting from (a) Licensee's breach of this Agreement; (b) Licensee's
|
||||
use of PySimpleGUI or exercise of the license rights granted hereunder; or (c)
|
||||
the Licensee Applications, or Licensee's or any third party's use thereof.
|
||||
|
||||
8. Term and Termination.
|
||||
|
||||
8.1. Term. This Agreement shall commence on the date on which Licensee
|
||||
downloads PySimpleGUI or otherwise obtains a copy of PySimpleGUI, and shall
|
||||
continue thereafter until terminated as set forth herein.
|
||||
|
||||
8.2. Termination by Licensee. Licensee may terminate this Agreement with
|
||||
written notice to Licensor, effective upon Licensee destroying all copies of
|
||||
PySimpleGUI in its possession and refraining from receiving or downloading
|
||||
further copies.
|
||||
|
||||
8.3. Termination for Licensee's Breach. This limited License will
|
||||
immediately terminate without notice if Licensee fails to comply with any
|
||||
obligation of this Agreement. Additionally, if Licensor reasonably suspects
|
||||
that Licensee has breached the Agreement, then Licensor may deliver written
|
||||
notice of the suspected breach to Licensee, and the Agreement shall
|
||||
automatically terminate 10 days following the date of such notice unless
|
||||
Licensee cures the breach to Licensor's satisfaction within such period.
|
||||
|
||||
8.4. Effect of Termination; Survival. Upon termination of this Agreement for
|
||||
any reason, the licenses granted to Licensee with respect to PySimpleGUI shall
|
||||
immediately terminate and Licensee hereby undertakes to: (i) immediately cease
|
||||
to use, distribute or otherwise exploit any part of PySimpleGUI or any modified
|
||||
version thereof; and (ii) promptly destroy and delete any copy of PySimpleGUI
|
||||
installed or copied by Licensee. Sections 2.1, 2.3, 3, 5-7, 8.4, 9 and 10 will
|
||||
survive termination of this Agreement indefinitely in accordance with their
|
||||
terms.
|
||||
|
||||
9. Assignment; Governing Law. The License is personal to Licensee and
|
||||
Licensee agrees not to transfer, sublicense, lease, rent, or assign their
|
||||
rights under this Agreement, and any such attempt shall be null and void.
|
||||
Licensor may assign, transfer, or sublicense this Agreement or any rights or
|
||||
obligations thereunder at any time in its sole discretion. This Agreement shall
|
||||
be governed by and construed in accordance with the laws of the State of North
|
||||
Carolina and the United States of America without regard to the conflicts of
|
||||
laws provisions thereof. The parties expressly exclude the United Nations
|
||||
Convention on Contracts for the International Sale of Goods from this
|
||||
Agreement. All actions arising out of or in connection with this Agreement
|
||||
shall be brought in the state or federal courts residing in Durham, North
|
||||
Carolina, United States of America, and both parties hereby irrevocably consent
|
||||
to the exclusive jurisdiction of such courts and waive any objections as to
|
||||
venue or inconvenience of forum.
|
||||
|
||||
10. Miscellaneous. No changes or modifications to this Agreement by
|
||||
Licensee or waivers of any provision of this Agreement by Licensor shall be
|
||||
effective unless evidenced in a writing referencing this Agreement and signed
|
||||
for and on behalf of Licensor. The failure of Licensor to enforce its rights
|
||||
under this Agreement at any time for any period shall not be construed as a
|
||||
waiver of such rights. There are no third party beneficiaries hereunder. This
|
||||
Agreement constitutes the entire agreement between the parties regarding the
|
||||
subject matter hereof and supersede all negotiations, conversations, or
|
||||
discussions between or among the parties relating to the subject matter of this
|
||||
Agreement. Neither Party relied on any promises or representations, written or
|
||||
oral, of the other party in forming this Agreement, except for those expressly
|
||||
contained herein. In the event that any provision of this Agreement shall be
|
||||
determined to be unenforceable, that provision will be limited or eliminated to
|
||||
the minimum extent necessary so that this Agreement shall otherwise remain in
|
||||
full force and effect and enforceable. Licensee may not distribute, download or
|
||||
otherwise export or re-export PySimpleGUI or any underlying technology except
|
||||
in full compliance with this Agreement, United States laws and regulations and
|
||||
any other applicable laws and regulations. Licensee represents and warrants
|
||||
that it and its Authorized Developers are not located in, under control of, or
|
||||
a national or resident of any country where exercise of the licenses granted
|
||||
hereunder would not comply with all such laws or regulations. It is agreed that
|
||||
because of the proprietary nature of PySimpleGUI, Licensor's remedies at law
|
||||
for a breach by the Licensee of its obligations under this Agreement may be
|
||||
inadequate and that Licensor will, in the event of such breach, be entitled to,
|
||||
in addition to any other remedy available to it, equitable relief, including
|
||||
injunctive relief, without the posting of any bond and in addition to all other
|
||||
remedies provided under this Agreement or available at law.
|
||||
|
||||
Exhibit A
|
||||
|
||||
PySimpleGUI Flow-Down License Terms
|
||||
|
||||
This product (the "Product") includes PySimpleGUI (https://PySimpleGUI.com) or
|
||||
a version of PySimpleGUI modified by the person or legal entity that provided
|
||||
you with this product ("Provider").
|
||||
|
||||
PySimpleGUI is Copyright (c) PySimpleSoft, Inc. and/or its licensors.
|
||||
|
||||
Use of PySimpleGUI is subject to the license terms available at
|
||||
https://PySimpleGUI.com/eula, including all limitations of liability and other
|
||||
terms set forth therein. By using the Product, you acknowledge and agree that
|
||||
PySimpleSoft has no obligation or liability to you regarding the operation,
|
||||
support or maintenance of PySimpleGUI or of the Product. PYSIMPLEGUI IS
|
||||
PROVIDED "AS IS," WITHOUT ANY WARRANTIES, WHETHER EXPRESS OR IMPLIED.
|
||||
PYSIMPLESOFT DISCLAIMS ALL IMPLIED WARRANTIES, INCLUDING WITHOUT LIMITATION THE
|
||||
IMPLIED WARRANTIES OF NONINFRINGEMENT, TITLE, MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE.
|
@ -1,131 +0,0 @@
|
||||
Metadata-Version: 2.1
|
||||
Name: PySimpleGUI
|
||||
Version: 5.0.3
|
||||
Summary: Python GUIs for Humans! PySimpleGUI is the top-rated Python application development environment. Launched in 2018 and actively developed, maintained, and supported in 2024. Transforms tkinter, Qt, WxPython, and Remi into a simple, intuitive, and fun experience for both hobbyists and expert users.
|
||||
Home-page: https://www.PySimpleGUI.com
|
||||
Author: PySimpleSoft Inc.
|
||||
Author-email:
|
||||
License: Proprietary
|
||||
Keywords: GUI UI tkinter Qt WxPython Remi wrapper simple easy beginner novice student graphics
|
||||
Classifier: Intended Audience :: Developers
|
||||
Classifier: License :: Other/Proprietary License
|
||||
Classifier: Operating System :: OS Independent
|
||||
Classifier: Framework :: PySimpleGUI
|
||||
Classifier: Framework :: PySimpleGUI :: 5
|
||||
Classifier: Programming Language :: Python :: 3
|
||||
Classifier: Programming Language :: Python :: 3.6
|
||||
Classifier: Programming Language :: Python :: 3.7
|
||||
Classifier: Programming Language :: Python :: 3.8
|
||||
Classifier: Programming Language :: Python :: 3.9
|
||||
Classifier: Programming Language :: Python :: 3.10
|
||||
Classifier: Programming Language :: Python :: 3.11
|
||||
Classifier: Programming Language :: Python :: 3.12
|
||||
Classifier: Programming Language :: Python :: 3.13
|
||||
Classifier: Topic :: Multimedia :: Graphics
|
||||
Requires-Python: >=3.6
|
||||
Description-Content-Type: text/markdown
|
||||
License-File: LICENSE.txt
|
||||
Requires-Dist: rsa
|
||||
|
||||
<p align="center">
|
||||
<img src="https://pysimplegui.net/images/big_news_emoji.png">
|
||||
<br>
|
||||
For more information visit <a href="https://home.PySimpleGUI.com">PySimpleGUI.com</a>
|
||||
</p>
|
||||
|
||||
|
||||
|
||||
##
|
||||
|
||||
<p align="center">
|
||||
<img height="250" src="https://pysimplegui.net/images/logos/Logo_Full_Transparent_Cropped.png">
|
||||
<h2 align="center">User Interfaces for Humans<sup>TM</sup></h2>
|
||||
</p>
|
||||
|
||||
# Welcome to PySimpleGUI 5 !!
|
||||
|
||||
Do you use PySimpleGUI 4? [Here is what you need to know.](https://docs.pysimplegui.com/en/latest/readme/sunset/)
|
||||
|
||||
**PySimpleGUI creates desktop applications easily**, enhancing the tkinter, Qt, WxPython, and Remi frameworks with a much simpler programming interface:
|
||||
|
||||
1. PySimpleGUI user interfaces are defined using core Python data types (lists and dictionaries) that are easily understood by beginners.
|
||||
2. PySimpleGUI event handling changes from a complex callback-based model to a simple message passing one.
|
||||
3. PySimpleGUI uses simple Python code and has no requirement for object oriented architecture.
|
||||
|
||||
PySimpleGUI is more than a GUI library: PySimpleGUI simplifies much of your Python development process. Sure, it makes developing user interfaces much easier, but PySimpleGUI also tames advanced Python functionality (such as threading) and makes it easy for all users to take their Python applications to the next level. PySimpleGUI is a robust toolkit.
|
||||
|
||||
## Introducing PySimpleGUI 5
|
||||
|
||||
For the last 5 years, PySimpleGUI offered free software with the hope of sustaining the
|
||||
company by donations. We appreciate the support we received, but the amount has been too
|
||||
small to support the PySimpleGUI project. For this reason, PySimpleGUI is switching to a
|
||||
subscription model, where commercial users are expected to pay a nominal annual fee.
|
||||
|
||||
|
||||
PySimpleGUI is now part of PySimpleSoft, Inc., whose mission is to make the best Python
|
||||
application development environment much, much better. Since launching in 2018, PySimpleGUI
|
||||
has helped hobbyists and professionals alike create Python GUIs in a fraction of the time.
|
||||
PySimpleGUI 5 takes PySimpleGUI to the next level, providing hundreds of improvements,
|
||||
including new features, enhanced security, and priority support.
|
||||
|
||||
|
||||
PySimpleGUI 5 is licensed software. As the [License Agreement](license.txt) explains, after a trial
|
||||
period, all PySimpleGUI 5 users must register at PySimpleGUI.com to obtain a Developer Key.
|
||||
For most users (Hobbyist Users), the license is at NO COST. If you are a Commercial User,
|
||||
subscriptions cost a nominal $99/year.
|
||||
|
||||
<p align="center">
|
||||
<img height="350" src="https://github.com/PySimpleGUI/PySimpleGUI_NEW_HOME/assets/65144/0b0dabcc-a538-482b-a226-c194ae30aa24">
|
||||
</p>
|
||||
|
||||
[Subscribe Now](https://pricing.PySimpleGUI.com) and help support the PySimpleGUI community.
|
||||
|
||||
## Examples
|
||||
|
||||
PySimpleGUI users have created thousands of amazing desktop applications. Here are a few screenshots. For more examples, see the [PySimpleGUI gallery](https://gallery.PySimpleGUI.com/).
|
||||
|
||||
<p align="center">
|
||||
<img height="150" src="https://github.com/PySimpleGUI/PSG5/assets/65144/c80eeaed-1029-4e22-83f9-c46fcc6916e6" />
|
||||
|
||||
<img height="150" src="https://github.com/PySimpleGUI/PSG5/assets/65144/dea22a36-b330-4160-96f7-3c7fcb968977" />
|
||||
|
||||
<img height="150" src="https://github.com/PySimpleGUI/PSG5/assets/65144/a9e30456-87aa-4174-90c2-c062f5cf84b9" />
|
||||
</p>
|
||||
|
||||
## Get Started at No Cost
|
||||
|
||||
Whether you are a Hobbyist User or Commercial User, you can start using PySimpleGUI at no cost.
|
||||
To get started with a 30-day trial period, first install Python and then
|
||||
|
||||
python -m pip install pysimplegui
|
||||
|
||||
and run some code, like
|
||||
|
||||
import PySimpleGUI as sg
|
||||
layout = [ [sg.Text('Hello, world!')] ]
|
||||
window = sg.Window('Hello Example', layout)
|
||||
while True:
|
||||
    event, values = window.read()
|
||||
    if event == sg.WIN_CLOSED:
|
||||
        break
|
||||
window.close()
|
||||
|
||||
(You might need to use `python3` instead of `python`.)
|
||||
|
||||
You can try PySimpleGUI for 30 days, after which you will need to Sign Up. Hobbyist users sign up at no cost, and Commercial Users subscribe at $99/year. For more details, see [PySimpleGUI.com/pricing](https://pricing.PySimpleGUI.com).
|
||||
|
||||
## Documentation
|
||||
|
||||
PySimpleGUI provides extensive documentation. Here are some starting points, depending on your needs and expertise:
|
||||
|
||||
* [Documentation](https://docs.pysimplegui.com/) - Extensive PySimpleGUI documentation
|
||||
* [Cookbook](https://cookbook.pysimplegui.com/) - Step-by-step cookbook of PySimpleGUI basics. Find a recipe that is close to what you want to build and use it as a starting point.
|
||||
* [Examples](https://examples.pysimplegui.com/) - Hundreds of sample PySimpleGUI applications.
|
||||
* [SDK Reference](https://sdk.pysimplegui.com/) - details for each PySimpleGUI element
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -1,24 +0,0 @@
|
||||
../../../bin/psghelp,sha256=uWk0mn03jWh3tBppFxt7nrvgUAZZerG7XxGijd7OfpE,283
|
||||
../../../bin/psghome,sha256=P_o1Iz6WGTBPhtovE_yuxM9bAkuKtMumCti-bV_2SCY,291
|
||||
../../../bin/psgmain,sha256=P_o1Iz6WGTBPhtovE_yuxM9bAkuKtMumCti-bV_2SCY,291
|
||||
../../../bin/psgupgrade,sha256=0YC_8ZHlTVSF429MGyE9ZTNSD-yv6N6sUNMgTVSNqOA,297
|
||||
../../../bin/psgver,sha256=bDlkjdSEEMnylSezE03zGsxtroPHXvWsJH9CHeSB7uQ,295
|
||||
../../../bin/psgwatermarkoff,sha256=Gjz9osMza58EQNTdbooRvX0ReZuRdipkWaZ3fgXY0Vk,293
|
||||
../../../bin/psgwatermarkon,sha256=QFJCgheNs14TtI3RVX_iC9Lkkkm8v2aryWkf0FyJQXA,291
|
||||
PySimpleGUI-5.0.3.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
|
||||
PySimpleGUI-5.0.3.dist-info/LICENSE.txt,sha256=PHtcJvE5Wt9karpjKf_hLcmiAQf7r5xAAJJt8nzEWIU,36666
|
||||
PySimpleGUI-5.0.3.dist-info/METADATA,sha256=qz6ZnnJg8tOp7N5DeeXsLR_zUwxlS30yZREF78_Y19o,6270
|
||||
PySimpleGUI-5.0.3.dist-info/RECORD,,
|
||||
PySimpleGUI-5.0.3.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
||||
PySimpleGUI-5.0.3.dist-info/WHEEL,sha256=Xo9-1PvkuimrydujYJAjF7pCkriuXBpUPEjma1nZyJ0,92
|
||||
PySimpleGUI-5.0.3.dist-info/entry_points.txt,sha256=mJXcKOJJSyKnVajX7aL5EqyfiMIiOnSzqQYo_xGY7rQ,397
|
||||
PySimpleGUI-5.0.3.dist-info/top_level.txt,sha256=9sLHIbBIAFdT1n02rvUvrGhAsmIAkTfTG-gY5sk4bpc,12
|
||||
PySimpleGUI/CONTRIBUTING.md,sha256=VXkJ0xtZIWAB3OAf87mi3JstnN4l_cC7RozcaNfcko4,1119
|
||||
PySimpleGUI/LICENSE.txt,sha256=PHtcJvE5Wt9karpjKf_hLcmiAQf7r5xAAJJt8nzEWIU,36666
|
||||
PySimpleGUI/PySimpleGUI.py,sha256=M4rPagO1CELkymICUrIz18--R5BDac0dxNFADvrgAxY,2354408
|
||||
PySimpleGUI/README.md,sha256=nClrzaaaX7MCB816fFoXvws5xJQOVxUCuG-gWiHbZ_w,4859
|
||||
PySimpleGUI/__init__.py,sha256=0KCzZoBDYTbc33Ftc4syE0aSW0Jv3CSrUnshN1F-dOM,68
|
||||
PySimpleGUI/__main__.py,sha256=jlmedyhCT08nn6x7cQhI-CZx-bGrlEQYniuf1HuNnak,70
|
||||
PySimpleGUI/__pycache__/PySimpleGUI.cpython-311.pyc,,
|
||||
PySimpleGUI/__pycache__/__init__.cpython-311.pyc,,
|
||||
PySimpleGUI/__pycache__/__main__.cpython-311.pyc,,
|
@ -1,5 +0,0 @@
|
||||
Wheel-Version: 1.0
|
||||
Generator: bdist_wheel (0.41.3)
|
||||
Root-Is-Purelib: true
|
||||
Tag: py3-none-any
|
||||
|
@ -1,8 +0,0 @@
|
||||
[gui_scripts]
|
||||
psghelp = PySimpleGUI.PySimpleGUI:main_sdk_help
|
||||
psghome = PySimpleGUI.PySimpleGUI:_main_entry_point
|
||||
psgmain = PySimpleGUI.PySimpleGUI:_main_entry_point
|
||||
psgupgrade = PySimpleGUI.PySimpleGUI:_upgrade_entry_point
|
||||
psgver = PySimpleGUI.PySimpleGUI:main_get_debug_data
|
||||
psgwatermarkoff = PySimpleGUI.PySimpleGUI:main_watermark_off
|
||||
psgwatermarkon = PySimpleGUI.PySimpleGUI:main_watermark_on
|
@ -1 +0,0 @@
|
||||
PySimpleGUI
|
@ -1,7 +0,0 @@
|
||||
## Contributing to PySimpleGUI
|
||||
|
||||
We are happy to receive issues describing bug reports and feature requests! If your bug report relates to a security vulnerability, please do not file a public issue, and please instead reach out to us at issues@PySimpleGUI.com.
|
||||
|
||||
We do not accept (and do not wish to receive) contributions of user-created or third-party code, including patches, pull requests, or code snippets incorporated into submitted issues. Please do not send us any such code! Bug reports and feature requests should not include any source code.
|
||||
|
||||
If you nonetheless submit any user-created or third-party code to us, (1) you assign to us all rights and title in or relating to the code; and (2) to the extent any such assignment is not fully effective, you hereby grant to us a royalty-free, perpetual, irrevocable, worldwide, unlimited, sublicensable, transferable license under all intellectual property rights embodied therein or relating thereto, to exploit the code in any manner we choose, including to incorporate the code into PySimpleGUI and to redistribute it under any terms at our discretion.
|
@ -1,588 +0,0 @@
|
||||
PySimpleGUI License Agreement
|
||||
|
||||
Version 1.0, Last updated: January 17, 2024
|
||||
|
||||
This PySimpleGUI License Agreement (the "Agreement") governs the use,
|
||||
reproduction, distribution, modification and all other exploitation of
|
||||
PySimpleGUI. The Agreement is made by and between PySimpleSoft, Inc.
|
||||
("Licensor") and the person or legal entity using PySimpleGUI hereunder
|
||||
("Licensee" and, together with Licensor, the "Parties").
|
||||
|
||||
If you are using PySimpleGUI on behalf of a legal entity such as an employer,
|
||||
then "Licensee" means that legal entity, and you represent and warrant that you
|
||||
have the authority and capacity to enter into this Agreement on behalf of
|
||||
Licensee.
|
||||
|
||||
"PySimpleGUI" consists of the following materials:
|
||||
* the PySimpleGUI software library, version 5.0 or later (the "Library");
|
||||
* the PySimpleGUI Library documentation (the "Documentation");
|
||||
* sample programs demonstrating use of the Library (the "Demo Programs"); and
|
||||
* utility programs relating to PySimpleGUI (the "Utilities").
|
||||
|
||||
PySimpleGUI may require you to obtain and use third-party software which is
|
||||
distributed under separate license terms. Any such software is not considered
|
||||
"PySimpleGUI" hereunder and is subject solely to such separate license terms.
|
||||
|
||||
PySimpleGUI is made available to Licensee pursuant to this Agreement for the
|
||||
purpose of (1) pursuant to Section 1.2, enabling Authorized Developers to use
|
||||
the Library in connection with developing Licensee Applications, and to use the
|
||||
Documentation, the Demo Programs and the Utilities in connection therewith; and
|
||||
(2) pursuant to Section 1.3, enabling End Users of the Licensee Applications to
|
||||
execute the Library as a dependency of the Licensee Applications; each as
|
||||
defined and more fully set forth herein and subject to the limitations set
|
||||
forth herein.
|
||||
|
||||
Licensor agrees to license PySimpleGUI to Licensee only in accordance with the
|
||||
terms of this Agreement. By using PySimpleGUI, Licensee agrees to be bound by
|
||||
the terms of this Agreement. If you do not agree to the terms of this
|
||||
Agreement, you may not copy, use, distribute, modify or otherwise attempt to
|
||||
exploit PySimpleGUI.
|
||||
|
||||
Licensee acknowledges that Licensor may from time to time update or modify this
|
||||
Agreement, by publishing a new version of this Agreement on Licensor's website.
|
||||
Licensee may continue to use the version of PySimpleGUI that it previously
|
||||
obtained under the prior version of this Agreement, but any version of
|
||||
PySimpleGUI received or used thereafter shall be subject to the updated version
|
||||
of this Agreement.
|
||||
|
||||
Accordingly, in consideration of the mutual covenants set forth herein, the
|
||||
receipt and sufficiency of which is hereby acknowledged, the Parties agree as
|
||||
follows.
|
||||
|
||||
1. Authorized Developers; License Grants; Limitations.
|
||||
|
||||
1.1. Definitions. As used herein:
|
||||
|
||||
* "Authorized Developer" means any individual person who has registered on
|
||||
Licensor's site at https://PySimpleGUI.com (the "Site") to develop one or
|
||||
more of Licensee's own applications which make use of the Library as a
|
||||
dependency in accordance with Section 1.5 (collectively, "Licensee
|
||||
Applications") and is either (1) a Hobbyist Developer; or (2) a Commercial
|
||||
Developer who has purchased an active PySimpleGUI paid license hereunder, in
|
||||
effect at the time of development, which is fully paid up pursuant to Section
|
||||
3.
|
||||
|
||||
* "Hobbyist Developer" means any individual who uses PySimpleGUI for
|
||||
development purposes solely for either or both of the following: (1) personal
|
||||
(e.g., not on behalf of an employer or other third party), Non-Commercial
|
||||
purposes; or (2) Non-Commercial educational or learning purposes (1 and 2
|
||||
together, the "Permitted No-cost Purposes").
|
||||
|
||||
* "Commercial Developer" means any individual who uses PySimpleGUI for
|
||||
development purposes who is not a Hobbyist Developer.
|
||||
|
||||
As used in this Section 1, "Non-Commercial" means use which is both (1) not on
|
||||
behalf or for the benefit of any company or other organization; and (2) not
|
||||
involving the receipt of any commercial advantage or monetary compensation. If
|
||||
you have questions about whether your contemplated use is "Non-Commercial,"
|
||||
please contact us at license@pysimplegui.com.
|
||||
|
||||
For the avoidance of doubt:
|
||||
|
||||
* Only Authorized Developers (e.g., Hobbyist Developers and Commercial
|
||||
Developers who satisfy the requirements for Authorized Developers) may use
|
||||
PySimpleGUI for development purposes.
|
||||
|
||||
* A Hobbyist Developer may not use PySimpleGUI for any development purpose
|
||||
other than the Permitted No-cost Purposes.
|
||||
|
||||
* Only Commercial Developers may use PySimpleGUI to develop Licensee
|
||||
Applications for any commercial purpose; for the benefit of, on behalf of or
|
||||
on computer hardware belonging to an employing company or other organization;
|
||||
or for commercial educational purposes, such as the development of a paid
|
||||
training course.
|
||||
|
||||
If you have questions about whether your contemplated Licensee Application
|
||||
would be a Permitted No-cost Purpose subject to a Hobbyist Developer license,
|
||||
please contact us at license@pysimplegui.com.
|
||||
|
||||
1.2. Development License Grants. Subject to the terms and conditions of this
|
||||
Agreement:
|
||||
|
||||
1.2.1. Library. Licensor grants Licensee a limited, personal, revocable,
|
||||
non-exclusive, non-sublicensable, non-transferable license during the Term (1)
|
||||
for its Authorized Developers to internally install, use, reproduce and modify
|
||||
the Library to develop Licensee Applications; and (2) to redistribute the
|
||||
Library to recipients of its Licensee Applications ("End Users"); provided,
|
||||
that such redistribution may not include publishing the source code of the
|
||||
Library (in modified or unmodified form) in a publicly accessible website or
|
||||
repository or in other publicly accessible form.
|
||||
|
||||
1.2.2. Documentation. Licensor grants Licensee a limited, personal, revocable,
|
||||
non-exclusive, non-sublicensable, non-transferable license during the Term for
|
||||
its Authorized Developers to internally access, use, and reproduce a reasonable
|
||||
number of copies of the Documentation for the sole purpose of facilitating the
|
||||
use of the Library by Licensee Applications in accordance with this Agreement.
|
||||
For the avoidance of doubt, Licensee may not modify or redistribute the
|
||||
Documentation.
|
||||
|
||||
1.2.3. Demo Programs. Licensor grants Licensee a limited, personal, revocable,
|
||||
non-exclusive, non-sublicensable, non-transferable license during the Term to
|
||||
install, use, execute, reproduce and modify the Demo Programs, and to
|
||||
incorporate modified portions of the Demo Programs into the Licensee
|
||||
Applications; provided, that (1) the Demo Programs may not be used for any
|
||||
purposes other than in connection with the use of the Library; and (2) the Demo
|
||||
Programs may not be (individually or as a whole) redistributed in unmodified
|
||||
form or as a program with substantially similar functionality to the Demo
|
||||
Programs.
|
||||
|
||||
1.2.4. Utilities. Licensor grants Licensee a limited, personal, revocable,
|
||||
non-exclusive, non-sublicensable, non-transferable license during the Term to
|
||||
install, use, execute, reproduce and modify the Utilities, but not to
|
||||
distribute or publish the Utilities or any modified version.
|
||||
|
||||
1.2.5. Developer Key Required. The licenses granted in this Section 1.2 may
|
||||
only be exercised by Authorized Developers within the period of time during
|
||||
which each such Authorized Developer has a then-active Developer Key pursuant
|
||||
to Section 3. Licensor may in its discretion permit recipients of PySimpleGUI
|
||||
to make limited use of it for a limited trial period without a Developer Key.
|
||||
|
||||
1.2.6. Limitations for Hobbyist Developers. For Hobbyist Developers, the
|
||||
licenses granted in this Section 1.2 may only be exercised for the Permitted
|
||||
No-cost Purposes.
|
||||
|
||||
1.2.7. Limitations on Modification of the Library. Licensee's right to modify
|
||||
the Library pursuant to this Section 1.2 is further limited as follows: (a)
|
||||
Licensee may not modify or extend the Library or take any other action which
|
||||
has the effect of enabling bypass of the Library's protection mechanisms
|
||||
requiring the use of valid Developer Keys or Distribution Keys. (b) Licensee
|
||||
explicitly acknowledges and agrees that Licensor's digital signature of the
|
||||
Library is only applicable to the unmodified Library as made available by
|
||||
Licensor, and that any modifications to the Library will result in Licensor's
|
||||
digital signature no longer applying to the modified version.
|
||||
|
||||
1.2.8. Limitations on Distribution of the Library. Licensee's right to
|
||||
distribute the Library (in modified or unmodified form) pursuant to this
|
||||
Section 1.2 is subject to Licensee (a) including the applicable proprietary
|
||||
notices set forth in Section 2.2; and (b) including the PySimpleGUI Flow-Down
|
||||
License Terms set forth in Exhibit A in the license terms that Licensee uses to
|
||||
distribute the Licensee Application.
|
||||
|
||||
1.2.9. Distribution Keys. Commercial Developers may obtain from Licensor a
|
||||
PySimpleGUI distribution key ("Distribution Key") through the Authorized
|
||||
Developer's Site account and utilizing the Distribution Key through the
|
||||
protection mechanism made available in the Library to permit distribution to
|
||||
End Users. The Commercial Developer may use its Distribution Key to enable End
|
||||
Users to install and execute the Licensee Applications, including the Library
|
||||
incorporated therein, without requiring each recipient to obtain a Developer
|
||||
Key or be limited to a trial period as described in Section 1.2.5. Licensee
|
||||
shall be responsible for all activities occurring under Distribution Keys
|
||||
obtained by its Authorized Developers and for the compliance with this
|
||||
Agreement of all Licensee Applications using such Distribution Keys.
|
||||
|
||||
1.3. Run-time End User License Grant. Subject to the terms and conditions of
|
||||
this Agreement, Licensor grants Licensee a limited, personal, revocable,
|
||||
non-exclusive, non-sublicensable, non-transferable license during the Term to
|
||||
install and execute the Library solely for it and its employee End Users to
|
||||
internally use the corresponding Licensee Applications with which the Library
|
||||
is distributed. For the avoidance of doubt, the license set forth in this
|
||||
Section 1.3 does not permit modification, external redistribution, integration
|
||||
of the Library with other software, or any other use of the Library (for
|
||||
development purposes or otherwise) except solely as distributed with the
|
||||
unmodified Licensee Applications; any such activities are permitted only by
|
||||
Authorized Developers and only to the extent permitted by Section 1.2. If the
|
||||
Licensee Application does not include a valid Distribution Key from a
|
||||
Commercial Developer, then the period of use of the Library within the Licensee
|
||||
Application will be limited to a trial period for any End User who does not
|
||||
register as an Authorized Developer hereunder.
|
||||
|
||||
1.4. License Restrictions. The licenses granted to Licensee hereunder are
|
||||
expressly made subject to the following limitations: except as expressly
|
||||
permitted herein, Licensee may not (and shall not permit any third party to):
|
||||
(a) copy all or any portion of PySimpleGUI; (b) modify or translate
|
||||
PySimpleGUI; (c) reverse engineer, decompile or disassemble the Software, in
|
||||
whole or in part, except solely to the extent permitted under applicable law;
|
||||
(d) create derivative works based on PySimpleGUI; (e) publicly display or
|
||||
publish PySimpleGUI; (f) rent, lease, sublicense, sell, distribute, assign,
|
||||
transfer, or otherwise permit access to PySimpleGUI to any third party; (g)
|
||||
bypass or work around any requirements for license keys, limitations on access,
|
||||
or obfuscation or security mechanisms incorporated into PySimpleGUI; (h) use
|
||||
PySimpleGUI for illegal or otherwise harmful purposes, including without
|
||||
limitation harassment, defamation, creation or delivery of unsolicited emails
|
||||
or spam, infringement of third party intellectual property rights or other
|
||||
third party rights, or distribution of viruses, worms, malware or other harmful
|
||||
or destructive software; (i) incorporate PySimpleGUI or any portion thereof
|
||||
into any software that purports to subject it to open source software or
|
||||
similar license terms, including any prior version of PySimpleGUI (modified or
|
||||
unmodified) which was previously distributed under such licenses; or (j)
|
||||
exercise any other right to PySimpleGUI not expressly granted in this
|
||||
Agreement.
|
||||
|
||||
1.5. Licensee Application Prohibitions. Notwithstanding anything else in
|
||||
this Agreement, Licensee shall ensure that Licensee Applications (a) do not
|
||||
have the purpose, intent or functionality of enabling End Users to make further
|
||||
use of PySimpleGUI for their own development purposes or to carry out any
|
||||
activities otherwise restricted or prohibited hereunder; (b) do not have a
|
||||
substantially similar purpose to PySimpleGUI; (c) do not enable End Users to
|
||||
interact, integrate or otherwise develop user interfaces via direct or indirect
|
||||
access to PySimpleGUI's functionality; and (d) are not intended or designed for
|
||||
use in high-risk use cases that could reasonably result in death, severe bodily
|
||||
injury, or other physical property or environmental damage.
|
||||
|
||||
1.6. No Use with Earlier Versions of PySimpleGUI. For the avoidance of
|
||||
doubt, no portions of PySimpleGUI distributed under this Agreement may be used
|
||||
in connection with, or in any way incorporated with or into, any versions of
|
||||
the PySimpleGUI library prior to version 5.0 that have been distributed under
|
||||
the GNU Lesser General Public License.
|
||||
|
||||
1.7. Additional Grant to Python Software Foundation. With regards to
|
||||
portions of PySimpleGUI that Licensor uploads to PyPI, Python Software
|
||||
Foundation ("PSF") may copy and redistribute such portions unmodified on PyPI
|
||||
in the form provided by Licensor, with no further action required by PSF.
|
||||
|
||||
1.8. Prohibition on Training Artificial Intelligence. As used herein,
|
||||
"Artificial Intelligence" means a system or model that is intended to generate
|
||||
or identify patterns in code or data, produce insights or correlations, or make
|
||||
predictions, recommendations, or decisions; in each case, where the system or
|
||||
model operates using machine learning, neural networks, large language models,
|
||||
or other approaches designed to approximate cognitive abilities. Licensee shall
|
||||
not (and shall not directly or indirectly permit or assist anyone else to) use
|
||||
PySimpleGUI, or any part thereof, to train an Artificial Intelligence that is
|
||||
offered to third parties on a commercial basis or as part of a larger
|
||||
commercial offering. The preceding sentence does not prohibit use of
|
||||
PySimpleGUI in conjunction with an Artificial Intelligence in other ways, such
|
||||
as developing a front-end user interface.
|
||||
|
||||
2. Intellectual Property Ownership; Notices.
|
||||
|
||||
2.1. Licensor Ownership. PySimpleGUI is not sold to Licensee, and all rights
|
||||
not expressly granted herein are reserved to Licensor. As between the parties,
|
||||
Licensor and its licensors own all right, title and interest in and to
|
||||
PySimpleGUI and any part thereof, including, without limitation, all
|
||||
copyrights, patents, trademarks, trade secrets or other intellectual property
|
||||
or proprietary rights.
|
||||
|
||||
2.2. Proprietary Notices. Licensee shall not modify or remove any copyright
|
||||
or patent notices or other proprietary notices or markings from any portion of
|
||||
PySimpleGUI (whether modified or unmodified) without Licensor's explicit
|
||||
written permission. Licensor shall ensure that any Licensee Applications that
|
||||
use the Library include a notice in the following form within the Licensee
|
||||
Application as well as any corresponding Licensee documentation or materials:
|
||||
|
||||
For unmodified versions of PySimpleGUI:
|
||||
|
||||
This product includes PySimpleGUI (https://PySimpleGUI.com). PySimpleGUI
|
||||
is Copyright (c) PySimpleSoft, Inc. and/or its licensors. Use of
|
||||
PySimpleGUI is subject to the license terms available at
|
||||
https://PySimpleGUI.com/eula
|
||||
|
||||
PYSIMPLEGUI IS PROVIDED "AS IS," WITHOUT ANY WARRANTIES, WHETHER EXPRESS OR
|
||||
IMPLIED. PYSIMPLESOFT DISCLAIMS ALL IMPLIED WARRANTIES, INCLUDING WITHOUT
|
||||
LIMITATION THE IMPLIED WARRANTIES OF NONINFRINGEMENT, TITLE,
|
||||
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
|
||||
For modified versions of PySimpleGUI:
|
||||
|
||||
This product includes a modified version of PySimpleGUI
|
||||
(https://PySimpleGUI.com). PySimpleGUI is Copyright (c) PySimpleSoft, Inc.
|
||||
and/or its licensors. Use of PySimpleGUI is subject to the license terms
|
||||
available at https://PySimpleGUI.com/eula
|
||||
|
||||
PYSIMPLEGUI IS PROVIDED "AS IS," WITHOUT ANY WARRANTIES, WHETHER EXPRESS OR
|
||||
IMPLIED. PYSIMPLESOFT DISCLAIMS ALL IMPLIED WARRANTIES, INCLUDING WITHOUT
|
||||
LIMITATION THE IMPLIED WARRANTIES OF NONINFRINGEMENT, TITLE,
|
||||
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
|
||||
If the Licensee Application or the corresponding Licensee documentation or
|
||||
materials include Licensee's copyright notices or other third parties' notices,
|
||||
then Licensee shall include the above notice together with such notices.
|
||||
|
||||
2.3. Licensor Marks. As between the parties hereto, all of Licensor's
|
||||
trademarks and service marks applicable to Licensor or PySimpleGUI
|
||||
(collectively, the "Licensor Marks") are the sole property of Licensor and/or
|
||||
its licensors. Subject to the terms and conditions of this Agreement, Licensor
|
||||
grants Licensee a limited, personal, revocable, non-exclusive,
|
||||
non-sublicensable, non-transferable license to use the Licensor Mark
|
||||
"PySimpleGUI" in connection with Licensee's permitted distribution of the
|
||||
Library hereunder. The license set forth in this Section 2.3 is explicitly
|
||||
conditioned on (a) Licensee's agreement not to challenge Licensor's ownership
|
||||
of the Licensor Marks at any time during the Term or thereafter; (b) Licensee
|
||||
ensuring that any modified version of the Library is clearly and prominently
|
||||
noted as such; (c) Licensee complying with all trademark usage guidelines and
|
||||
requirements that Licensor may publish from time to time; and (d) Licensee
|
||||
immediately correcting incorrect usage of the Licensor Marks upon request from
|
||||
Licensor. Licensee shall immediately cease usage of the Licensor Marks upon
|
||||
written notice thereof from Licensor. All goodwill arising from use of the
|
||||
Licensor Marks shall inure to the benefit of Licensor.
|
||||
|
||||
3. Developer Keys; Fees and Payments.
|
||||
|
||||
3.1. Developer Keys. In order to develop Licensee Applications pursuant to
|
||||
Section 1.2 (and subject to any limited trial period usage as may be permitted
|
||||
by Licensor from time to time), each Authorized Developer shall obtain a
|
||||
PySimpleGUI developer license key ("Developer Key") by registering on the Site
|
||||
as set forth therein. Each Developer Key is personal to the specific Authorized
|
||||
Developer, and Licensee shall not permit Authorized Developers to disclose,
|
||||
share or reuse Developer Keys. For the avoidance of doubt, any disclosure,
|
||||
sharing or reuse of a Developer Key by Licensee's Authorized Developers,
|
||||
whether or not authorized by Licensee, shall be a material breach permitting
|
||||
termination of this Agreement pursuant to Section 8.3. Developer Keys are
|
||||
Licensor's Confidential Information pursuant to Section 5. Developer Keys are
|
||||
limited to a specified time period (which shall be annual from the start date
|
||||
of the Developer Key, unless otherwise explicitly stated by Licensor). Upon the
|
||||
expiration of a Developer Key, the corresponding Authorized Developer may no
|
||||
longer use the Developer Key and must obtain a new Developer Key from the Site
|
||||
in order to continue using PySimpleGUI for development purposes pursuant to
|
||||
Section 1.2.
|
||||
|
||||
3.2. Fees for Commercial Developer Keys; Taxes. Before obtaining each
|
||||
Developer Key for a Commercial Developer, Licensee shall pay to Licensor the
|
||||
corresponding fees as stated on the Site and using the payment mechanism made
|
||||
available on the Site. All payments shall be made in United States dollars. All
|
||||
amounts payable by Licensee hereunder are exclusive of taxes and similar
|
||||
assessments, and Licensee is responsible for all sales, use, and excise taxes,
|
||||
and any other similar taxes of any kind imposed by any federal, state, or local
|
||||
governmental or regulatory authority on any amounts payable by Licensee
|
||||
hereunder, excluding any taxes imposed on Licensor's income.
|
||||
|
||||
3.3. Accuracy of Registration Details. Licensee represents and warrants that
|
||||
(a) all information provided by it and its Authorized Developers when
|
||||
registering for Developer Keys shall be truthful, accurate, complete and not
|
||||
misleading, and (b) it and its Authorized Developers shall not misrepresent
|
||||
their use of PySimpleGUI as qualifying for a Hobbyist Developer Key if their
|
||||
use does not satisfy the Permitted No-cost Purposes.
|
||||
|
||||
4. Support and Updates.
|
||||
|
||||
4.1. Support. Licensor has no obligation hereunder to provide support to
|
||||
Licensee or its Authorized Developers. Authorized Developers may submit
|
||||
Feedback (as defined in Section 5.4) consisting of issues and bug reports to
|
||||
the PySimpleGUI software repository as described on the Site or in the
|
||||
Documentation. Licensor may in its sole discretion address such issues or bug
|
||||
reports in current or future versions of PySimpleGUI, but has no obligation to
|
||||
do so.
|
||||
|
||||
4.2. Updates. Licensor has no obligation hereunder to make available updated
|
||||
versions of PySimpleGUI. In the event that Licensor elects to make available an
|
||||
updated version of PySimpleGUI, then Authorized Developers with a then-active
|
||||
Developer Key may download and use the updated version, and the updated version
|
||||
shall be included in the definition of "PySimpleGUI" thereafter for purposes of
|
||||
this Agreement.
|
||||
|
||||
5. Confidentiality; Feedback.
|
||||
|
||||
5.1. Confidential Information. Licensee acknowledges that portions of
|
||||
PySimpleGUI and certain other materials are confidential as provided herein.
|
||||
"Confidential Information" means any and all information, whether provided in
|
||||
writing, orally, visually, electronically or by other means, related to
|
||||
Licensor's or its licensors' services and/or business that, whether it
|
||||
constitutes a Trade Secret or not, is treated as confidential or secret by
|
||||
Licensor (that is, it is the subject of efforts by Licensor that are reasonable
|
||||
under the circumstances to maintain its secrecy), including, but not limited
|
||||
to, (i) Trade Secrets as defined below; (ii) any and all other information
|
||||
which is disclosed by Licensor to Licensee orally, electronically, visually, or
|
||||
in a document or other tangible form which is either identified as or should be
|
||||
reasonably understood to be confidential and/or proprietary; and, (iii) any
|
||||
notes, extracts, analysis, or materials prepared by Licensee which are copies
|
||||
of or derivative works of Licensor's or its licensors' proprietary or
|
||||
confidential information from which the substance of Confidential Information
|
||||
can be inferred or otherwise understood. Confidential Information shall not
|
||||
include information which Licensee can clearly establish by written evidence:
|
||||
(a) already is lawfully known to or independently developed by Licensee without
|
||||
access to the Confidential Information or Trade Secrets, (b) is disclosed by
|
||||
Licensor in non-confidential published materials, (c) is generally known to the
|
||||
public, or (d) is rightfully obtained from any third party without any
|
||||
obligation of confidentiality.
|
||||
|
||||
5.2. Trade Secrets. As used herein, "Trade Secrets" means all non-public
|
||||
information whether tangible or intangible related to Licensor's and its
|
||||
licensors' services or business that (i) derives economic value, actual or
|
||||
potential, from not being generally known to or readily ascertainable by other
|
||||
persons who can obtain economic value from its disclosure or use; and (ii) is
|
||||
the subject of efforts that are reasonable under the circumstances to maintain
|
||||
its secrecy, which may include, without limitation, (a) marking any information
|
||||
reduced to tangible form clearly and conspicuously with a legend identifying
|
||||
its confidential or trade secret nature; (b) identifying any oral communication
|
||||
as confidential or secret immediately before, during, or after such oral
|
||||
communication; or (c) otherwise treating such information as confidential.
|
||||
|
||||
5.3. Licensee Obligations. Licensee agrees not to disclose Confidential
|
||||
Information or Trade Secrets to any third party and will protect and treat all
|
||||
Confidential Information and Trade Secrets with the highest degree of care.
|
||||
Except as otherwise expressly provided in this Agreement, Licensee will not use
|
||||
or make any copies of Confidential Information or Trade Secrets, in whole or in
|
||||
part, without the prior written authorization of Licensor. Licensee may
|
||||
disclose Confidential Information or Trade Secrets if required by statute,
|
||||
regulation, or order of a court of competent jurisdiction, provided that
|
||||
Licensee provides Licensor with prior notice, discloses only the minimum
|
||||
Confidential Information or Trade Secrets required to be disclosed, and
|
||||
cooperates with Licensor in taking appropriate protective measures. These
|
||||
obligations shall continue for three (3) years following termination or
|
||||
expiration of this Agreement with respect to Confidential Information that does
|
||||
not rise to the level of a Trade Secret and shall continue for Trade Secrets so
|
||||
long as they remain Trade Secrets.
|
||||
|
||||
5.4. Feedback. As used herein, "Feedback" means any comments, questions,
|
||||
suggestions, issues, bug reports, or related feedback provided by Licensee to
|
||||
Licensor relating to PySimpleGUI, including, without limitation, suggesting or
|
||||
recommending changes to any part of PySimpleGUI, or new features or
|
||||
functionality relating thereto. All Feedback is, and will be treated as,
|
||||
non-confidential and non-proprietary, regardless of any markings Licensee may
|
||||
apply to it. Licensee hereby assigns to Licensor all right, title, and interest
|
||||
in, and Licensor is free to use without any attribution or compensation to
|
||||
Licensee, any ideas, know-how, concepts, techniques, or other intellectual
|
||||
property and proprietary rights contained in the Feedback, whether or not
|
||||
patentable, for any purpose whatsoever, including but not limited to,
|
||||
developing, manufacturing, having manufactured, licensing, marketing, and
|
||||
selling, directly or indirectly, products and services using such Feedback. To
|
||||
the extent the foregoing assignment of rights, title and interest in and to
|
||||
Feedback is prohibited by applicable law, Licensee hereby grants Licensor a
|
||||
non-exclusive, perpetual, irrevocable, royalty-free, fully paid-up, worldwide
|
||||
license (including the right to sublicense through multiple tiers) to (a) fully
|
||||
use, practice and exploit those non-assignable rights, title and interest,
|
||||
including, but not limited to, the right to use, reproduce, adapt, publicly
|
||||
perform, publicly display, modify, prepare derivative works, publish, transmit
|
||||
and distribute Feedback, or any portion thereof, in any form, medium or
|
||||
distribution method now known or hereafter existing, known or developed, for
|
||||
any purpose, and to develop, manufacture, have manufactured, license, market,
|
||||
and sell, directly or indirectly, products and services using Feedback; and (b)
|
||||
authorize any such use by others of Feedback, or any portion thereof, in the
|
||||
same manner.
|
||||
|
||||
6. NO LICENSOR WARRANTIES; LIABILITY.
|
||||
|
||||
6.1. DISCLAIMER OF WARRANTIES. PYSIMPLEGUI IS PROVIDED TO LICENSEE "AS IS".
|
||||
LICENSOR DOES NOT MAKE ANY, AND HEREBY SPECIFICALLY DISCLAIMS ANY,
|
||||
REPRESENTATIONS, ENDORSEMENTS, GUARANTEES, OR WARRANTIES, EXPRESS OR IMPLIED,
|
||||
RELATED TO PYSIMPLEGUI INCLUDING, BUT NOT LIMITED TO, ANY IMPLIED WARRANTY OF
|
||||
MERCHANTABILITY, TITLE, FITNESS FOR A PARTICULAR PURPOSE OR NONINFRINGEMENT OF
|
||||
INTELLECTUAL PROPERTY RIGHTS. Licensee acknowledges that Licensor does not
|
||||
guarantee compatibility between PySimpleGUI and any future versions thereof,
|
||||
and that Licensor makes no commitments as to future development, availability,
|
||||
release or licensing of any current or future versions of PySimpleGUI. Licensee
|
||||
will have sole responsibility for the adequate protection and backup of
|
||||
Licensee's software, data and equipment used with PySimpleGUI. The entire risk
|
||||
as to the quality and performance of PySimpleGUI and any obligation with
|
||||
respect to service and support is borne by Licensee. Licensee understands that
|
||||
Software hosted by Licensor for evaluation purposes may not be secure or
|
||||
stable. Licensee waives any claim against Licensor which may arise as a result
|
||||
of Licensee's breach of the foregoing. This Agreement does not grant Licensee
|
||||
any right to any maintenance, services, including without limitation, any
|
||||
support, enhancement, modification, bug fix or update to the Software, and
|
||||
Licensor is under no obligation to provide or inform Licensee of any such
|
||||
maintenance or services.
|
||||
|
||||
6.2. DISCLAIMER OF LIABILITY. LICENSEE EXPLICITLY AGREES THAT, TO THE
|
||||
MAXIMUM EXTENT PERMITTED BY LAW, LICENSOR SHALL NOT BE LIABLE UNDER ANY LEGAL
|
||||
THEORY FOR ANY DAMAGES SUFFERED IN CONNECTION WITH THE USE OF THE SOFTWARE,
|
||||
INCLUDING BUT NOT LIMITED TO ANY LOST PROFITS, LOST SAVINGS OR ANY DIRECT,
|
||||
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, PUNITIVE OR CONSEQUENTIAL DAMAGES,
|
||||
WHETHER RESULTING FROM IMPAIRED OR LOST DATA, SOFTWARE OR COMPUTER FAILURE, THE
|
||||
LICENSEE APPLICATIONS, OR ANY OTHER CAUSE, BY LICENSEE OR ANY OTHER THIRD
|
||||
PARTY, EVEN IF IT HAS BEEN INFORMED OF THE POSSIBILITY OF SUCH DAMAGES.
|
||||
LICENSEE HEREBY EXPRESSLY RELEASES LICENSOR FROM ANY AND ALL LIABILITY OR
|
||||
RESPONSIBILITY TO ANY DAMAGE CAUSED, DIRECTLY OR INDIRECTLY, TO LICENSEE OR ANY
|
||||
THIRD PARTY AS A RESULT OF THE USE OF THE SOFTWARE OR THE INSTALLATION THEREOF
|
||||
INTO LICENSEE'S COMPUTER ENVIRONMENT. IN THE EVENT THAT THE DISCLAIMERS OF
|
||||
LIABILITY SET FORTH HEREIN ARE HELD TO BE UNENFORCEABLE, THE PARTIES AGREE THAT
|
||||
UNDER NO CIRCUMSTANCES SHALL LICENSOR'S AGGREGATE LIABILITY HEREUNDER OR IN
|
||||
CONNECTION WITH THIS AGREEMENT EXCEED THE AMOUNTS PAID BY LICENSEE TO LICENSOR
|
||||
IN THE 12 MONTHS PRECEDING THE DATE THAT A CLAIM FIRST ACCRUES. LICENSEE SHALL
|
||||
BRING ANY CLAIM AGAINST LICENSOR WITHIN 12 MONTHS OF THE DATE THAT THE CLAIM
|
||||
FIRST ACCRUES, AND HEREBY WAIVES ANY CLAIMS THAT IT DOES NOT BRING WITHIN SUCH
|
||||
TIME PERIOD.
|
||||
|
||||
6.3. Essential Terms. THIS SECTION 6 IS AN ESSENTIAL BASIS OF LICENSOR'S
|
||||
DECISION TO OFFER PYSIMPLEGUI, AND SHALL APPLY REGARDLESS OF THE LEGAL THEORY
|
||||
UPON WHICH DAMAGES MAY BE CLAIMED; REGARDLESS OF WHETHER A PARTY KNEW OR SHOULD
|
||||
HAVE KNOWN OF THE POSSIBILITY OF SUCH DAMAGES; AND REGARDLESS OF WHETHER THE
|
||||
FOREGOING LIMITATIONS OF LIABILITY CAUSE ANY REMEDY TO FAIL IN ITS ESSENTIAL
|
||||
PURPOSE.
|
||||
|
||||
7. Indemnification. Licensee agrees to defend, indemnify and hold Licensor
|
||||
and its directors, officers, employees and representatives harmless for any
|
||||
claims, expenses, losses, costs, fees (including attorneys' fees) or damages of
|
||||
any sort resulting from (a) Licensee's breach of this Agreement; (b) Licensee's
|
||||
use of PySimpleGUI or exercise of the license rights granted hereunder; or (c)
|
||||
the Licensee Applications, or Licensee's or any third party's use thereof.
|
||||
|
||||
8. Term and Termination.
|
||||
|
||||
8.1. Term. This Agreement shall commence on the date on which Licensee
|
||||
downloads PySimpleGUI or otherwise obtains a copy of PySimpleGUI, and shall
|
||||
continue thereafter until terminated as set forth herein.
|
||||
|
||||
8.2. Termination by Licensee. Licensee may terminate this Agreement with
|
||||
written notice to Licensor, effective upon Licensee destroying all copies of
|
||||
PySimpleGUI in its possession and refraining from receiving or downloading
|
||||
further copies.
|
||||
|
||||
8.3. Termination for Licensee's Breach. This limited License will
|
||||
immediately terminate without notice if Licensee fails to comply with any
|
||||
obligation of this Agreement. Additionally, if Licensor reasonably suspects
|
||||
that Licensee has breached the Agreement, then Licensor may deliver written
|
||||
notice of the suspected breach to Licensee, and the Agreement shall
|
||||
automatically terminate 10 days following the date of such notice unless
|
||||
Licensee cures the breach to Licensor's satisfaction within such period.
|
||||
|
||||
8.4. Effect of Termination; Survival. Upon termination of this Agreement for
|
||||
any reason, the licenses granted to Licensee with respect to PySimpleGUI shall
|
||||
immediately terminate and Licensee hereby undertakes to: (i) immediately cease
|
||||
to use, distribute or otherwise exploit any part of PySimpleGUI or any modified
|
||||
version thereof; and (ii) promptly destroy and delete any copy of PySimpleGUI
|
||||
installed or copied by Licensee. Sections 2.1, 2.3, 3, 5-7, 8.4, 9 and 10 will
|
||||
survive termination of this Agreement indefinitely in accordance with their
|
||||
terms.
|
||||
|
||||
9. Assignment; Governing Law. The License is personal to Licensee and
|
||||
Licensee agrees not to transfer, sublicense, lease, rent, or assign their
|
||||
rights under this Agreement, and any such attempt shall be null and void.
|
||||
Licensor may assign, transfer, or sublicense this Agreement or any rights or
|
||||
obligations thereunder at any time in its sole discretion. This Agreement shall
|
||||
be governed by and construed in accordance with the laws of the State of North
|
||||
Carolina and the United States of America without regard to the conflicts of
|
||||
laws provisions thereof. The parties expressly exclude the United Nations
|
||||
Convention on Contracts for the International Sale of Goods from this
|
||||
Agreement. All actions arising out of or in connection with this Agreement
|
||||
shall be brought in the state or federal courts residing in Durham, North
|
||||
Carolina, United States of America, and both parties hereby irrevocably consent
|
||||
to the exclusive jurisdiction of such courts and waive any objections as to
|
||||
venue or inconvenience of forum.
|
||||
|
||||
10. Miscellaneous. No changes or modifications to this Agreement by
|
||||
Licensee or waivers of any provision of this Agreement by Licensor shall be
|
||||
effective unless evidenced in a writing referencing this Agreement and signed
|
||||
for and on behalf of Licensor. The failure of Licensor to enforce its rights
|
||||
under this Agreement at any time for any period shall not be construed as a
|
||||
waiver of such rights. There are no third party beneficiaries hereunder. This
|
||||
Agreement constitutes the entire agreement between the parties regarding the
|
||||
subject matter hereof and supersede all negotiations, conversations, or
|
||||
discussions between or among the parties relating to the subject matter of this
|
||||
Agreement. Neither Party relied on any promises or representations, written or
|
||||
oral, of the other party in forming this Agreement, except for those expressly
|
||||
contained herein. In the event that any provision of this Agreement shall be
|
||||
determined to be unenforceable, that provision will be limited or eliminated to
|
||||
the minimum extent necessary so that this Agreement shall otherwise remain in
|
||||
full force and effect and enforceable. Licensee may not distribute, download or
|
||||
otherwise export or re-export PySimpleGUI or any underlying technology except
|
||||
in full compliance with this Agreement, United States laws and regulations and
|
||||
any other applicable laws and regulations. Licensee represents and warrants
|
||||
that it and its Authorized Developers are not located in, under control of, or
|
||||
a national or resident of any country where exercise of the licenses granted
|
||||
hereunder would not comply with all such laws or regulations. It is agreed that
|
||||
because of the proprietary nature of PySimpleGUI, Licensor's remedies at law
|
||||
for a breach by the Licensee of its obligations under this Agreement may be
|
||||
inadequate and that Licensor will, in the event of such breach, be entitled to,
|
||||
in addition to any other remedy available to it, equitable relief, including
|
||||
injunctive relief, without the posting of any bond and in addition to all other
|
||||
remedies provided under this Agreement or available at law.
|
||||
|
||||
Exhibit A
|
||||
|
||||
PySimpleGUI Flow-Down License Terms
|
||||
|
||||
This product (the "Product") includes PySimpleGUI (https://PySimpleGUI.com) or
|
||||
a version of PySimpleGUI modified by the person or legal entity that provided
|
||||
you with this product ("Provider").
|
||||
|
||||
PySimpleGUI is Copyright (c) PySimpleSoft, Inc. and/or its licensors.
|
||||
|
||||
Use of PySimpleGUI is subject to the license terms available at
|
||||
https://PySimpleGUI.com/eula, including all limitations of liability and other
|
||||
terms set forth therein. By using the Product, you acknowledge and agree that
|
||||
PySimpleSoft has no obligation or liability to you regarding the operation,
|
||||
support or maintenance of PySimpleGUI or of the Product. PYSIMPLEGUI IS
|
||||
PROVIDED "AS IS," WITHOUT ANY WARRANTIES, WHETHER EXPRESS OR IMPLIED.
|
||||
PYSIMPLESOFT DISCLAIMS ALL IMPLIED WARRANTIES, INCLUDING WITHOUT LIMITATION THE
|
||||
IMPLIED WARRANTIES OF NONINFRINGEMENT, TITLE, MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE.
|
File diff suppressed because one or more lines are too long
@ -1,102 +0,0 @@
|
||||
<p align="center">
|
||||
<img src="https://pysimplegui.net/images/big_news_emoji.png">
|
||||
<br>
|
||||
For more information visit <a href="https://home.PySimpleGUI.com">PySimpleGUI.com</a>
|
||||
</p>
|
||||
|
||||
|
||||
|
||||
##
|
||||
|
||||
<p align="center">
|
||||
<img height="250" src="https://pysimplegui.net/images/logos/Logo_Full_Transparent_Cropped.png">
|
||||
<h2 align="center">User Interfaces for Humans<sup>TM</sup></h2>
|
||||
</p>
|
||||
|
||||
# Welcome to PySimpleGUI 5 !!
|
||||
|
||||
Do you use PySimpleGUI 4? [Here is what you need to know.](https://docs.pysimplegui.com/en/latest/readme/sunset/)
|
||||
|
||||
**PySimpleGUI creates desktop applications easily**, enhancing the tkinter, Qt, WxPython, and Remi frameworks with a much simpler programming interface:
|
||||
|
||||
1. PySimpleGUI user interfaces are defined using core Python data types (lists and dictionaries) that are easily understood by beginners.
|
||||
2. PySimpleGUI event handling changes from a complex callback-based model to a simple message-passing one.
|
||||
3. PySimpleGUI uses simple Python code and has no requirement for object oriented architecture.
|
||||
|
||||
PySimpleGUI is more than a GUI library: PySimpleGUI simplifies much of your Python development process. Sure, it makes developing user interfaces much easier, but PySimpleGUI also tames advanced Python functionality (such as threading) and makes it easy for all users to take their Python applications to the next level. PySimpleGUI is a robust toolkit.
|
||||
|
||||
## Introducing PySimpleGUI 5
|
||||
|
||||
For the last 5 years, PySimpleGUI offered free software with the hope of sustaining the
|
||||
company by donations. We appreciate the support we received, but the amount has been too
|
||||
small to support the PySimpleGUI project. For this reason, PySimpleGUI is switching to a
|
||||
subscription model, where commercial users are expected to pay a nominal annual fee.
|
||||
|
||||
|
||||
PySimpleGUI is now part of PySimpleSoft, Inc., whose mission is to make the best Python
|
||||
application development environment much, much better. Since launching in 2018, PySimpleGUI
|
||||
has helped hobbyists and professionals alike create Python GUIs in a fraction of the time.
|
||||
PySimpleGUI 5 takes PySimpleGUI to the next level, providing hundreds of improvements,
|
||||
including new features, enhanced security, and priority support.
|
||||
|
||||
|
||||
PySimpleGUI 5 is licensed software. As the [License Agreement](license.txt) explains, after a trial
|
||||
period, all PySimpleGUI 5 users must register at PySimpleGUI.com to obtain a Developer Key.
|
||||
For most users (Hobbyist Users), the license is at NO COST. If you are a Commercial User,
|
||||
subscriptions cost a nominal $99/year.
|
||||
|
||||
<p align="center">
|
||||
<img height="350" src="https://github.com/PySimpleGUI/PySimpleGUI_NEW_HOME/assets/65144/0b0dabcc-a538-482b-a226-c194ae30aa24">
|
||||
</p>
|
||||
|
||||
[Subscribe Now](https://pricing.PySimpleGUI.com) and help support the PySimpleGUI community.
|
||||
|
||||
## Examples
|
||||
|
||||
PySimpleGUI users have created thousands of amazing desktop applications. Here are a few screenshots. For more examples, see the [PySimpleGUI gallery](https://gallery.PySimpleGUI.com/).
|
||||
|
||||
<p align="center">
|
||||
<img height="150" src="https://github.com/PySimpleGUI/PSG5/assets/65144/c80eeaed-1029-4e22-83f9-c46fcc6916e6" />
|
||||
|
||||
<img height="150" src="https://github.com/PySimpleGUI/PSG5/assets/65144/dea22a36-b330-4160-96f7-3c7fcb968977" />
|
||||
|
||||
<img height="150" src="https://github.com/PySimpleGUI/PSG5/assets/65144/a9e30456-87aa-4174-90c2-c062f5cf84b9" />
|
||||
</p>
|
||||
|
||||
## Get Started at No Cost
|
||||
|
||||
Whether you are a Hobbyist User or Commercial User, you can start using PySimpleGUI at no cost.
|
||||
To get started with a 30-day trial period, first install Python and then
|
||||
|
||||
python -m pip install pysimplegui
|
||||
|
||||
and run some code, like
|
||||
|
||||
import PySimpleGUI as sg
|
||||
layout = [ [sg.Text('Hello, world!')] ]
|
||||
window = sg.Window('Hello Example', layout)
|
||||
while True:
|
||||
event, values = window.read()
|
||||
if event == sg.WIN_CLOSED:
|
||||
break
|
||||
window.close()
|
||||
|
||||
(You might need to use `python3` instead of `python`.)
|
||||
|
||||
You can try PySimpleGUI for 30 days, after which you will need to Sign Up. Hobbyist users sign up at no cost, and Commercial Users subscribe at $99/year. For more details, see [PySimpleGUI.com/pricing](https://pricing.PySimpleGUI.com).
|
||||
|
||||
## Documentation
|
||||
|
||||
PySimpleGUI provides extensive documentation. Here are some starting points, depending on your needs and expertise:
|
||||
|
||||
* [Documentation](https://docs.pysimplegui.com/) - Extensive PySimpleGUI documentation
|
||||
* [Cookbook](https://cookbook.pysimplegui.com/) - Step-by-step cookbook of PySimpleGUI basics. Find a recipe that is close to what you want to build and use it as a starting point.
|
||||
* [Examples](https://examples.pysimplegui.com/) - Hundreds of sample PySimpleGUI applications.
|
||||
* [SDK Reference](https://sdk.pysimplegui.com/) - details for each PySimpleGUI element
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -1,3 +0,0 @@
|
||||
from .PySimpleGUI import *
|
||||
from .PySimpleGUI import __version__
|
||||
|
@ -1,4 +0,0 @@
|
||||
from .PySimpleGUI import *
|
||||
|
||||
# Run the package's entry point only when this file is executed as a script;
# ``main`` is expected to come from the star import above.
if __name__ == "__main__":
    main()
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -1,222 +0,0 @@
|
||||
# don't import any costly modules
|
||||
import sys
|
||||
import os
|
||||
|
||||
|
||||
# True when the interpreter lists '__pypy__' among its built-in modules
# (i.e. the code is running on PyPy).
is_pypy = '__pypy__' in sys.builtin_module_names
|
||||
|
||||
|
||||
def warn_distutils_present():
    """
    Warn when ``distutils`` has already been imported.

    Returns silently if ``distutils`` is absent from ``sys.modules``, or
    when running on PyPy with a Python version older than 3.7.
    """
    already_imported = 'distutils' in sys.modules
    if not already_imported:
        return

    # PyPy for 3.6 unconditionally imports distutils, so bypass the warning
    # https://foss.heptapod.net/pypy/pypy/-/blob/be829135bc0d758997b3566062999ee8b23872b4/lib-python/3/site.py#L250
    old_pypy = is_pypy and sys.version_info < (3, 7)
    if old_pypy:
        return

    # Imported lazily so the common (no-warning) path stays cheap.
    import warnings

    message = (
        "Distutils was imported before Setuptools, but importing Setuptools "
        "also replaces the `distutils` module in `sys.modules`. This may lead "
        "to undesirable behaviors or errors. To avoid these issues, avoid "
        "using distutils directly, ensure that setuptools is installed in the "
        "traditional way (e.g. not an editable install), and/or make sure "
        "that setuptools is always imported before distutils."
    )
    warnings.warn(message)
|
||||
|
||||
|
||||
def clear_distutils():
    """
    Remove ``distutils`` and every ``distutils.*`` submodule from
    ``sys.modules``, warning first that Setuptools is replacing it.

    Does nothing when ``distutils`` has not been imported.
    """
    if 'distutils' not in sys.modules:
        return

    import warnings

    warnings.warn("Setuptools is replacing distutils.")

    # Iterate over a snapshot of the keys: entries are deleted while looping.
    for mod_name in tuple(sys.modules):
        if mod_name == "distutils" or mod_name.startswith("distutils."):
            del sys.modules[mod_name]
|
||||
|
||||
|
||||
def enabled():
    """
    Allow selection of distutils by environment variable.

    Returns True when ``SETUPTOOLS_USE_DISTUTILS`` is unset or equal to
    ``'local'``; any other value yields False.
    """
    return os.environ.get('SETUPTOOLS_USE_DISTUTILS', 'local') == 'local'
|
||||
|
||||
|
||||
def ensure_local_distutils():
    """
    Drop any already-imported ``distutils`` and re-import it while the shim
    finder is installed, then sanity-check where the result was loaded from.

    Raises AssertionError if ``distutils.core`` does not come from a path
    containing ``_distutils``, or if ``setuptools._distutils.log`` ended up
    in ``sys.modules``.
    """
    from importlib import import_module

    clear_distutils()

    # With the DistutilsMetaFinder in place,
    # perform an import to cause distutils to be
    # loaded from setuptools._distutils. Ref #2906.
    with shim():
        import_module('distutils')

    # check that submodules load as expected
    core_module = import_module('distutils.core')
    assert '_distutils' in core_module.__file__, core_module.__file__
    assert 'setuptools._distutils.log' not in sys.modules
|
||||
|
||||
|
||||
def do_override():
    """
    Ensure that the local copy of distutils is preferred over stdlib.

    See https://github.com/pypa/setuptools/issues/417#issuecomment-392298401
    for more motivation.

    Nothing happens unless ``enabled()`` reports that the local copy
    has been selected.
    """
    if not enabled():
        return
    warn_distutils_present()
    ensure_local_distutils()
|
||||
|
||||
|
||||
class _TrivialRe:
|
||||
def __init__(self, *patterns):
|
||||
self._patterns = patterns
|
||||
|
||||
def match(self, string):
|
||||
return all(pat in string for pat in self._patterns)
|
||||
|
||||
|
||||
class DistutilsMetaFinder:
    """
    Meta path finder that dispatches ``find_spec`` to a ``spec_for_<name>``
    method on itself when one exists for the requested module name.
    """

    def find_spec(self, fullname, path, target=None):
        """
        Return whatever ``spec_for_<fullname>`` returns, or None when no
        such method exists or the module is not eligible for lookup.
        """
        # optimization: only consider top level modules and those
        # found in the CPython test suite.
        if path is not None and not fullname.startswith('test.'):
            return

        method_name = f'spec_for_{fullname}'
        method = getattr(self, method_name, lambda: None)
        return method()

    def spec_for_distutils(self):
        """
        Build a module spec that serves ``setuptools._distutils`` under the
        name ``distutils``. Returns None when ``is_cpython()`` is true or
        when ``setuptools._distutils`` cannot be imported.
        """
        if self.is_cpython():
            return

        import importlib
        import importlib.abc
        import importlib.util

        try:
            mod = importlib.import_module('setuptools._distutils')
        except Exception:
            # There are a couple of cases where setuptools._distutils
            # may not be present:
            # - An older Setuptools without a local distutils is
            #   taking precedence. Ref #2957.
            # - Path manipulation during sitecustomize removes
            #   setuptools from the path but only after the hook
            #   has been loaded. Ref #2980.
            # In either case, fall back to stdlib behavior.
            return

        class DistutilsLoader(importlib.abc.Loader):
            def create_module(self, spec):
                # Hand back the already-imported module, renamed.
                mod.__name__ = 'distutils'
                return mod

            def exec_module(self, module):
                # The module was executed when it was imported above.
                pass

        return importlib.util.spec_from_loader(
            'distutils', DistutilsLoader(), origin=mod.__file__
        )

    @staticmethod
    def is_cpython():
        """
        Suppress supplying distutils for CPython (build and tests).
        Ref #2965 and #3007.

        Detected by the presence of ``pybuilddir.txt`` in the current
        working directory.
        """
        return os.path.isfile('pybuilddir.txt')

    def spec_for_pip(self):
        """
        Ensure stdlib distutils when running under pip.
        See pypa/pip#8761 for rationale.
        """
        if self.pip_imported_during_build():
            return
        clear_distutils()
        # Shadow the method on this instance so later lookups yield no spec.
        self.spec_for_distutils = lambda: None

    @classmethod
    def pip_imported_during_build(cls):
        """
        Detect if pip is being imported in a build script. Ref #2355.
        """
        import traceback

        return any(
            cls.frame_file_is_setup(frame) for frame, line in traceback.walk_stack(None)
        )

    @staticmethod
    def frame_file_is_setup(frame):
        """
        Return True if the indicated frame suggests a setup.py file.
        """
        # some frames may not have __file__ (#2940); also tolerate an
        # explicit ``__file__ = None`` instead of raising AttributeError.
        filename = frame.f_globals.get('__file__') or ''
        return filename.endswith('setup.py')

    def spec_for_sensitive_tests(self):
        """
        Ensure stdlib distutils when running select tests under CPython.

        python/cpython#91169
        """
        clear_distutils()
        self.spec_for_distutils = lambda: None

    # Test modules that get spec_for_sensitive_tests as their handler;
    # the list is shorter from Python 3.10 on.
    sensitive_tests = (
        [
            'test.test_distutils',
            'test.test_peg_generator',
            'test.test_importlib',
        ]
        if sys.version_info < (3, 10)
        else [
            'test.test_distutils',
        ]
    )
|
||||
|
||||
|
||||
# Attach one ``spec_for_<test module>`` attribute per entry in
# ``sensitive_tests`` so that ``find_spec`` dispatches those module names
# to ``spec_for_sensitive_tests``.
for name in DistutilsMetaFinder.sensitive_tests:
    setattr(
        DistutilsMetaFinder,
        'spec_for_' + name,
        DistutilsMetaFinder.spec_for_sensitive_tests,
    )
|
||||
|
||||
|
||||
# Single shared finder instance that the shim helpers below insert into and
# remove from sys.meta_path.
DISTUTILS_FINDER = DistutilsMetaFinder()
|
||||
|
||||
|
||||
def add_shim():
    """Install the finder via ``insert_shim`` unless it is already on ``sys.meta_path``."""
    if DISTUTILS_FINDER not in sys.meta_path:
        insert_shim()
|
||||
|
||||
|
||||
class shim:
    """
    Context manager that calls ``insert_shim`` on entry and ``remove_shim``
    on exit. Exceptions raised inside the ``with`` body are not suppressed.
    """

    def __enter__(self):
        insert_shim()

    def __exit__(self, exc, value, tb):
        remove_shim()
|
||||
|
||||
|
||||
def insert_shim():
    """Put the finder at the front of ``sys.meta_path`` (no duplicate check)."""
    sys.meta_path.insert(0, DISTUTILS_FINDER)
|
||||
|
||||
|
||||
def remove_shim():
    """Take the finder off ``sys.meta_path``; a no-op when it is not present."""
    try:
        sys.meta_path.remove(DISTUTILS_FINDER)
    except ValueError:
        # Finder was not installed; nothing to undo.
        pass
|
Binary file not shown.
Binary file not shown.
@ -1 +0,0 @@
|
||||
__import__('_distutils_hack').do_override()
|
@ -1 +0,0 @@
|
||||
pip
|
@ -1,20 +0,0 @@
|
||||
This package contains a modified version of ca-bundle.crt:
|
||||
|
||||
ca-bundle.crt -- Bundle of CA Root Certificates
|
||||
|
||||
This is a bundle of X.509 certificates of public Certificate Authorities
|
||||
(CA). These were automatically extracted from Mozilla's root certificates
|
||||
file (certdata.txt). This file can be found in the mozilla source tree:
|
||||
https://hg.mozilla.org/mozilla-central/file/tip/security/nss/lib/ckfw/builtins/certdata.txt
|
||||
It contains the certificates in PEM format and therefore
|
||||
can be directly used with curl / libcurl / php_curl, or with
|
||||
an Apache+mod_ssl webserver for SSL client authentication.
|
||||
Just configure this file as the SSLCACertificateFile.#
|
||||
|
||||
***** BEGIN LICENSE BLOCK *****
|
||||
This Source Code Form is subject to the terms of the Mozilla Public License,
|
||||
v. 2.0. If a copy of the MPL was not distributed with this file, You can obtain
|
||||
one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
***** END LICENSE BLOCK *****
|
||||
@(#) $RCSfile: certdata.txt,v $ $Revision: 1.80 $ $Date: 2011/11/03 15:11:58 $
|
@ -1,66 +0,0 @@
|
||||
Metadata-Version: 2.1
|
||||
Name: certifi
|
||||
Version: 2024.2.2
|
||||
Summary: Python package for providing Mozilla's CA Bundle.
|
||||
Home-page: https://github.com/certifi/python-certifi
|
||||
Author: Kenneth Reitz
|
||||
Author-email: me@kennethreitz.com
|
||||
License: MPL-2.0
|
||||
Project-URL: Source, https://github.com/certifi/python-certifi
|
||||
Classifier: Development Status :: 5 - Production/Stable
|
||||
Classifier: Intended Audience :: Developers
|
||||
Classifier: License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)
|
||||
Classifier: Natural Language :: English
|
||||
Classifier: Programming Language :: Python
|
||||
Classifier: Programming Language :: Python :: 3
|
||||
Classifier: Programming Language :: Python :: 3 :: Only
|
||||
Classifier: Programming Language :: Python :: 3.6
|
||||
Classifier: Programming Language :: Python :: 3.7
|
||||
Classifier: Programming Language :: Python :: 3.8
|
||||
Classifier: Programming Language :: Python :: 3.9
|
||||
Classifier: Programming Language :: Python :: 3.10
|
||||
Classifier: Programming Language :: Python :: 3.11
|
||||
Requires-Python: >=3.6
|
||||
License-File: LICENSE
|
||||
|
||||
Certifi: Python SSL Certificates
|
||||
================================
|
||||
|
||||
Certifi provides Mozilla's carefully curated collection of Root Certificates for
|
||||
validating the trustworthiness of SSL certificates while verifying the identity
|
||||
of TLS hosts. It has been extracted from the `Requests`_ project.
|
||||
|
||||
Installation
|
||||
------------
|
||||
|
||||
``certifi`` is available on PyPI. Simply install it with ``pip``::
|
||||
|
||||
$ pip install certifi
|
||||
|
||||
Usage
|
||||
-----
|
||||
|
||||
To reference the installed certificate authority (CA) bundle, you can use the
|
||||
built-in function::
|
||||
|
||||
>>> import certifi
|
||||
|
||||
>>> certifi.where()
|
||||
'/usr/local/lib/python3.7/site-packages/certifi/cacert.pem'
|
||||
|
||||
Or from the command line::
|
||||
|
||||
$ python -m certifi
|
||||
/usr/local/lib/python3.7/site-packages/certifi/cacert.pem
|
||||
|
||||
Enjoy!
|
||||
|
||||
.. _`Requests`: https://requests.readthedocs.io/en/master/
|
||||
|
||||
Addition/Removal of Certificates
|
||||
--------------------------------
|
||||
|
||||
Certifi does not support any addition/removal or other modification of the
|
||||
CA trust store content. This project is intended to provide a reliable and
|
||||
highly portable root of trust to python deployments. Look to upstream projects
|
||||
for methods to use alternate trust.
|
@ -1,14 +0,0 @@
|
||||
certifi-2024.2.2.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
|
||||
certifi-2024.2.2.dist-info/LICENSE,sha256=6TcW2mucDVpKHfYP5pWzcPBpVgPSH2-D8FPkLPwQyvc,989
|
||||
certifi-2024.2.2.dist-info/METADATA,sha256=1noreLRChpOgeSj0uJT1mehiBl8ngh33Guc7KdvzYYM,2170
|
||||
certifi-2024.2.2.dist-info/RECORD,,
|
||||
certifi-2024.2.2.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
||||
certifi-2024.2.2.dist-info/top_level.txt,sha256=KMu4vUCfsjLrkPbSNdgdekS-pVJzBAJFO__nI8NF6-U,8
|
||||
certifi/__init__.py,sha256=ljtEx-EmmPpTe2SOd5Kzsujm_lUD0fKJVnE9gzce320,94
|
||||
certifi/__main__.py,sha256=xBBoj905TUWBLRGANOcf7oi6e-3dMP4cEoG9OyMs11g,243
|
||||
certifi/__pycache__/__init__.cpython-311.pyc,,
|
||||
certifi/__pycache__/__main__.cpython-311.pyc,,
|
||||
certifi/__pycache__/core.cpython-311.pyc,,
|
||||
certifi/cacert.pem,sha256=ejR8qP724p-CtuR4U1WmY1wX-nVeCUD2XxWqj8e9f5I,292541
|
||||
certifi/core.py,sha256=qRDDFyXVJwTB_EmoGppaXU_R9qCZvhl-EzxPMuV3nTA,4426
|
||||
certifi/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@ -1,5 +0,0 @@
|
||||
Wheel-Version: 1.0
|
||||
Generator: bdist_wheel (0.42.0)
|
||||
Root-Is-Purelib: true
|
||||
Tag: py3-none-any
|
||||
|
@ -1 +0,0 @@
|
||||
certifi
|
@ -1,4 +0,0 @@
|
||||
from .core import contents, where
|
||||
|
||||
# Names exported by ``from certifi import *``: the two accessors imported from .core.
__all__ = ["contents", "where"]
|
||||
# Package version string (zero-padded, dot-separated).
__version__ = "2024.02.02"
|
@ -1,12 +0,0 @@
|
||||
import argparse

from certifi import contents, where

# Command-line entry point: print the path of the CA bundle, or the bundle
# text itself when -c/--contents is given.
parser = argparse.ArgumentParser()
parser.add_argument("-c", "--contents", action="store_true")
args = parser.parse_args()

print(contents() if args.contents else where())
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
File diff suppressed because it is too large
Load Diff
@ -1,114 +0,0 @@
|
||||
"""
|
||||
certifi.py
|
||||
~~~~~~~~~~
|
||||
|
||||
This module returns the installation location of cacert.pem or its contents.
|
||||
"""
|
||||
import sys
|
||||
import atexit
|
||||
|
||||
def exit_cacert_ctx() -> None:
    """Leave the context manager stored in the module-level ``_CACERT_CTX``.

    where() registers this function with atexit after it has entered the
    context manager, so it runs once at interpreter shutdown.
    """
    ctx = _CACERT_CTX
    ctx.__exit__(None, None, None)  # type: ignore[union-attr]
|
||||
|
||||
|
||||
if sys.version_info >= (3, 11):

    from importlib.resources import as_file, files

    # Context manager that owns the bundle file, and the path it yielded.
    # Both stay None until where() is called for the first time.
    _CACERT_CTX = None
    _CACERT_PATH = None

    def where() -> str:
        """Return the filesystem path of the bundled ``cacert.pem``.

        The resource is resolved on the first call only; the resulting path
        is cached in ``_CACERT_PATH`` and returned unchanged afterwards.
        """
        # This is slightly terrible, but we want to delay extracting the file
        # in cases where we're inside of a zipimport situation until someone
        # actually calls where(), but we don't want to re-extract the file
        # on every call of where(), so we'll do it once then store it in a
        # global variable.
        global _CACERT_CTX
        global _CACERT_PATH
        if _CACERT_PATH is None:
            # This is slightly janky, the importlib.resources API wants you to
            # manage the cleanup of this file, so it doesn't actually return a
            # path, it returns a context manager that will give you the path
            # when you enter it and will do any cleanup when you leave it. In
            # the common case of not needing a temporary file, it will just
            # return the file system location and the __exit__() is a no-op.
            #
            # We also have to hold onto the actual context manager, because
            # it will do the cleanup whenever it gets garbage collected, so
            # we will also store that at the global level as well.
            _CACERT_CTX = as_file(files("certifi").joinpath("cacert.pem"))
            _CACERT_PATH = str(_CACERT_CTX.__enter__())
            # Leave the context manager at interpreter shutdown.
            atexit.register(exit_cacert_ctx)

        return _CACERT_PATH

    def contents() -> str:
        """Return the text of the bundled ``cacert.pem``, decoded as ASCII."""
        return files("certifi").joinpath("cacert.pem").read_text(encoding="ascii")

elif sys.version_info >= (3, 7):

    from importlib.resources import path as get_path, read_text

    # Context manager that owns the bundle file, and the path it yielded.
    # Both stay None until where() is called for the first time.
    _CACERT_CTX = None
    _CACERT_PATH = None

    def where() -> str:
        """Return the filesystem path of the bundled ``cacert.pem``.

        The resource is resolved on the first call only; the resulting path
        is cached in ``_CACERT_PATH`` and returned unchanged afterwards.
        """
        # This is slightly terrible, but we want to delay extracting the
        # file in cases where we're inside of a zipimport situation until
        # someone actually calls where(), but we don't want to re-extract
        # the file on every call of where(), so we'll do it once then store
        # it in a global variable.
        global _CACERT_CTX
        global _CACERT_PATH
        if _CACERT_PATH is None:
            # This is slightly janky, the importlib.resources API wants you
            # to manage the cleanup of this file, so it doesn't actually
            # return a path, it returns a context manager that will give
            # you the path when you enter it and will do any cleanup when
            # you leave it. In the common case of not needing a temporary
            # file, it will just return the file system location and the
            # __exit__() is a no-op.
            #
            # We also have to hold onto the actual context manager, because
            # it will do the cleanup whenever it gets garbage collected, so
            # we will also store that at the global level as well.
            _CACERT_CTX = get_path("certifi", "cacert.pem")
            _CACERT_PATH = str(_CACERT_CTX.__enter__())
            # Leave the context manager at interpreter shutdown.
            atexit.register(exit_cacert_ctx)

        return _CACERT_PATH

    def contents() -> str:
        """Return the text of the bundled ``cacert.pem``, decoded as ASCII."""
        return read_text("certifi", "cacert.pem", encoding="ascii")

else:
    import os
    import types
    from typing import Union

    Package = Union[types.ModuleType, str]
    Resource = Union[str, "os.PathLike"]

    # This fallback will work for Python versions prior to 3.7 that lack the
    # importlib.resources module but relies on the existing `where` function
    # so won't address issues with environments like PyOxidizer that don't set
    # __file__ on modules.
    def read_text(
        package: Package,
        resource: Resource,
        encoding: str = 'utf-8',
        errors: str = 'strict'
    ) -> str:
        """Read the CA bundle as text.

        ``package`` and ``resource`` are accepted only for signature
        compatibility with ``importlib.resources.read_text``; the file that
        is opened is always the one returned by where().

        ``encoding`` and ``errors`` are both passed on to ``open()``.
        """
        # Forward ``errors`` as well as ``encoding`` so that a caller-supplied
        # error handler is honoured instead of being silently dropped.
        with open(where(), encoding=encoding, errors=errors) as data:
            return data.read()

    # If we don't have importlib.resources, then we will just do the old logic
    # of assuming we're on the filesystem and munge the path directly.
    def where() -> str:
        """Return the path of ``cacert.pem`` next to this module's file."""
        f = os.path.dirname(__file__)

        return os.path.join(f, "cacert.pem")

    def contents() -> str:
        """Return the text of the bundled ``cacert.pem``, decoded as ASCII."""
        return read_text("certifi", "cacert.pem", encoding="ascii")
|
@ -1 +0,0 @@
|
||||
pip
|
@ -1,21 +0,0 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2019 TAHRI Ahmed R.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
@ -1,683 +0,0 @@
|
||||
Metadata-Version: 2.1
|
||||
Name: charset-normalizer
|
||||
Version: 3.3.2
|
||||
Summary: The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet.
|
||||
Home-page: https://github.com/Ousret/charset_normalizer
|
||||
Author: Ahmed TAHRI
|
||||
Author-email: ahmed.tahri@cloudnursery.dev
|
||||
License: MIT
|
||||
Project-URL: Bug Reports, https://github.com/Ousret/charset_normalizer/issues
|
||||
Project-URL: Documentation, https://charset-normalizer.readthedocs.io/en/latest
|
||||
Keywords: encoding,charset,charset-detector,detector,normalization,unicode,chardet,detect
|
||||
Classifier: Development Status :: 5 - Production/Stable
|
||||
Classifier: License :: OSI Approved :: MIT License
|
||||
Classifier: Intended Audience :: Developers
|
||||
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
||||
Classifier: Operating System :: OS Independent
|
||||
Classifier: Programming Language :: Python
|
||||
Classifier: Programming Language :: Python :: 3
|
||||
Classifier: Programming Language :: Python :: 3.7
|
||||
Classifier: Programming Language :: Python :: 3.8
|
||||
Classifier: Programming Language :: Python :: 3.9
|
||||
Classifier: Programming Language :: Python :: 3.10
|
||||
Classifier: Programming Language :: Python :: 3.11
|
||||
Classifier: Programming Language :: Python :: 3.12
|
||||
Classifier: Programming Language :: Python :: Implementation :: PyPy
|
||||
Classifier: Topic :: Text Processing :: Linguistic
|
||||
Classifier: Topic :: Utilities
|
||||
Classifier: Typing :: Typed
|
||||
Requires-Python: >=3.7.0
|
||||
Description-Content-Type: text/markdown
|
||||
License-File: LICENSE
|
||||
Provides-Extra: unicode_backport
|
||||
|
||||
<h1 align="center">Charset Detection, for Everyone 👋</h1>
|
||||
|
||||
<p align="center">
|
||||
<sup>The Real First Universal Charset Detector</sup><br>
|
||||
<a href="https://pypi.org/project/charset-normalizer">
|
||||
<img src="https://img.shields.io/pypi/pyversions/charset_normalizer.svg?orange=blue" />
|
||||
</a>
|
||||
<a href="https://pepy.tech/project/charset-normalizer/">
|
||||
<img alt="Download Count Total" src="https://static.pepy.tech/badge/charset-normalizer/month" />
|
||||
</a>
|
||||
<a href="https://bestpractices.coreinfrastructure.org/projects/7297">
|
||||
<img src="https://bestpractices.coreinfrastructure.org/projects/7297/badge">
|
||||
</a>
|
||||
</p>
|
||||
<p align="center">
|
||||
<sup><i>Featured Packages</i></sup><br>
|
||||
<a href="https://github.com/jawah/niquests">
|
||||
<img alt="Static Badge" src="https://img.shields.io/badge/Niquests-HTTP_1.1%2C%202%2C_and_3_Client-cyan">
|
||||
</a>
|
||||
<a href="https://github.com/jawah/wassima">
|
||||
<img alt="Static Badge" src="https://img.shields.io/badge/Wassima-Certifi_Killer-cyan">
|
||||
</a>
|
||||
</p>
|
||||
<p align="center">
|
||||
<sup><i>In other language (unofficial port - by the community)</i></sup><br>
|
||||
<a href="https://github.com/nickspring/charset-normalizer-rs">
|
||||
<img alt="Static Badge" src="https://img.shields.io/badge/Rust-red">
|
||||
</a>
|
||||
</p>
|
||||
|
||||
> A library that helps you read text from an unknown charset encoding.<br /> Motivated by `chardet`,
|
||||
> I'm trying to resolve the issue by taking a new approach.
|
||||
> All IANA character set names for which the Python core library provides codecs are supported.
|
||||
|
||||
<p align="center">
|
||||
>>>>> <a href="https://charsetnormalizerweb.ousret.now.sh" target="_blank">👉 Try Me Online Now, Then Adopt Me 👈 </a> <<<<<
|
||||
</p>
|
||||
|
||||
This project offers you an alternative to **Universal Charset Encoding Detector**, also known as **Chardet**.
|
||||
|
||||
| Feature | [Chardet](https://github.com/chardet/chardet) | Charset Normalizer | [cChardet](https://github.com/PyYoshi/cChardet) |
|
||||
|--------------------------------------------------|:---------------------------------------------:|:--------------------------------------------------------------------------------------------------:|:-----------------------------------------------:|
|
||||
| `Fast` | ❌ | ✅ | ✅ |
|
||||
| `Universal**` | ❌ | ✅ | ❌ |
|
||||
| `Reliable` **without** distinguishable standards | ❌ | ✅ | ✅ |
|
||||
| `Reliable` **with** distinguishable standards | ✅ | ✅ | ✅ |
|
||||
| `License` | LGPL-2.1<br>_restrictive_ | MIT | MPL-1.1<br>_restrictive_ |
|
||||
| `Native Python` | ✅ | ✅ | ❌ |
|
||||
| `Detect spoken language` | ❌ | ✅ | N/A |
|
||||
| `UnicodeDecodeError Safety` | ❌ | ✅ | ❌ |
|
||||
| `Whl Size (min)` | 193.6 kB | 42 kB | ~200 kB |
|
||||
| `Supported Encoding` | 33 | 🎉 [99](https://charset-normalizer.readthedocs.io/en/latest/user/support.html#supported-encodings) | 40 |
|
||||
|
||||
<p align="center">
|
||||
<img src="https://i.imgflip.com/373iay.gif" alt="Reading Normalized Text" width="226"/><img src="https://media.tenor.com/images/c0180f70732a18b4965448d33adba3d0/tenor.gif" alt="Cat Reading Text" width="200"/>
|
||||
</p>
|
||||
|
||||
*\*\* : They are clearly using specific code for a specific encoding even if covering most of used one*<br>
|
||||
Did you get here because of the logs? See [https://charset-normalizer.readthedocs.io/en/latest/user/miscellaneous.html](https://charset-normalizer.readthedocs.io/en/latest/user/miscellaneous.html)
|
||||
|
||||
## ⚡ Performance
|
||||
|
||||
This package offers better performance than its counterpart Chardet. Here are some numbers.
|
||||
|
||||
| Package | Accuracy | Mean per file (ms) | File per sec (est) |
|
||||
|-----------------------------------------------|:--------:|:------------------:|:------------------:|
|
||||
| [chardet](https://github.com/chardet/chardet) | 86 % | 200 ms | 5 file/sec |
|
||||
| charset-normalizer | **98 %** | **10 ms** | 100 file/sec |
|
||||
|
||||
| Package | 99th percentile | 95th percentile | 50th percentile |
|
||||
|-----------------------------------------------|:---------------:|:---------------:|:---------------:|
|
||||
| [chardet](https://github.com/chardet/chardet) | 1200 ms | 287 ms | 23 ms |
|
||||
| charset-normalizer | 100 ms | 50 ms | 5 ms |
|
||||
|
||||
Chardet's performance on larger files (1MB+) is very poor. Expect a huge difference on large payloads.
|
||||
|
||||
> Stats are generated using 400+ files using default parameters. More details on used files, see GHA workflows.
|
||||
> And yes, these results might change at any time. The dataset can be updated to include more files.
|
||||
> The actual delays heavily depend on your CPU capabilities. The factors should remain the same.
|
||||
> Keep in mind that the stats are generous and that Chardet's accuracy vs ours is measured using Chardet's initial capability
|
||||
> (eg. Supported Encoding) Challenge-them if you want.
|
||||
|
||||
## ✨ Installation
|
||||
|
||||
Using pip:
|
||||
|
||||
```sh
|
||||
pip install charset-normalizer -U
|
||||
```
|
||||
|
||||
## 🚀 Basic Usage
|
||||
|
||||
### CLI
|
||||
This package comes with a CLI.
|
||||
|
||||
```
|
||||
usage: normalizer [-h] [-v] [-a] [-n] [-m] [-r] [-f] [-t THRESHOLD]
|
||||
file [file ...]
|
||||
|
||||
The Real First Universal Charset Detector. Discover originating encoding used
|
||||
on text file. Normalize text to unicode.
|
||||
|
||||
positional arguments:
|
||||
files File(s) to be analysed
|
||||
|
||||
optional arguments:
|
||||
-h, --help show this help message and exit
|
||||
-v, --verbose Display complementary information about file if any.
|
||||
Stdout will contain logs about the detection process.
|
||||
-a, --with-alternative
|
||||
Output complementary possibilities if any. Top-level
|
||||
JSON WILL be a list.
|
||||
-n, --normalize Permit to normalize input file. If not set, program
|
||||
does not write anything.
|
||||
-m, --minimal Only output the charset detected to STDOUT. Disabling
|
||||
JSON output.
|
||||
-r, --replace Replace file when trying to normalize it instead of
|
||||
creating a new one.
|
||||
-f, --force Replace file without asking if you are sure, use this
|
||||
flag with caution.
|
||||
-t THRESHOLD, --threshold THRESHOLD
|
||||
Define a custom maximum amount of chaos allowed in
|
||||
decoded content. 0. <= chaos <= 1.
|
||||
--version Show version information and exit.
|
||||
```
|
||||
|
||||
```bash
|
||||
normalizer ./data/sample.1.fr.srt
|
||||
```
|
||||
|
||||
or
|
||||
|
||||
```bash
|
||||
python -m charset_normalizer ./data/sample.1.fr.srt
|
||||
```
|
||||
|
||||
🎉 Since version 1.4.0 the CLI produces an easily usable stdout result in JSON format.
|
||||
|
||||
```json
|
||||
{
|
||||
"path": "/home/default/projects/charset_normalizer/data/sample.1.fr.srt",
|
||||
"encoding": "cp1252",
|
||||
"encoding_aliases": [
|
||||
"1252",
|
||||
"windows_1252"
|
||||
],
|
||||
"alternative_encodings": [
|
||||
"cp1254",
|
||||
"cp1256",
|
||||
"cp1258",
|
||||
"iso8859_14",
|
||||
"iso8859_15",
|
||||
"iso8859_16",
|
||||
"iso8859_3",
|
||||
"iso8859_9",
|
||||
"latin_1",
|
||||
"mbcs"
|
||||
],
|
||||
"language": "French",
|
||||
"alphabets": [
|
||||
"Basic Latin",
|
||||
"Latin-1 Supplement"
|
||||
],
|
||||
"has_sig_or_bom": false,
|
||||
"chaos": 0.149,
|
||||
"coherence": 97.152,
|
||||
"unicode_path": null,
|
||||
"is_preferred": true
|
||||
}
|
||||
```
|
||||
|
||||
### Python
|
||||
*Just print out normalized text*
|
||||
```python
|
||||
from charset_normalizer import from_path
|
||||
|
||||
results = from_path('./my_subtitle.srt')
|
||||
|
||||
print(str(results.best()))
|
||||
```
|
||||
|
||||
*Upgrade your code without effort*
|
||||
```python
|
||||
from charset_normalizer import detect
|
||||
```
|
||||
|
||||
The above code will behave the same as **chardet**. We ensure that we offer the best (reasonable) BC result possible.
|
||||
|
||||
See the docs for advanced usage : [readthedocs.io](https://charset-normalizer.readthedocs.io/en/latest/)
|
||||
|
||||
## 😇 Why
|
||||
|
||||
When I started using Chardet, I noticed that it was not suited to my expectations, and I wanted to propose a
|
||||
reliable alternative using a completely different method. Also! I never back down on a good challenge!
|
||||
|
||||
I **don't care** about the **originating charset** encoding, because **two different tables** can
|
||||
produce **two identical rendered strings.**
|
||||
What I want is to get readable text, the best I can.
|
||||
|
||||
In a way, **I'm brute forcing text decoding.** How cool is that ? 😎
|
||||
|
||||
Don't confuse the **ftfy** package with charset-normalizer or chardet. ftfy's goal is to repair Unicode strings, whereas charset-normalizer's goal is to convert raw files in an unknown encoding to Unicode.
|
||||
|
||||
## 🍰 How
|
||||
|
||||
- Discard all charset encoding tables that could not fit the binary content.
|
||||
- Measure noise, or the mess once opened (by chunks) with a corresponding charset encoding.
|
||||
- Extract matches with the lowest mess detected.
|
||||
- Additionally, we measure coherence / probe for a language.
|
||||
|
||||
**Wait a minute**, what is noise/mess and coherence according to **YOU ?**
|
||||
|
||||
*Noise :* I opened hundreds of text files, **written by humans**, with the wrong encoding table. **I observed**, then
|
||||
**I established** some ground rules about **what is obvious** when **it seems like** a mess.
|
||||
I know that my interpretation of what is noise is probably incomplete, feel free to contribute in order to
|
||||
improve or rewrite it.
|
||||
|
||||
*Coherence :* For each language there is on earth, we have computed ranked letter appearance occurrences (the best we can). So I thought
|
||||
that intel is worth something here. So I use those records against decoded text to check if I can detect intelligent design.
|
||||
|
||||
## ⚡ Known limitations
|
||||
|
||||
- Language detection is unreliable when text contains two or more languages sharing identical letters. (eg. HTML (english tags) + Turkish content (Sharing Latin characters))
|
||||
- Every charset detector heavily depends on sufficient content. In common cases, do not bother running detection on very tiny content.
|
||||
|
||||
## ⚠️ About Python EOLs
|
||||
|
||||
**If you are running:**
|
||||
|
||||
- Python >=2.7,<3.5: Unsupported
|
||||
- Python 3.5: charset-normalizer < 2.1
|
||||
- Python 3.6: charset-normalizer < 3.1
|
||||
- Python 3.7: charset-normalizer < 4.0
|
||||
|
||||
Upgrade your Python interpreter as soon as possible.
|
||||
|
||||
## 👤 Contributing
|
||||
|
||||
Contributions, issues and feature requests are very much welcome.<br />
|
||||
Feel free to check [issues page](https://github.com/ousret/charset_normalizer/issues) if you want to contribute.
|
||||
|
||||
## 📝 License
|
||||
|
||||
Copyright © [Ahmed TAHRI @Ousret](https://github.com/Ousret).<br />
|
||||
This project is [MIT](https://github.com/Ousret/charset_normalizer/blob/master/LICENSE) licensed.
|
||||
|
||||
Characters frequencies used in this project © 2012 [Denny Vrandečić](http://simia.net/letters/)
|
||||
|
||||
## 💼 For Enterprise
|
||||
|
||||
Professional support for charset-normalizer is available as part of the [Tidelift
|
||||
Subscription][1]. Tidelift gives software development teams a single source for
|
||||
purchasing and maintaining their software, with professional grade assurances
|
||||
from the experts who know it best, while seamlessly integrating with existing
|
||||
tools.
|
||||
|
||||
[1]: https://tidelift.com/subscription/pkg/pypi-charset-normalizer?utm_source=pypi-charset-normalizer&utm_medium=readme
|
||||
|
||||
# Changelog
|
||||
All notable changes to charset-normalizer will be documented in this file. This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
|
||||
|
||||
## [3.3.2](https://github.com/Ousret/charset_normalizer/compare/3.3.1...3.3.2) (2023-10-31)
|
||||
|
||||
### Fixed
|
||||
- Unintentional memory usage regression when using large payloads that match several encodings (#376)
|
||||
- Regression on some detection cases showcased in the documentation (#371)
|
||||
|
||||
### Added
|
||||
- Noise (md) probe that identifies malformed Arabic representation due to the presence of letters in isolated form (credit to my wife)
|
||||
|
||||
## [3.3.1](https://github.com/Ousret/charset_normalizer/compare/3.3.0...3.3.1) (2023-10-22)
|
||||
|
||||
### Changed
|
||||
- Optional mypyc compilation upgraded to version 1.6.1 for Python >= 3.8
|
||||
- Improved the general detection reliability based on reports from the community
|
||||
|
||||
## [3.3.0](https://github.com/Ousret/charset_normalizer/compare/3.2.0...3.3.0) (2023-09-30)
|
||||
|
||||
### Added
|
||||
- Allow to execute the CLI (e.g. normalizer) through `python -m charset_normalizer.cli` or `python -m charset_normalizer`
|
||||
- Support for 9 forgotten encodings that are supported by Python but unlisted in `encoding.aliases` as they have no alias (#323)
|
||||
|
||||
### Removed
|
||||
- (internal) Redundant utils.is_ascii function and unused function is_private_use_only
|
||||
- (internal) charset_normalizer.assets is moved inside charset_normalizer.constant
|
||||
|
||||
### Changed
|
||||
- (internal) Unicode code blocks in constants are updated using the latest v15.0.0 definition to improve detection
|
||||
- Optional mypyc compilation upgraded to version 1.5.1 for Python >= 3.8
|
||||
|
||||
### Fixed
|
||||
- Unable to properly sort CharsetMatch when both chaos/noise and coherence were close due to an unreachable condition in \_\_lt\_\_ (#350)
|
||||
|
||||
## [3.2.0](https://github.com/Ousret/charset_normalizer/compare/3.1.0...3.2.0) (2023-06-07)
|
||||
|
||||
### Changed
|
||||
- Typehint for function `from_path` no longer enforces `PathLike` as its first argument
|
||||
- Minor improvement over the global detection reliability
|
||||
|
||||
### Added
|
||||
- Introduce function `is_binary` that relies on main capabilities, and optimized to detect binaries
|
||||
- Propagate `enable_fallback` argument throughout `from_bytes`, `from_path`, and `from_fp` that allows deeper control over the detection (default True)
|
||||
- Explicit support for Python 3.12
|
||||
|
||||
### Fixed
|
||||
- Edge case detection failure where a file would contain 'very-long' camel cased word (Issue #289)
|
||||
|
||||
## [3.1.0](https://github.com/Ousret/charset_normalizer/compare/3.0.1...3.1.0) (2023-03-06)
|
||||
|
||||
### Added
|
||||
- Argument `should_rename_legacy` for legacy function `detect` and disregard any new arguments without errors (PR #262)
|
||||
|
||||
### Removed
|
||||
- Support for Python 3.6 (PR #260)
|
||||
|
||||
### Changed
|
||||
- Optional speedup provided by mypy/c 1.0.1
|
||||
|
||||
## [3.0.1](https://github.com/Ousret/charset_normalizer/compare/3.0.0...3.0.1) (2022-11-18)
|
||||
|
||||
### Fixed
|
||||
- Multi-bytes cutter/chunk generator did not always cut correctly (PR #233)
|
||||
|
||||
### Changed
|
||||
- Speedup provided by mypy/c 0.990 on Python >= 3.7
|
||||
|
||||
## [3.0.0](https://github.com/Ousret/charset_normalizer/compare/2.1.1...3.0.0) (2022-10-20)
|
||||
|
||||
### Added
|
||||
- Extend the capability of explain=True when cp_isolation contains at most two entries (min one), will log in details of the Mess-detector results
|
||||
- Support for alternative language frequency set in charset_normalizer.assets.FREQUENCIES
|
||||
- Add parameter `language_threshold` in `from_bytes`, `from_path` and `from_fp` to adjust the minimum expected coherence ratio
|
||||
- `normalizer --version` now specifies whether the current version provides extra speedup (meaning mypyc compilation whl)
|
||||
|
||||
### Changed
|
||||
- Build with static metadata using 'build' frontend
|
||||
- Make the language detection stricter
|
||||
- Optional: Module `md.py` can be compiled using Mypyc to provide an extra speedup up to 4x faster than v2.1
|
||||
|
||||
### Fixed
|
||||
- CLI with opt --normalize fails when using full path for files
|
||||
- TooManyAccentuatedPlugin induces false positives on the mess detection when too few alpha characters have been fed to it
|
||||
- Sphinx warnings when generating the documentation
|
||||
|
||||
### Removed
|
||||
- Coherence detector no longer return 'Simple English' instead return 'English'
|
||||
- Coherence detector no longer return 'Classical Chinese' instead return 'Chinese'
|
||||
- Breaking: Method `first()` and `best()` from CharsetMatch
|
||||
- UTF-7 will no longer appear as "detected" without a recognized SIG/mark (is unreliable/conflict with ASCII)
|
||||
- Breaking: Class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches
|
||||
- Breaking: Top-level function `normalize`
|
||||
- Breaking: Properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch
|
||||
- Support for the backport `unicodedata2`
|
||||
|
||||
## [3.0.0rc1](https://github.com/Ousret/charset_normalizer/compare/3.0.0b2...3.0.0rc1) (2022-10-18)
|
||||
|
||||
### Added
|
||||
- Extend the capability of explain=True when cp_isolation contains at most two entries (min one), will log in details of the Mess-detector results
|
||||
- Support for alternative language frequency set in charset_normalizer.assets.FREQUENCIES
|
||||
- Add parameter `language_threshold` in `from_bytes`, `from_path` and `from_fp` to adjust the minimum expected coherence ratio
|
||||
|
||||
### Changed
|
||||
- Build with static metadata using 'build' frontend
|
||||
- Make the language detection stricter
|
||||
|
||||
### Fixed
|
||||
- CLI with opt --normalize fails when using full path for files
|
||||
- TooManyAccentuatedPlugin induces false positives on the mess detection when too few alpha characters have been fed to it
|
||||
|
||||
### Removed
|
||||
- Coherence detector no longer return 'Simple English' instead return 'English'
|
||||
- Coherence detector no longer return 'Classical Chinese' instead return 'Chinese'
|
||||
|
||||
## [3.0.0b2](https://github.com/Ousret/charset_normalizer/compare/3.0.0b1...3.0.0b2) (2022-08-21)
|
||||
|
||||
### Added
|
||||
- `normalizer --version` now specifies whether the current version provides extra speedup (meaning mypyc compilation whl)
|
||||
|
||||
### Removed
|
||||
- Breaking: Method `first()` and `best()` from CharsetMatch
|
||||
- UTF-7 will no longer appear as "detected" without a recognized SIG/mark (is unreliable/conflict with ASCII)
|
||||
|
||||
### Fixed
|
||||
- Sphinx warnings when generating the documentation
|
||||
|
||||
## [3.0.0b1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...3.0.0b1) (2022-08-15)
|
||||
|
||||
### Changed
|
||||
- Optional: Module `md.py` can be compiled using Mypyc to provide an extra speedup up to 4x faster than v2.1
|
||||
|
||||
### Removed
|
||||
- Breaking: Class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches
|
||||
- Breaking: Top-level function `normalize`
|
||||
- Breaking: Properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch
|
||||
- Support for the backport `unicodedata2`
|
||||
|
||||
## [2.1.1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...2.1.1) (2022-08-19)
|
||||
|
||||
### Deprecated
|
||||
- Function `normalize` scheduled for removal in 3.0
|
||||
|
||||
### Changed
|
||||
- Removed useless call to decode in fn is_unprintable (#206)
|
||||
|
||||
### Fixed
|
||||
- Third-party library (i18n xgettext) crashing not recognizing utf_8 (PEP 263) with underscore from [@aleksandernovikov](https://github.com/aleksandernovikov) (#204)
|
||||
|
||||
## [2.1.0](https://github.com/Ousret/charset_normalizer/compare/2.0.12...2.1.0) (2022-06-19)
|
||||
|
||||
### Added
|
||||
- Output the Unicode table version when running the CLI with `--version` (PR #194)
|
||||
|
||||
### Changed
|
||||
- Re-use decoded buffer for single byte character sets from [@nijel](https://github.com/nijel) (PR #175)
|
||||
- Fixing some performance bottlenecks from [@deedy5](https://github.com/deedy5) (PR #183)
|
||||
|
||||
### Fixed
|
||||
- Workaround potential bug in cpython with Zero Width No-Break Space located in Arabic Presentation Forms-B, Unicode 1.1 not acknowledged as space (PR #175)
|
||||
- CLI default threshold aligned with the API threshold from [@oleksandr-kuzmenko](https://github.com/oleksandr-kuzmenko) (PR #181)
|
||||
|
||||
### Removed
|
||||
- Support for Python 3.5 (PR #192)
|
||||
|
||||
### Deprecated
|
||||
- Use of backport unicodedata from `unicodedata2` as Python is quickly catching up, scheduled for removal in 3.0 (PR #194)
|
||||
|
||||
## [2.0.12](https://github.com/Ousret/charset_normalizer/compare/2.0.11...2.0.12) (2022-02-12)
|
||||
|
||||
### Fixed
|
||||
- ASCII misdetection in rare cases (PR #170)
|
||||
|
||||
## [2.0.11](https://github.com/Ousret/charset_normalizer/compare/2.0.10...2.0.11) (2022-01-30)
|
||||
|
||||
### Added
|
||||
- Explicit support for Python 3.11 (PR #164)
|
||||
|
||||
### Changed
|
||||
- The logging behavior has been completely reviewed, now using only TRACE and DEBUG levels (PR #163 #165)
|
||||
|
||||
## [2.0.10](https://github.com/Ousret/charset_normalizer/compare/2.0.9...2.0.10) (2022-01-04)
|
||||
|
||||
### Fixed
|
||||
- Fallback match entries might lead to UnicodeDecodeError for large bytes sequence (PR #154)
|
||||
|
||||
### Changed
|
||||
- Skipping the language-detection (CD) on ASCII (PR #155)
|
||||
|
||||
## [2.0.9](https://github.com/Ousret/charset_normalizer/compare/2.0.8...2.0.9) (2021-12-03)
|
||||
|
||||
### Changed
|
||||
- Moderating the logging impact (since 2.0.8) for specific environments (PR #147)
|
||||
|
||||
### Fixed
|
||||
- Wrong logging level applied when setting kwarg `explain` to True (PR #146)
|
||||
|
||||
## [2.0.8](https://github.com/Ousret/charset_normalizer/compare/2.0.7...2.0.8) (2021-11-24)
|
||||
### Changed
|
||||
- Improvement over Vietnamese detection (PR #126)
|
||||
- MD improvement on trailing data and long foreign (non-pure latin) data (PR #124)
|
||||
- Efficiency improvements in cd/alphabet_languages from [@adbar](https://github.com/adbar) (PR #122)
|
||||
- call sum() without an intermediary list following PEP 289 recommendations from [@adbar](https://github.com/adbar) (PR #129)
|
||||
- Code style as refactored by Sourcery-AI (PR #131)
|
||||
- Minor adjustment on the MD around european words (PR #133)
|
||||
- Remove and replace SRTs from assets / tests (PR #139)
|
||||
- Initialize the library logger with a `NullHandler` by default from [@nmaynes](https://github.com/nmaynes) (PR #135)
|
||||
- Setting kwarg `explain` to True will add provisionally (bounded to function lifespan) a specific stream handler (PR #135)
|
||||
|
||||
### Fixed
|
||||
- Fix large (misleading) sequence giving UnicodeDecodeError (PR #137)
|
||||
- Avoid using too insignificant chunk (PR #137)
|
||||
|
||||
### Added
|
||||
- Add and expose function `set_logging_handler` to configure a specific StreamHandler from [@nmaynes](https://github.com/nmaynes) (PR #135)
|
||||
- Add `CHANGELOG.md` entries, format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) (PR #141)
|
||||
|
||||
## [2.0.7](https://github.com/Ousret/charset_normalizer/compare/2.0.6...2.0.7) (2021-10-11)
|
||||
### Added
|
||||
- Add support for Kazakh (Cyrillic) language detection (PR #109)
|
||||
|
||||
### Changed
|
||||
- Further, improve inferring the language from a given single-byte code page (PR #112)
|
||||
- Vainly trying to leverage PEP263 when PEP3120 is not supported (PR #116)
|
||||
- Refactoring for potential performance improvements in loops from [@adbar](https://github.com/adbar) (PR #113)
|
||||
- Various detection improvement (MD+CD) (PR #117)
|
||||
|
||||
### Removed
|
||||
- Remove redundant logging entry about detected language(s) (PR #115)
|
||||
|
||||
### Fixed
|
||||
- Fix a minor inconsistency between Python 3.5 and other versions regarding language detection (PR #117 #102)
|
||||
|
||||
## [2.0.6](https://github.com/Ousret/charset_normalizer/compare/2.0.5...2.0.6) (2021-09-18)
|
||||
### Fixed
|
||||
- Unforeseen regression with the loss of the backward-compatibility with some older minor of Python 3.5.x (PR #100)
|
||||
- Fix CLI crash when using --minimal output in certain cases (PR #103)
|
||||
|
||||
### Changed
|
||||
- Minor improvement to the detection efficiency (less than 1%) (PR #106 #101)
|
||||
|
||||
## [2.0.5](https://github.com/Ousret/charset_normalizer/compare/2.0.4...2.0.5) (2021-09-14)
|
||||
### Changed
|
||||
- The project now complies with: flake8, mypy, isort and black to ensure a better overall quality (PR #81)
|
||||
- The BC-support with v1.x was improved, the old staticmethods are restored (PR #82)
|
||||
- The Unicode detection is slightly improved (PR #93)
|
||||
- Add syntax sugar \_\_bool\_\_ for results CharsetMatches list-container (PR #91)
|
||||
|
||||
### Removed
|
||||
- The project no longer raises a warning on tiny content given for detection; it is simply logged as a warning instead (PR #92)
|
||||
|
||||
### Fixed
|
||||
- In some rare cases, the chunks extractor could cut in the middle of a multi-byte character and could mislead the mess detection (PR #95)
|
||||
- Some rare 'space' characters could trip up the UnprintablePlugin/Mess detection (PR #96)
|
||||
- The MANIFEST.in was not exhaustive (PR #78)
|
||||
|
||||
## [2.0.4](https://github.com/Ousret/charset_normalizer/compare/2.0.3...2.0.4) (2021-07-30)
|
||||
### Fixed
|
||||
- The CLI no longer raises an unexpected exception when no encoding has been found (PR #70)
|
||||
- Fix accessing the 'alphabets' property when the payload contains surrogate characters (PR #68)
|
||||
- The logger could mislead (explain=True) on detected languages and the impact of one MBCS match (PR #72)
|
||||
- Submatch factoring could be wrong in rare edge cases (PR #72)
|
||||
- Multiple files given to the CLI were ignored when publishing results to STDOUT. (After the first path) (PR #72)
|
||||
- Fix line endings from CRLF to LF for certain project files (PR #67)
|
||||
|
||||
### Changed
|
||||
- Adjust the MD to lower the sensitivity, thus improving the global detection reliability (PR #69 #76)
|
||||
- Allow fallback on specified encoding if any (PR #71)
|
||||
|
||||
## [2.0.3](https://github.com/Ousret/charset_normalizer/compare/2.0.2...2.0.3) (2021-07-16)
|
||||
### Changed
|
||||
- Part of the detection mechanism has been improved to be less sensitive, resulting in more accurate detection results. Especially ASCII. (PR #63)
|
||||
- According to the community wishes, the detection will fall back on ASCII or UTF-8 in a last-resort case. (PR #64)
|
||||
|
||||
## [2.0.2](https://github.com/Ousret/charset_normalizer/compare/2.0.1...2.0.2) (2021-07-15)
|
||||
### Fixed
|
||||
- Empty/Too small JSON payload misdetection fixed. Report from [@tseaver](https://github.com/tseaver) (PR #59)
|
||||
|
||||
### Changed
|
||||
- Don't inject unicodedata2 into sys.modules from [@akx](https://github.com/akx) (PR #57)
|
||||
|
||||
## [2.0.1](https://github.com/Ousret/charset_normalizer/compare/2.0.0...2.0.1) (2021-07-13)
|
||||
### Fixed
|
||||
- Make it work where there isn't a filesystem available, dropping assets frequencies.json. Report from [@sethmlarson](https://github.com/sethmlarson). (PR #55)
|
||||
- Using explain=False permanently disabled the verbose output in the current runtime (PR #47)
|
||||
- One log entry (language target preemptive) was not shown in logs when using explain=True (PR #47)
|
||||
- Fix undesired exception (ValueError) on getitem of instance CharsetMatches (PR #52)
|
||||
|
||||
### Changed
|
||||
- Public function normalize default args values were not aligned with from_bytes (PR #53)
|
||||
|
||||
### Added
|
||||
- You may now use charset aliases in cp_isolation and cp_exclusion arguments (PR #47)
|
||||
|
||||
## [2.0.0](https://github.com/Ousret/charset_normalizer/compare/1.4.1...2.0.0) (2021-07-02)
|
||||
### Changed
|
||||
- 4 to 5 times faster than the previous 1.4.0 release. At least 2x faster than Chardet.
|
||||
- Emphasis has been put on UTF-8 detection, which should be nearly instantaneous.
|
||||
- The backward compatibility with Chardet has been greatly improved. The legacy detect function returns an identical charset name whenever possible.
|
||||
- The detection mechanism has been slightly improved, now Turkish content is detected correctly (most of the time)
|
||||
- The program has been rewritten to ease the readability and maintainability. (+ Using static typing)
|
||||
- utf_7 detection has been reinstated.
|
||||
|
||||
### Removed
|
||||
- This package no longer requires anything when used with Python 3.5 (Dropped cached_property)
|
||||
- Removed support for these languages: Catalan, Esperanto, Kazakh, Basque, Volapük, Azeri, Galician, Nynorsk, Macedonian, and Serbocroatian.
|
||||
- The exception hook on UnicodeDecodeError has been removed.
|
||||
|
||||
### Deprecated
|
||||
- Methods coherence_non_latin, w_counter, chaos_secondary_pass of the class CharsetMatch are now deprecated and scheduled for removal in v3.0
|
||||
|
||||
### Fixed
|
||||
- The CLI output used the relative path of the file(s). Should be absolute.
|
||||
|
||||
## [1.4.1](https://github.com/Ousret/charset_normalizer/compare/1.4.0...1.4.1) (2021-05-28)
|
||||
### Fixed
|
||||
- Logger configuration/usage no longer conflict with others (PR #44)
|
||||
|
||||
## [1.4.0](https://github.com/Ousret/charset_normalizer/compare/1.3.9...1.4.0) (2021-05-21)
|
||||
### Removed
|
||||
- Using standard logging instead of using the package loguru.
|
||||
- Dropping nose test framework in favor of the maintained pytest.
|
||||
- Choose to not use dragonmapper package to help with gibberish Chinese/CJK text.
|
||||
- Require cached_property only for Python 3.5 due to constraint. Dropping for every other interpreter version.
|
||||
- Stop support for UTF-7 that does not contain a SIG.
|
||||
- Dropping PrettyTable, replaced with pure JSON output in CLI.
|
||||
|
||||
### Fixed
|
||||
- BOM marker in a CharsetNormalizerMatch instance could be False in rare cases even if obviously present. Due to the sub-match factoring process.
|
||||
- Not searching properly for the BOM when trying utf32/16 parent codec.
|
||||
|
||||
### Changed
|
||||
- Improving the package final size by compressing frequencies.json.
|
||||
- Huge improvement on large payloads.
|
||||
|
||||
### Added
|
||||
- CLI now produces JSON consumable output.
|
||||
- Return ASCII if given sequences fit. Given reasonable confidence.
|
||||
|
||||
## [1.3.9](https://github.com/Ousret/charset_normalizer/compare/1.3.8...1.3.9) (2021-05-13)
|
||||
|
||||
### Fixed
|
||||
- In some very rare cases, you may end up getting encode/decode errors due to a bad bytes payload (PR #40)
|
||||
|
||||
## [1.3.8](https://github.com/Ousret/charset_normalizer/compare/1.3.7...1.3.8) (2021-05-12)
|
||||
|
||||
### Fixed
|
||||
- Empty given payload for detection may cause an exception if trying to access the `alphabets` property. (PR #39)
|
||||
|
||||
## [1.3.7](https://github.com/Ousret/charset_normalizer/compare/1.3.6...1.3.7) (2021-05-12)
|
||||
|
||||
### Fixed
|
||||
- The legacy detect function should return UTF-8-SIG if sig is present in the payload. (PR #38)
|
||||
|
||||
## [1.3.6](https://github.com/Ousret/charset_normalizer/compare/1.3.5...1.3.6) (2021-02-09)
|
||||
|
||||
### Changed
|
||||
- Amend the previous release to allow prettytable 2.0 (PR #35)
|
||||
|
||||
## [1.3.5](https://github.com/Ousret/charset_normalizer/compare/1.3.4...1.3.5) (2021-02-08)
|
||||
|
||||
### Fixed
|
||||
- Fix error while using the package with a python pre-release interpreter (PR #33)
|
||||
|
||||
### Changed
|
||||
- Dependencies refactoring, constraints revised.
|
||||
|
||||
### Added
|
||||
- Add python 3.9 and 3.10 to the supported interpreters
|
||||
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2019 TAHRI Ahmed R.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
@ -1,35 +0,0 @@
|
||||
../../../bin/normalizer,sha256=5D1-7f3zVCWv-rg7SEwn4zKsSPfhR3uoPudhjbmECOc,276
|
||||
charset_normalizer-3.3.2.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
|
||||
charset_normalizer-3.3.2.dist-info/LICENSE,sha256=6zGgxaT7Cbik4yBV0lweX5w1iidS_vPNcgIT0cz-4kE,1070
|
||||
charset_normalizer-3.3.2.dist-info/METADATA,sha256=cfLhl5A6SI-F0oclm8w8ux9wshL1nipdeCdVnYb4AaA,33550
|
||||
charset_normalizer-3.3.2.dist-info/RECORD,,
|
||||
charset_normalizer-3.3.2.dist-info/WHEEL,sha256=48wUIcZcdQ2pWN7qt0HP02Cvv6HIQZGsSgx3PsepNj8,152
|
||||
charset_normalizer-3.3.2.dist-info/entry_points.txt,sha256=ADSTKrkXZ3hhdOVFi6DcUEHQRS0xfxDIE_pEz4wLIXA,65
|
||||
charset_normalizer-3.3.2.dist-info/top_level.txt,sha256=7ASyzePr8_xuZWJsnqJjIBtyV8vhEo0wBCv1MPRRi3Q,19
|
||||
charset_normalizer/__init__.py,sha256=UzI3xC8PhmcLRMzSgPb6minTmRq0kWznnCBJ8ZCc2XI,1577
|
||||
charset_normalizer/__main__.py,sha256=JxY8bleaENOFlLRb9HfoeZCzAMnn2A1oGR5Xm2eyqg0,73
|
||||
charset_normalizer/__pycache__/__init__.cpython-311.pyc,,
|
||||
charset_normalizer/__pycache__/__main__.cpython-311.pyc,,
|
||||
charset_normalizer/__pycache__/api.cpython-311.pyc,,
|
||||
charset_normalizer/__pycache__/cd.cpython-311.pyc,,
|
||||
charset_normalizer/__pycache__/constant.cpython-311.pyc,,
|
||||
charset_normalizer/__pycache__/legacy.cpython-311.pyc,,
|
||||
charset_normalizer/__pycache__/md.cpython-311.pyc,,
|
||||
charset_normalizer/__pycache__/models.cpython-311.pyc,,
|
||||
charset_normalizer/__pycache__/utils.cpython-311.pyc,,
|
||||
charset_normalizer/__pycache__/version.cpython-311.pyc,,
|
||||
charset_normalizer/api.py,sha256=WOlWjy6wT8SeMYFpaGbXZFN1TMXa-s8vZYfkL4G29iQ,21097
|
||||
charset_normalizer/cd.py,sha256=xwZliZcTQFA3jU0c00PRiu9MNxXTFxQkFLWmMW24ZzI,12560
|
||||
charset_normalizer/cli/__init__.py,sha256=D5ERp8P62llm2FuoMzydZ7d9rs8cvvLXqE-1_6oViPc,100
|
||||
charset_normalizer/cli/__main__.py,sha256=2F-xURZJzo063Ye-2RLJ2wcmURpbKeAzKwpiws65dAs,9744
|
||||
charset_normalizer/cli/__pycache__/__init__.cpython-311.pyc,,
|
||||
charset_normalizer/cli/__pycache__/__main__.cpython-311.pyc,,
|
||||
charset_normalizer/constant.py,sha256=p0IsOVcEbPWYPOdWhnhRbjK1YVBy6fs05C5vKC-zoxU,40481
|
||||
charset_normalizer/legacy.py,sha256=T-QuVMsMeDiQEk8WSszMrzVJg_14AMeSkmHdRYhdl1k,2071
|
||||
charset_normalizer/md.cpython-311-x86_64-linux-gnu.so,sha256=Y7QSLD5QLoSFAWys0-tL7R6QB7oi5864zM6zr7RWek4,16064
|
||||
charset_normalizer/md.py,sha256=NkSuVLK13_a8c7BxZ4cGIQ5vOtGIWOdh22WZEvjp-7U,19624
|
||||
charset_normalizer/md__mypyc.cpython-311-x86_64-linux-gnu.so,sha256=93T0C_hoJxReTevc7NpjM7P7fae_U-scv5B-AhkKKtY,264392
|
||||
charset_normalizer/models.py,sha256=I5i0s4aKCCgLPY2tUY3pwkgFA-BUbbNxQ7hVkVTt62s,11624
|
||||
charset_normalizer/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
||||
charset_normalizer/utils.py,sha256=teiosMqzKjXyAHXnGdjSBOgnBZwx-SkBbCLrx0UXy8M,11894
|
||||
charset_normalizer/version.py,sha256=iHKUfHD3kDRSyrh_BN2ojh43TA5-UZQjvbVIEFfpHDs,79
|
@ -1,6 +0,0 @@
|
||||
Wheel-Version: 1.0
|
||||
Generator: bdist_wheel (0.41.2)
|
||||
Root-Is-Purelib: false
|
||||
Tag: cp311-cp311-manylinux_2_17_x86_64
|
||||
Tag: cp311-cp311-manylinux2014_x86_64
|
||||
|
@ -1,2 +0,0 @@
|
||||
[console_scripts]
|
||||
normalizer = charset_normalizer.cli:cli_detect
|
@ -1 +0,0 @@
|
||||
charset_normalizer
|
@ -1,46 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Charset-Normalizer
|
||||
~~~~~~~~~~~~~~
|
||||
The Real First Universal Charset Detector.
|
||||
A library that helps you read text from an unknown charset encoding.
|
||||
Motivated by chardet, This package is trying to resolve the issue by taking a new approach.
|
||||
All IANA character set names for which the Python core library provides codecs are supported.
|
||||
|
||||
Basic usage:
|
||||
>>> from charset_normalizer import from_bytes
|
||||
>>> results = from_bytes('Bсеки човек има право на образование. Oбразованието!'.encode('utf_8'))
|
||||
>>> best_guess = results.best()
|
||||
>>> str(best_guess)
|
||||
'Bсеки човек има право на образование. Oбразованието!'
|
||||
|
||||
Others methods and usages are available - see the full documentation
|
||||
at <https://github.com/Ousret/charset_normalizer>.
|
||||
:copyright: (c) 2021 by Ahmed TAHRI
|
||||
:license: MIT, see LICENSE for more details.
|
||||
"""
|
||||
import logging
|
||||
|
||||
from .api import from_bytes, from_fp, from_path, is_binary
|
||||
from .legacy import detect
|
||||
from .models import CharsetMatch, CharsetMatches
|
||||
from .utils import set_logging_handler
|
||||
from .version import VERSION, __version__
|
||||
|
||||
__all__ = (
|
||||
"from_fp",
|
||||
"from_path",
|
||||
"from_bytes",
|
||||
"is_binary",
|
||||
"detect",
|
||||
"CharsetMatch",
|
||||
"CharsetMatches",
|
||||
"__version__",
|
||||
"VERSION",
|
||||
"set_logging_handler",
|
||||
)
|
||||
|
||||
# Attach a NullHandler to the top level logger by default
|
||||
# https://docs.python.org/3.3/howto/logging.html#configuring-logging-for-a-library
|
||||
|
||||
logging.getLogger("charset_normalizer").addHandler(logging.NullHandler())
|
@ -1,4 +0,0 @@
|
||||
from .cli import cli_detect
|
||||
|
||||
if __name__ == "__main__":
|
||||
cli_detect()
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -1,626 +0,0 @@
|
||||
import logging
|
||||
from os import PathLike
|
||||
from typing import BinaryIO, List, Optional, Set, Union
|
||||
|
||||
from .cd import (
|
||||
coherence_ratio,
|
||||
encoding_languages,
|
||||
mb_encoding_languages,
|
||||
merge_coherence_ratios,
|
||||
)
|
||||
from .constant import IANA_SUPPORTED, TOO_BIG_SEQUENCE, TOO_SMALL_SEQUENCE, TRACE
|
||||
from .md import mess_ratio
|
||||
from .models import CharsetMatch, CharsetMatches
|
||||
from .utils import (
|
||||
any_specified_encoding,
|
||||
cut_sequence_chunks,
|
||||
iana_name,
|
||||
identify_sig_or_bom,
|
||||
is_cp_similar,
|
||||
is_multi_byte_encoding,
|
||||
should_strip_sig_or_bom,
|
||||
)
|
||||
|
||||
# Will most likely be controversial
|
||||
# logging.addLevelName(TRACE, "TRACE")
|
||||
logger = logging.getLogger("charset_normalizer")
|
||||
explain_handler = logging.StreamHandler()
|
||||
explain_handler.setFormatter(
|
||||
logging.Formatter("%(asctime)s | %(levelname)s | %(message)s")
|
||||
)
|
||||
|
||||
|
||||
def from_bytes(
    sequences: Union[bytes, bytearray],
    steps: int = 5,
    chunk_size: int = 512,
    threshold: float = 0.2,
    cp_isolation: Optional[List[str]] = None,
    cp_exclusion: Optional[List[str]] = None,
    preemptive_behaviour: bool = True,
    explain: bool = False,
    language_threshold: float = 0.1,
    enable_fallback: bool = True,
) -> CharsetMatches:
    """
    Given a raw bytes sequence, return the best possible charsets usable to render str objects.
    If there are no results, it is a strong indicator that the source is binary/not text.
    By default, the process will extract 5 blocks of 512 bytes each to assess the mess and coherence
    of a given sequence, and will give up a particular code page after 20% of measured mess.
    Those criteria are customizable at will.

    The preemptive behavior DOES NOT replace the traditional detection workflow, it prioritizes a
    particular code page but never takes it for granted. Can improve the performance.

    You may want to focus your attention on some code pages and/or not others, use cp_isolation and
    cp_exclusion for that purpose.

    This function will strip the SIG in the payload/sequence every time except on UTF-16, UTF-32.
    By default the library does not set up any handler other than the NullHandler; if you set the
    'explain' toggle to True, a StreamHandler suitable for debugging is attached to the package logger
    for the duration of the call and the logger level is lowered to TRACE. Both are restored before
    returning, including when an exception propagates.
    Custom logging format and handler can be set manually.

    :param sequences: payload to analyse (bytes or bytearray).
    :param steps: number of chunks to probe.
    :param chunk_size: size, in bytes, of each probed chunk.
    :param threshold: mess ratio at or above which a code page is given up.
    :param cp_isolation: if set, only these code pages are tested.
    :param cp_exclusion: if set, these code pages are never tested.
    :param preemptive_behaviour: look for an encoding declared inside the payload and test it first.
    :param explain: emit TRACE-level diagnostics through a temporary StreamHandler.
    :param language_threshold: minimum coherence ratio forwarded to coherence_ratio().
    :param enable_fallback: allow ascii / utf_8 / declared encoding as last-resort matches.
    :raises TypeError: if sequences is neither bytes nor bytearray.
    """
    if not isinstance(sequences, (bytearray, bytes)):
        raise TypeError(
            "Expected object of type bytes or bytearray, got: {0}".format(
                type(sequences)
            )
        )

    if not explain:
        return _detect_from_bytes(
            sequences,
            steps,
            chunk_size,
            threshold,
            cp_isolation,
            cp_exclusion,
            preemptive_behaviour,
            explain,
            language_threshold,
            enable_fallback,
        )

    previous_logger_level: int = logger.level
    logger.addHandler(explain_handler)
    logger.setLevel(TRACE)

    try:
        return _detect_from_bytes(
            sequences,
            steps,
            chunk_size,
            threshold,
            cp_isolation,
            cp_exclusion,
            preemptive_behaviour,
            explain,
            language_threshold,
            enable_fallback,
        )
    finally:
        # Single restore point: runs on every return path and on exceptions,
        # and always puts back exactly the level that was found.
        logger.removeHandler(explain_handler)
        logger.setLevel(previous_logger_level)


def _detect_from_bytes(
    sequences: Union[bytes, bytearray],
    steps: int,
    chunk_size: int,
    threshold: float,
    cp_isolation: Optional[List[str]],
    cp_exclusion: Optional[List[str]],
    preemptive_behaviour: bool,
    explain: bool,
    language_threshold: float,
    enable_fallback: bool,
) -> CharsetMatches:
    """Run the detection workflow of from_bytes(); logger setup/teardown is the caller's job."""
    length: int = len(sequences)

    if length == 0:
        logger.debug("Encoding detection on empty bytes, assuming utf_8 intention.")
        return CharsetMatches([CharsetMatch(sequences, "utf_8", 0.0, False, [], "")])

    if cp_isolation is not None:
        logger.log(
            TRACE,
            "cp_isolation is set. use this flag for debugging purpose. "
            "limited list of encoding allowed : %s.",
            ", ".join(cp_isolation),
        )
        cp_isolation = [iana_name(cp, False) for cp in cp_isolation]
    else:
        cp_isolation = []

    if cp_exclusion is not None:
        logger.log(
            TRACE,
            "cp_exclusion is set. use this flag for debugging purpose. "
            "limited list of encoding excluded : %s.",
            ", ".join(cp_exclusion),
        )
        cp_exclusion = [iana_name(cp, False) for cp in cp_exclusion]
    else:
        cp_exclusion = []

    # Payload smaller than the probing window: analyse it as one single chunk.
    if length <= (chunk_size * steps):
        logger.log(
            TRACE,
            "override steps (%i) and chunk_size (%i) as content does not fit (%i byte(s) given) parameters.",
            steps,
            chunk_size,
            length,
        )
        steps = 1
        chunk_size = length

    if steps > 1 and length / steps < chunk_size:
        chunk_size = int(length / steps)

    is_too_small_sequence: bool = len(sequences) < TOO_SMALL_SEQUENCE
    is_too_large_sequence: bool = len(sequences) >= TOO_BIG_SEQUENCE

    if is_too_small_sequence:
        logger.log(
            TRACE,
            "Trying to detect encoding from a tiny portion of ({}) byte(s).".format(
                length
            ),
        )
    elif is_too_large_sequence:
        logger.log(
            TRACE,
            "Using lazy str decoding because the payload is quite large, ({}) byte(s).".format(
                length
            ),
        )

    prioritized_encodings: List[str] = []

    specified_encoding: Optional[str] = (
        any_specified_encoding(sequences) if preemptive_behaviour else None
    )

    if specified_encoding is not None:
        prioritized_encodings.append(specified_encoding)
        logger.log(
            TRACE,
            "Detected declarative mark in sequence. Priority +1 given for %s.",
            specified_encoding,
        )

    tested: Set[str] = set()
    tested_but_hard_failure: List[str] = []
    tested_but_soft_failure: List[str] = []

    fallback_ascii: Optional[CharsetMatch] = None
    fallback_u8: Optional[CharsetMatch] = None
    fallback_specified: Optional[CharsetMatch] = None

    results: CharsetMatches = CharsetMatches()

    sig_encoding, sig_payload = identify_sig_or_bom(sequences)

    if sig_encoding is not None:
        prioritized_encodings.append(sig_encoding)
        logger.log(
            TRACE,
            "Detected a SIG or BOM mark on first %i byte(s). Priority +1 given for %s.",
            len(sig_payload),
            sig_encoding,
        )

    prioritized_encodings.append("ascii")

    if "utf_8" not in prioritized_encodings:
        prioritized_encodings.append("utf_8")

    for encoding_iana in prioritized_encodings + IANA_SUPPORTED:
        if cp_isolation and encoding_iana not in cp_isolation:
            continue

        if cp_exclusion and encoding_iana in cp_exclusion:
            continue

        if encoding_iana in tested:
            continue

        tested.add(encoding_iana)

        decoded_payload: Optional[str] = None
        bom_or_sig_available: bool = sig_encoding == encoding_iana
        strip_sig_or_bom: bool = bom_or_sig_available and should_strip_sig_or_bom(
            encoding_iana
        )

        if encoding_iana in {"utf_16", "utf_32"} and not bom_or_sig_available:
            logger.log(
                TRACE,
                "Encoding %s won't be tested as-is because it require a BOM. Will try some sub-encoder LE/BE.",
                encoding_iana,
            )
            continue
        if encoding_iana in {"utf_7"} and not bom_or_sig_available:
            logger.log(
                TRACE,
                "Encoding %s won't be tested as-is because detection is unreliable without BOM/SIG.",
                encoding_iana,
            )
            continue

        try:
            is_multi_byte_decoder: bool = is_multi_byte_encoding(encoding_iana)
        except (ModuleNotFoundError, ImportError):
            logger.log(
                TRACE,
                "Encoding %s does not provide an IncrementalDecoder",
                encoding_iana,
            )
            continue

        # First gate: the payload (or, for large single-byte payloads, only its
        # head) must decode strictly with the candidate code page.
        try:
            if is_too_large_sequence and is_multi_byte_decoder is False:
                str(
                    sequences[: int(50e4)]
                    if strip_sig_or_bom is False
                    else sequences[len(sig_payload) : int(50e4)],
                    encoding=encoding_iana,
                )
            else:
                decoded_payload = str(
                    sequences
                    if strip_sig_or_bom is False
                    else sequences[len(sig_payload) :],
                    encoding=encoding_iana,
                )
        except (UnicodeDecodeError, LookupError) as e:
            if not isinstance(e, LookupError):
                logger.log(
                    TRACE,
                    "Code page %s does not fit given bytes sequence at ALL. %s",
                    encoding_iana,
                    str(e),
                )
            tested_but_hard_failure.append(encoding_iana)
            continue

        similar_soft_failure_test: bool = False

        for encoding_soft_failed in tested_but_soft_failure:
            if is_cp_similar(encoding_iana, encoding_soft_failed):
                similar_soft_failure_test = True
                break

        if similar_soft_failure_test:
            logger.log(
                TRACE,
                "%s is deemed too similar to code page %s and was consider unsuited already. Continuing!",
                encoding_iana,
                encoding_soft_failed,
            )
            continue

        # Start offsets of the chunks to probe (skipping the SIG/BOM if any).
        r_ = range(
            0 if not bom_or_sig_available else len(sig_payload),
            length,
            int(length / steps),
        )

        multi_byte_bonus: bool = (
            is_multi_byte_decoder
            and decoded_payload is not None
            and len(decoded_payload) < length
        )

        if multi_byte_bonus:
            logger.log(
                TRACE,
                "Code page %s is a multi byte encoding table and it appear that at least one character "
                "was encoded using n-bytes.",
                encoding_iana,
            )

        max_chunk_gave_up: int = int(len(r_) / 4)

        max_chunk_gave_up = max(max_chunk_gave_up, 2)
        early_stop_count: int = 0
        lazy_str_hard_failure = False

        md_chunks: List[str] = []
        md_ratios = []

        try:
            for chunk in cut_sequence_chunks(
                sequences,
                encoding_iana,
                r_,
                chunk_size,
                bom_or_sig_available,
                strip_sig_or_bom,
                sig_payload,
                is_multi_byte_decoder,
                decoded_payload,
            ):
                md_chunks.append(chunk)

                md_ratios.append(
                    mess_ratio(
                        chunk,
                        threshold,
                        explain is True and 1 <= len(cp_isolation) <= 2,
                    )
                )

                if md_ratios[-1] >= threshold:
                    early_stop_count += 1

                if (early_stop_count >= max_chunk_gave_up) or (
                    bom_or_sig_available and strip_sig_or_bom is False
                ):
                    break
        except (
            UnicodeDecodeError
        ) as e:  # Lazy str loading may have missed something there
            logger.log(
                TRACE,
                "LazyStr Loading: After MD chunk decode, code page %s does not fit given bytes sequence at ALL. %s",
                encoding_iana,
                str(e),
            )
            early_stop_count = max_chunk_gave_up
            lazy_str_hard_failure = True

        # We might want to check the sequence again with the whole content
        # Only if initial MD tests passes
        if (
            not lazy_str_hard_failure
            and is_too_large_sequence
            and not is_multi_byte_decoder
        ):
            try:
                # NOTE(review): starts at 50e3 while the head check above stops
                # at 50e4, so the two ranges overlap - confirm this is intended.
                sequences[int(50e3) :].decode(encoding_iana, errors="strict")
            except UnicodeDecodeError as e:
                logger.log(
                    TRACE,
                    "LazyStr Loading: After final lookup, code page %s does not fit given bytes sequence at ALL. %s",
                    encoding_iana,
                    str(e),
                )
                tested_but_hard_failure.append(encoding_iana)
                continue

        mean_mess_ratio: float = sum(md_ratios) / len(md_ratios) if md_ratios else 0.0
        if mean_mess_ratio >= threshold or early_stop_count >= max_chunk_gave_up:
            tested_but_soft_failure.append(encoding_iana)
            logger.log(
                TRACE,
                "%s was excluded because of initial chaos probing. Gave up %i time(s). "
                "Computed mean chaos is %f %%.",
                encoding_iana,
                early_stop_count,
                round(mean_mess_ratio * 100, ndigits=3),
            )
            # Preparing those fallbacks in case we got nothing.
            if (
                enable_fallback
                and encoding_iana in ["ascii", "utf_8", specified_encoding]
                and not lazy_str_hard_failure
            ):
                fallback_entry = CharsetMatch(
                    sequences, encoding_iana, threshold, False, [], decoded_payload
                )
                if encoding_iana == specified_encoding:
                    fallback_specified = fallback_entry
                elif encoding_iana == "ascii":
                    fallback_ascii = fallback_entry
                else:
                    fallback_u8 = fallback_entry
            continue

        logger.log(
            TRACE,
            "%s passed initial chaos probing. Mean measured chaos is %f %%",
            encoding_iana,
            round(mean_mess_ratio * 100, ndigits=3),
        )

        if not is_multi_byte_decoder:
            target_languages: List[str] = encoding_languages(encoding_iana)
        else:
            target_languages = mb_encoding_languages(encoding_iana)

        if target_languages:
            logger.log(
                TRACE,
                "{} should target any language(s) of {}".format(
                    encoding_iana, str(target_languages)
                ),
            )

        cd_ratios = []

        # We shall skip the CD when its about ASCII
        # Most of the time its not relevant to run "language-detection" on it.
        if encoding_iana != "ascii":
            for chunk in md_chunks:
                chunk_languages = coherence_ratio(
                    chunk,
                    language_threshold,
                    ",".join(target_languages) if target_languages else None,
                )

                cd_ratios.append(chunk_languages)

        cd_ratios_merged = merge_coherence_ratios(cd_ratios)

        if cd_ratios_merged:
            logger.log(
                TRACE,
                "We detected language {} using {}".format(
                    cd_ratios_merged, encoding_iana
                ),
            )

        results.append(
            CharsetMatch(
                sequences,
                encoding_iana,
                mean_mess_ratio,
                bom_or_sig_available,
                cd_ratios_merged,
                decoded_payload,
            )
        )

        # Short-circuit: a clean match on a prioritized encoding ends the search.
        if (
            encoding_iana in [specified_encoding, "ascii", "utf_8"]
            and mean_mess_ratio < 0.1
        ):
            logger.debug(
                "Encoding detection: %s is most likely the one.", encoding_iana
            )
            return CharsetMatches([results[encoding_iana]])

        if encoding_iana == sig_encoding:
            logger.debug(
                "Encoding detection: %s is most likely the one as we detected a BOM or SIG within "
                "the beginning of the sequence.",
                encoding_iana,
            )
            return CharsetMatches([results[encoding_iana]])

    if len(results) == 0:
        if fallback_u8 or fallback_ascii or fallback_specified:
            logger.log(
                TRACE,
                "Nothing got out of the detection process. Using ASCII/UTF-8/Specified fallback.",
            )

        if fallback_specified:
            logger.debug(
                "Encoding detection: %s will be used as a fallback match",
                fallback_specified.encoding,
            )
            results.append(fallback_specified)
        elif fallback_u8 is not None:
            # The former three-clause condition (ascii missing / fingerprints
            # differ / utf_8 present) was true exactly when a utf_8 fallback exists.
            logger.debug("Encoding detection: utf_8 will be used as a fallback match")
            results.append(fallback_u8)
        elif fallback_ascii:
            logger.debug("Encoding detection: ascii will be used as a fallback match")
            results.append(fallback_ascii)

    if results:
        logger.debug(
            "Encoding detection: Found %s as plausible (best-candidate) for content. With %i alternatives.",
            results.best().encoding,  # type: ignore
            len(results) - 1,
        )
    else:
        logger.debug("Encoding detection: Unable to determine any suitable charset.")

    return results
|
||||
|
||||
|
||||
def from_fp(
    fp: BinaryIO,
    steps: int = 5,
    chunk_size: int = 512,
    threshold: float = 0.20,
    cp_isolation: Optional[List[str]] = None,
    cp_exclusion: Optional[List[str]] = None,
    preemptive_behaviour: bool = True,
    explain: bool = False,
    language_threshold: float = 0.1,
    enable_fallback: bool = True,
) -> CharsetMatches:
    """
    Same as from_bytes, but the payload is read from an already opened binary file object.
    The whole remaining content is read with fp.read(); the file object is not closed.
    """
    payload = fp.read()

    return from_bytes(
        payload,
        steps=steps,
        chunk_size=chunk_size,
        threshold=threshold,
        cp_isolation=cp_isolation,
        cp_exclusion=cp_exclusion,
        preemptive_behaviour=preemptive_behaviour,
        explain=explain,
        language_threshold=language_threshold,
        enable_fallback=enable_fallback,
    )
|
||||
|
||||
|
||||
def from_path(
    path: Union[str, bytes, PathLike],  # type: ignore[type-arg]
    steps: int = 5,
    chunk_size: int = 512,
    threshold: float = 0.20,
    cp_isolation: Optional[List[str]] = None,
    cp_exclusion: Optional[List[str]] = None,
    preemptive_behaviour: bool = True,
    explain: bool = False,
    language_threshold: float = 0.1,
    enable_fallback: bool = True,
) -> CharsetMatches:
    """
    Same as from_bytes, with one extra step: the file at the given path is opened
    in binary mode and handed to from_fp.
    Can raise IOError.
    """
    with open(path, "rb") as binary_file:
        return from_fp(
            binary_file,
            steps=steps,
            chunk_size=chunk_size,
            threshold=threshold,
            cp_isolation=cp_isolation,
            cp_exclusion=cp_exclusion,
            preemptive_behaviour=preemptive_behaviour,
            explain=explain,
            language_threshold=language_threshold,
            enable_fallback=enable_fallback,
        )
|
||||
|
||||
|
||||
def is_binary(
    fp_or_path_or_payload: Union[PathLike, str, BinaryIO, bytes],  # type: ignore[type-arg]
    steps: int = 5,
    chunk_size: int = 512,
    threshold: float = 0.20,
    cp_isolation: Optional[List[str]] = None,
    cp_exclusion: Optional[List[str]] = None,
    preemptive_behaviour: bool = True,
    explain: bool = False,
    language_threshold: float = 0.1,
    enable_fallback: bool = False,
) -> bool:
    """
    Tell whether the given input (file object, bytes payload, or path) is binary, i.e. not text.
    Relies on the same heuristics and default kwargs as the from_* functions, with the sole
    exception that fallback matches are disabled by default, to be stricter about content that
    is ASCII-compatible but unlikely to be a string.

    Returns True when the detection yields no match at all.
    """
    # Options forwarded unchanged to whichever from_* entry point applies.
    detection_options = {
        "steps": steps,
        "chunk_size": chunk_size,
        "threshold": threshold,
        "cp_isolation": cp_isolation,
        "cp_exclusion": cp_exclusion,
        "preemptive_behaviour": preemptive_behaviour,
        "explain": explain,
        "language_threshold": language_threshold,
        "enable_fallback": enable_fallback,
    }

    if isinstance(fp_or_path_or_payload, (str, PathLike)):
        guesses = from_path(fp_or_path_or_payload, **detection_options)
    elif isinstance(fp_or_path_or_payload, (bytes, bytearray)):
        guesses = from_bytes(fp_or_path_or_payload, **detection_options)
    else:
        # Anything else is treated as a readable binary file object.
        guesses = from_fp(fp_or_path_or_payload, **detection_options)

    return not guesses
|
@ -1,395 +0,0 @@
|
||||
import importlib
|
||||
from codecs import IncrementalDecoder
|
||||
from collections import Counter
|
||||
from functools import lru_cache
|
||||
from typing import Counter as TypeCounter, Dict, List, Optional, Tuple
|
||||
|
||||
from .constant import (
|
||||
FREQUENCIES,
|
||||
KO_NAMES,
|
||||
LANGUAGE_SUPPORTED_COUNT,
|
||||
TOO_SMALL_SEQUENCE,
|
||||
ZH_NAMES,
|
||||
)
|
||||
from .md import is_suspiciously_successive_range
|
||||
from .models import CoherenceMatches
|
||||
from .utils import (
|
||||
is_accentuated,
|
||||
is_latin,
|
||||
is_multi_byte_encoding,
|
||||
is_unicode_range_secondary,
|
||||
unicode_range,
|
||||
)
|
||||
|
||||
|
||||
def encoding_unicode_range(iana_name: str) -> List[str]:
    """
    Return the sorted unicode ranges associated with a single byte code page.

    Bytes 0x40 to 0xFE are decoded one by one; a range is kept when it holds at
    least 15% of the decoded characters that belong to a non-secondary range.
    Raises IOError when called with a multi-byte code page.
    """
    if is_multi_byte_encoding(iana_name):
        raise IOError("Function not supported on multi-byte code page")

    codec_module = importlib.import_module("encodings.{}".format(iana_name))
    incremental: IncrementalDecoder = codec_module.IncrementalDecoder(errors="ignore")

    hits_per_range: Dict[str, int] = {}
    counted_characters: int = 0

    for byte_value in range(0x40, 0xFF):
        decoded: str = incremental.decode(bytes([byte_value]))

        if not decoded:
            continue

        range_name: Optional[str] = unicode_range(decoded)

        if range_name is None:
            continue

        if is_unicode_range_secondary(range_name) is False:
            hits_per_range[range_name] = hits_per_range.get(range_name, 0) + 1
            counted_characters += 1

    return sorted(
        range_name
        for range_name, hits in hits_per_range.items()
        if hits / counted_characters >= 0.15
    )
|
||||
|
||||
|
||||
def unicode_range_languages(primary_range: str) -> List[str]:
    """
    Return the languages from FREQUENCIES that have at least one character
    belonging to the given unicode range.
    """
    return [
        language
        for language, characters in FREQUENCIES.items()
        if any(unicode_range(character) == primary_range for character in characters)
    ]
|
||||
|
||||
|
||||
@lru_cache()
def encoding_languages(iana_name: str) -> List[str]:
    """
    Single-byte encoding language association. Some code pages are heavily linked to
    particular language(s); this function does the correspondence.

    The first non-Latin unicode range of the code page decides the languages;
    when every range is Latin, ["Latin Based"] is returned.
    """
    primary_range: Optional[str] = next(
        (
            candidate
            for candidate in encoding_unicode_range(iana_name)
            if "Latin" not in candidate
        ),
        None,
    )

    if primary_range is None:
        return ["Latin Based"]

    return unicode_range_languages(primary_range)
|
||||
|
||||
|
||||
@lru_cache()
def mb_encoding_languages(iana_name: str) -> List[str]:
    """
    Multi-byte encoding language association. Some code pages are heavily linked to
    particular language(s); this function does the correspondence.

    Returns a one-item list ("Japanese", "Chinese" or "Korean"), or an empty
    list when the code page matches none of the known families.
    """
    japanese_prefixes = ("shift_", "iso2022_jp", "euc_j")

    if iana_name.startswith(japanese_prefixes) or iana_name == "cp932":
        return ["Japanese"]

    if iana_name.startswith("gb") or iana_name in ZH_NAMES:
        return ["Chinese"]

    if iana_name.startswith("iso2022_kr") or iana_name in KO_NAMES:
        return ["Korean"]

    return []
|
||||
|
||||
|
||||
@lru_cache(maxsize=LANGUAGE_SUPPORTED_COUNT)
def get_target_features(language: str) -> Tuple[bool, bool]:
    """
    Determine two aspects of a supported language: whether its character list
    contains accentuated characters, and whether it is purely Latin.

    Returns (target_have_accents, target_pure_latin).
    """
    language_characters = FREQUENCIES[language]

    target_have_accents: bool = any(
        is_accentuated(character) for character in language_characters
    )
    target_pure_latin: bool = not any(
        is_latin(character) is False for character in language_characters
    )

    return target_have_accents, target_pure_latin
|
||||
|
||||
|
||||
def alphabet_languages(
    characters: List[str], ignore_non_latin: bool = False
) -> List[str]:
    """
    Return the languages compatible with the given characters, best match first.

    A language is kept when at least 20% of its known characters appear in
    `characters`. Languages without accents are skipped when the source has
    accentuated characters, and non pure-Latin languages are skipped when
    `ignore_non_latin` is set.
    """
    source_have_accents = any(is_accentuated(character) for character in characters)

    scored: List[Tuple[str, float]] = []

    for language, language_characters in FREQUENCIES.items():
        target_have_accents, target_pure_latin = get_target_features(language)

        if ignore_non_latin and target_pure_latin is False:
            continue

        if target_have_accents is False and source_have_accents:
            continue

        matched: int = sum(1 for c in language_characters if c in characters)
        ratio: float = matched / len(language_characters)

        if ratio >= 0.2:
            scored.append((language, ratio))

    # Stable sort, highest ratio first.
    scored.sort(key=lambda entry: entry[1], reverse=True)

    return [language for language, _ in scored]
|
||||
|
||||
|
||||
def characters_popularity_compare(
    language: str, ordered_characters: List[str]
) -> float:
    """
    Determine if an ordered characters list (by occurrence, from most frequent to rarest) matches
    a particular language.
    The result is a ratio between 0. (absolutely no correspondence) and 1. (near perfect fit).
    Beware that this function is not strict on the match in order to ease the detection.
    (Meaning close match is 1.)

    :param language: key of FREQUENCIES to compare against.
    :param ordered_characters: characters sorted by decreasing number of occurrences.
    :return: share of `ordered_characters` whose rank is deemed compatible with the language;
             0.0 when `ordered_characters` is empty.
    :raises ValueError: if the language is not available in FREQUENCIES.
    """
    if language not in FREQUENCIES:
        raise ValueError("{} not available".format(language))

    ordered_characters_count: int = len(ordered_characters)

    # Nothing to compare: avoid dividing by zero in the final ratio.
    if ordered_characters_count == 0:
        return 0.0

    character_approved_count: int = 0
    language_characters = FREQUENCIES[language]
    FREQUENCIES_language_set = set(language_characters)

    target_language_characters_count: int = len(language_characters)

    large_alphabet: bool = target_language_characters_count > 26

    # Scale factor mapping a rank in the source onto a rank in the language
    # table; it does not depend on the character, so compute it once.
    expected_projection_ratio: float = (
        target_language_characters_count / ordered_characters_count
    )

    for character_rank, character in enumerate(ordered_characters):
        if character not in FREQUENCIES_language_set:
            continue

        character_rank_in_language: int = language_characters.index(character)
        character_rank_projection: int = int(character_rank * expected_projection_ratio)
        rank_distance: int = abs(character_rank_projection - character_rank_in_language)

        if large_alphabet is False and rank_distance > 4:
            continue

        if large_alphabet is True and rank_distance < target_language_characters_count / 3:
            character_approved_count += 1
            continue

        # Compare what comes before/after this character in the source with
        # what comes before/after it in the language table.
        characters_before_source: List[str] = language_characters[
            0:character_rank_in_language
        ]
        characters_after_source: List[str] = language_characters[
            character_rank_in_language:
        ]
        characters_before: List[str] = ordered_characters[0:character_rank]
        characters_after: List[str] = ordered_characters[character_rank:]

        before_match_count: int = len(
            set(characters_before) & set(characters_before_source)
        )

        after_match_count: int = len(
            set(characters_after) & set(characters_after_source)
        )

        if len(characters_before_source) == 0 and before_match_count <= 4:
            character_approved_count += 1
            continue

        if len(characters_after_source) == 0 and after_match_count <= 4:
            character_approved_count += 1
            continue

        if (
            before_match_count / len(characters_before_source) >= 0.4
            or after_match_count / len(characters_after_source) >= 0.4
        ):
            character_approved_count += 1
            continue

    return character_approved_count / ordered_characters_count
|
||||
|
||||
|
||||
def alpha_unicode_split(decoded_sequence: str) -> List[str]:
    """
    Given a decoded text sequence, return a list of str: one per unicode range / alphabet.
    Ex. a text containing English/Latin with a bit of Hebrew will return two items in the
    resulting list; one containing the latin letters and the other the hebrew ones.

    Only alphabetic characters are kept, lower-cased, in order of appearance.
    """
    layers: Dict[str, str] = {}

    for character in decoded_sequence:
        if character.isalpha() is False:
            continue

        character_range: Optional[str] = unicode_range(character)

        if character_range is None:
            continue

        # Reuse the first already discovered layer whose range is not a
        # suspicious neighbour of this character's range, else open a new one.
        layer_target_range: Optional[str] = next(
            (
                discovered_range
                for discovered_range in layers
                if is_suspiciously_successive_range(discovered_range, character_range)
                is False
            ),
            None,
        )

        if layer_target_range is None:
            layer_target_range = character_range

        layers[layer_target_range] = layers.get(layer_target_range, "") + character.lower()

    return list(layers.values())
|
||||
|
||||
|
||||
def merge_coherence_ratios(results: List[CoherenceMatches]) -> CoherenceMatches:
    """
    Merge several results previously returned by the function coherence_ratio.
    Each language gets the mean of its ratios, rounded to 4 digits; the output is
    sorted by decreasing ratio and has the same type as a coherence_ratio result.
    """
    ratios_by_language: Dict[str, List[float]] = {}

    for result in results:
        for language, ratio in result:
            ratios_by_language.setdefault(language, []).append(ratio)

    averaged = [
        (language, round(sum(ratios) / len(ratios), 4))
        for language, ratios in ratios_by_language.items()
    ]
    averaged.sort(key=lambda entry: entry[1], reverse=True)

    return averaged
|
||||
|
||||
|
||||
def filter_alt_coherence_matches(results: CoherenceMatches) -> CoherenceMatches:
    """
    We shall NOT return "English—" in CoherenceMatches because it is an alternative
    of "English". When a language appears more than once once the em-dash is removed,
    only its best ratio is kept under the em-dash-free name; otherwise the input is
    returned untouched.
    """
    ratios_by_name: Dict[str, List[float]] = {}

    for language, ratio in results:
        ratios_by_name.setdefault(language.replace("—", ""), []).append(ratio)

    # No language has an alternative: nothing to filter.
    if all(len(ratios) <= 1 for ratios in ratios_by_name.values()):
        return results

    return [(name, max(ratios)) for name, ratios in ratios_by_name.items()]
|
||||
|
||||
|
||||
@lru_cache(maxsize=2048)
def coherence_ratio(
    decoded_sequence: str, threshold: float = 0.1, lg_inclusion: Optional[str] = None
) -> CoherenceMatches:
    """
    Detect ANY language that can be identified in the given sequence. The sequence is analysed by layers.
    A layer = Character extraction by alphabets/ranges.

    `lg_inclusion` is an optional comma separated list of languages to test instead of the ones
    inferred from each layer; the special entry "Latin Based" only restricts the inference to
    pure Latin languages. Matches below `threshold` are dropped; the result is sorted by
    decreasing ratio.
    """
    matches: List[Tuple[str, float]] = []
    strong_matches: int = 0

    requested_languages = [] if lg_inclusion is None else lg_inclusion.split(",")
    ignore_non_latin: bool = "Latin Based" in requested_languages

    if ignore_non_latin:
        requested_languages.remove("Latin Based")

    for layer in alpha_unicode_split(decoded_sequence):
        most_common = Counter(layer).most_common()

        # Total number of characters in the layer.
        if sum(occurrences for _, occurrences in most_common) <= TOO_SMALL_SEQUENCE:
            continue

        popular_character_ordered: List[str] = [
            character for character, _ in most_common
        ]

        candidates = requested_languages or alphabet_languages(
            popular_character_ordered, ignore_non_latin
        )

        for language in candidates:
            ratio: float = characters_popularity_compare(
                language, popular_character_ordered
            )

            if ratio < threshold:
                continue

            if ratio >= 0.8:
                strong_matches += 1

            matches.append((language, round(ratio, 4)))

            # Three strong matches are enough for this layer.
            if strong_matches >= 3:
                break

    return sorted(
        filter_alt_coherence_matches(matches), key=lambda entry: entry[1], reverse=True
    )
|
@ -1,6 +0,0 @@
|
||||
from .__main__ import cli_detect, query_yes_no
|
||||
|
||||
__all__ = (
|
||||
"cli_detect",
|
||||
"query_yes_no",
|
||||
)
|
@ -1,296 +0,0 @@
|
||||
import argparse
|
||||
import sys
|
||||
from json import dumps
|
||||
from os.path import abspath, basename, dirname, join, realpath
|
||||
from platform import python_version
|
||||
from typing import List, Optional
|
||||
from unicodedata import unidata_version
|
||||
|
||||
import charset_normalizer.md as md_module
|
||||
from charset_normalizer import from_fp
|
||||
from charset_normalizer.models import CliDetectionResult
|
||||
from charset_normalizer.version import __version__
|
||||
|
||||
|
||||
def query_yes_no(question: str, default: Optional[str] = "yes") -> bool:
    """Ask a yes/no question via input() and return the answer.

    :param question: Text presented to the user, written to stdout before the prompt hint.
    :param default: The presumed answer if the user just hits <Enter>.
        It must be "yes" (the default), "no" or None (meaning an answer is
        required of the user).
    :return: True for "yes" or False for "no".
    :raises ValueError: If ``default`` is not one of the accepted values.

    Credit goes to (c) https://stackoverflow.com/questions/3041986/apt-command-line-interface-like-yes-no-input
    """
    valid = {"yes": True, "y": True, "ye": True, "no": False, "n": False}

    if default is None:
        prompt = " [y/n] "
    elif default == "yes":
        prompt = " [Y/n] "
    elif default == "no":
        prompt = " [y/N] "
    else:
        raise ValueError("invalid default answer: '%s'" % default)

    while True:
        sys.stdout.write(question + prompt)
        # Surrounding whitespace is not meaningful: " y " is a valid answer.
        choice = input().strip().lower()

        if default is not None and choice == "":
            return valid[default]
        if choice in valid:
            return valid[choice]

        sys.stdout.write("Please respond with 'yes' or 'no' " "(or 'y' or 'n').\n")
|
||||
|
||||
|
||||
def cli_detect(argv: Optional[List[str]] = None) -> int:
    """
    CLI assistant using ARGV and ArgumentParser.

    Every given file is analysed with ``from_fp``. The outcome is printed to stdout,
    either as JSON (default) or as the bare encoding names (``--minimal``). With
    ``--normalize`` the decoded content is additionally written back as UTF-8, next to
    the original file or in place of it (``--replace``).

    :param argv: Arguments to parse, forwarded to ``ArgumentParser.parse_args``.
    :return: 0 if everything is fine, anything else equal trouble
        (1 for invalid option combination/value, 2 when writing the normalized file failed).
    """
    parser = argparse.ArgumentParser(
        description="The Real First Universal Charset Detector. "
        "Discover originating encoding used on text file. "
        "Normalize text to unicode."
    )

    parser.add_argument(
        "files", type=argparse.FileType("rb"), nargs="+", help="File(s) to be analysed"
    )
    parser.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        default=False,
        dest="verbose",
        help="Display complementary information about file if any. "
        "Stdout will contain logs about the detection process.",
    )
    parser.add_argument(
        "-a",
        "--with-alternative",
        action="store_true",
        default=False,
        dest="alternatives",
        help="Output complementary possibilities if any. Top-level JSON WILL be a list.",
    )
    parser.add_argument(
        "-n",
        "--normalize",
        action="store_true",
        default=False,
        dest="normalize",
        help="Permit to normalize input file. If not set, program does not write anything.",
    )
    parser.add_argument(
        "-m",
        "--minimal",
        action="store_true",
        default=False,
        dest="minimal",
        help="Only output the charset detected to STDOUT. Disabling JSON output.",
    )
    parser.add_argument(
        "-r",
        "--replace",
        action="store_true",
        default=False,
        dest="replace",
        help="Replace file when trying to normalize it instead of creating a new one.",
    )
    parser.add_argument(
        "-f",
        "--force",
        action="store_true",
        default=False,
        dest="force",
        help="Replace file without asking if you are sure, use this flag with caution.",
    )
    parser.add_argument(
        "-t",
        "--threshold",
        action="store",
        default=0.2,
        type=float,
        dest="threshold",
        help="Define a custom maximum amount of chaos allowed in decoded content. 0. <= chaos <= 1.",
    )
    parser.add_argument(
        "--version",
        action="version",
        version="Charset-Normalizer {} - Python {} - Unicode {} - SpeedUp {}".format(
            __version__,
            python_version(),
            unidata_version,
            "OFF" if md_module.__file__.lower().endswith(".py") else "ON",
        ),
        help="Show version information and exit.",
    )

    args = parser.parse_args(argv)

    if args.replace is True and args.normalize is False:
        print("Use --replace in addition of --normalize only.", file=sys.stderr)
        return 1

    if args.force is True and args.replace is False:
        print("Use --force in addition of --replace only.", file=sys.stderr)
        return 1

    if args.threshold < 0.0 or args.threshold > 1.0:
        print("--threshold VALUE should be between 0. AND 1.", file=sys.stderr)
        return 1

    x_ = []

    for my_file in args.files:
        matches = from_fp(my_file, threshold=args.threshold, explain=args.verbose)

        best_guess = matches.best()

        if best_guess is None:
            print(
                'Unable to identify originating encoding for "{}". {}'.format(
                    my_file.name,
                    "Maybe try increasing maximum amount of chaos."
                    if args.threshold < 1.0
                    else "",
                ),
                file=sys.stderr,
            )
            x_.append(
                CliDetectionResult(
                    abspath(my_file.name),
                    None,
                    [],
                    [],
                    "Unknown",
                    [],
                    False,
                    1.0,
                    0.0,
                    None,
                    True,
                )
            )
        else:
            # Keep a handle on THIS file's preferred result: it is the entry that
            # must receive the normalized output path further down.
            best_result = CliDetectionResult(
                abspath(my_file.name),
                best_guess.encoding,
                best_guess.encoding_aliases,
                [
                    cp
                    for cp in best_guess.could_be_from_charset
                    if cp != best_guess.encoding
                ],
                best_guess.language,
                best_guess.alphabets,
                best_guess.bom,
                best_guess.percent_chaos,
                best_guess.percent_coherence,
                None,
                True,
            )
            x_.append(best_result)

            if len(matches) > 1 and args.alternatives:
                for el in matches:
                    if el != best_guess:
                        x_.append(
                            CliDetectionResult(
                                abspath(my_file.name),
                                el.encoding,
                                el.encoding_aliases,
                                [
                                    cp
                                    for cp in el.could_be_from_charset
                                    if cp != el.encoding
                                ],
                                el.language,
                                el.alphabets,
                                el.bom,
                                el.percent_chaos,
                                el.percent_coherence,
                                None,
                                False,
                            )
                        )

            if args.normalize is True:
                if best_guess.encoding.startswith("utf") is True:
                    print(
                        '"{}" file does not need to be normalized, as it already came from unicode.'.format(
                            my_file.name
                        ),
                        file=sys.stderr,
                    )
                    if my_file.closed is False:
                        my_file.close()
                    continue

                dir_path = dirname(realpath(my_file.name))
                file_name = basename(realpath(my_file.name))

                o_: List[str] = file_name.split(".")

                if args.replace is False:
                    # Write next to the original: insert the encoding before the last
                    # dot separated part of the file name.
                    o_.insert(-1, best_guess.encoding)
                    if my_file.closed is False:
                        my_file.close()
                elif (
                    args.force is False
                    and query_yes_no(
                        'Are you sure to normalize "{}" by replacing it ?'.format(
                            my_file.name
                        ),
                        "no",
                    )
                    is False
                ):
                    if my_file.closed is False:
                        my_file.close()
                    continue

                try:
                    # Previously stored on x_[0], which is the wrong entry as soon as
                    # more than one file is processed.
                    best_result.unicode_path = join(dir_path, ".".join(o_))

                    with open(best_result.unicode_path, "w", encoding="utf-8") as fp:
                        fp.write(str(best_guess))
                except IOError as e:
                    print(str(e), file=sys.stderr)
                    if my_file.closed is False:
                        my_file.close()
                    return 2

        if my_file.closed is False:
            my_file.close()

    if args.minimal is False:
        print(
            dumps(
                [el.__dict__ for el in x_] if len(x_) > 1 else x_[0].__dict__,
                ensure_ascii=True,
                indent=4,
            )
        )
    else:
        for my_file in args.files:
            print(
                ", ".join(
                    [
                        el.encoding or "undefined"
                        for el in x_
                        if el.path == abspath(my_file.name)
                    ]
                )
            )

    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # cli_detect() returns the status code (0 on success); hand it to the
    # interpreter so that failures are visible to the calling shell.
    sys.exit(cli_detect())
|
Binary file not shown.
Binary file not shown.
File diff suppressed because it is too large
Load Diff
@ -1,54 +0,0 @@
|
||||
from typing import Any, Dict, Optional, Union
|
||||
from warnings import warn
|
||||
|
||||
from .api import from_bytes
|
||||
from .constant import CHARDET_CORRESPONDENCE
|
||||
|
||||
|
||||
def detect(
    byte_str: bytes, should_rename_legacy: bool = False, **kwargs: Any
) -> Dict[str, Optional[Union[str, float]]]:
    """
    chardet legacy method
    Detect the encoding of the given byte string. It should be mostly backward-compatible.
    Encoding name will match Chardet own writing whenever possible. (Not on encoding name unsupported by it)
    This function is deprecated and should be used to migrate your project easily, consult the documentation for
    further information. Not planned for removal.

    :param byte_str: The byte sequence to examine.
    :param should_rename_legacy: When False (the default), an encoding found in
        CHARDET_CORRESPONDENCE is replaced by the name registered there.
    :param kwargs: Ignored; a warning is emitted when any is given.
    :return: A dict with the keys "encoding", "language" and "confidence".
    :raises TypeError: If byte_str is neither bytes nor bytearray.
    """
    if len(kwargs):
        warn(
            f"charset-normalizer disregard arguments '{','.join(list(kwargs.keys()))}' in legacy function detect()"
        )

    if not isinstance(byte_str, (bytearray, bytes)):
        raise TypeError(  # pragma: nocover
            "Expected object of type bytes or bytearray, got: "
            "{0}".format(type(byte_str))
        )

    payload = bytes(byte_str) if isinstance(byte_str, bytearray) else byte_str

    best_match = from_bytes(payload).best()

    if best_match is None:
        encoding, language, confidence = None, "", None
    else:
        encoding = best_match.encoding
        language = "" if best_match.language == "Unknown" else best_match.language
        confidence = 1.0 - best_match.chaos

        # Note: CharsetNormalizer does not return 'UTF-8-SIG' as the sig get stripped in the
        # detection/normalization process but chardet does return 'utf-8-sig' and it is a
        # valid codec name.
        if encoding == "utf_8" and best_match.bom:
            encoding += "_sig"

    if should_rename_legacy is False and encoding in CHARDET_CORRESPONDENCE:
        encoding = CHARDET_CORRESPONDENCE[encoding]

    return {
        "encoding": encoding,
        "language": language,
        "confidence": confidence,
    }
|
Binary file not shown.
@ -1,615 +0,0 @@
|
||||
from functools import lru_cache
|
||||
from logging import getLogger
|
||||
from typing import List, Optional
|
||||
|
||||
from .constant import (
|
||||
COMMON_SAFE_ASCII_CHARACTERS,
|
||||
TRACE,
|
||||
UNICODE_SECONDARY_RANGE_KEYWORD,
|
||||
)
|
||||
from .utils import (
|
||||
is_accentuated,
|
||||
is_arabic,
|
||||
is_arabic_isolated_form,
|
||||
is_case_variable,
|
||||
is_cjk,
|
||||
is_emoticon,
|
||||
is_hangul,
|
||||
is_hiragana,
|
||||
is_katakana,
|
||||
is_latin,
|
||||
is_punctuation,
|
||||
is_separator,
|
||||
is_symbol,
|
||||
is_thai,
|
||||
is_unprintable,
|
||||
remove_accent,
|
||||
unicode_range,
|
||||
)
|
||||
|
||||
|
||||
class MessDetectorPlugin:
    """
    Abstract base for the mess detection plugins.

    Every detector MUST subclass it and implement each of the members below;
    all of them raise NotImplementedError here.
    """

    def eligible(self, character: str) -> bool:
        """
        Tell whether the given character should be passed to feed().
        """
        raise NotImplementedError  # pragma: nocover

    def feed(self, character: str) -> None:
        """
        Main routine, executed upon a character.
        Holds the logic in which the text would be considered chaotic.
        """
        raise NotImplementedError  # pragma: nocover

    def reset(self) -> None:  # pragma: no cover
        """
        Bring the plugin back to its initial state.
        """
        raise NotImplementedError

    @property
    def ratio(self) -> float:
        """
        Chaos ratio computed from what feed() has seen so far.
        Must NOT be lower than 0.; No restriction gt 0.
        """
        raise NotImplementedError  # pragma: nocover
|
||||
|
||||
|
||||
class TooManySymbolOrPunctuationPlugin(MessDetectorPlugin):
    """
    Flags content where punctuation and symbols make up a large share of the
    printable characters. A symbol weighs twice as much as a punctuation sign.
    """

    def __init__(self) -> None:
        self._punctuation_count: int = 0
        self._symbol_count: int = 0
        self._character_count: int = 0

        self._last_printable_char: Optional[str] = None
        self._frenzy_symbol_in_word: bool = False

    def eligible(self, character: str) -> bool:
        """Only printable characters are fed."""
        return character.isprintable()

    def feed(self, character: str) -> None:
        """Count the character and, when relevant, its punctuation/symbol weight."""
        self._character_count += 1

        # An immediate repetition of the previous character is not counted again,
        # neither are the characters listed in COMMON_SAFE_ASCII_CHARACTERS.
        if (
            character != self._last_printable_char
            and character not in COMMON_SAFE_ASCII_CHARACTERS
        ):
            if is_punctuation(character):
                self._punctuation_count += 1
            elif (
                character.isdigit() is False
                and is_symbol(character)
                and is_emoticon(character) is False
            ):
                self._symbol_count += 2

        self._last_printable_char = character

    def reset(self) -> None:  # pragma: no cover
        """Restore every attribute to the value set by __init__."""
        self._punctuation_count = 0
        self._character_count = 0
        self._symbol_count = 0
        # Without this, the first character fed after a reset could be taken
        # for a repetition of a character seen before the reset.
        self._last_printable_char = None
        self._frenzy_symbol_in_word = False

    @property
    def ratio(self) -> float:
        """Weighted punctuation+symbol share, or 0.0 when it is below 0.3."""
        if self._character_count == 0:
            return 0.0

        ratio_of_punctuation: float = (
            self._punctuation_count + self._symbol_count
        ) / self._character_count

        return ratio_of_punctuation if ratio_of_punctuation >= 0.3 else 0.0
|
||||
|
||||
|
||||
class TooManyAccentuatedPlugin(MessDetectorPlugin):
    """
    Flags content where accentuated letters represent a large share of the
    alphabetic characters.
    """

    def __init__(self) -> None:
        self._character_count: int = 0
        self._accentuated_count: int = 0

    def eligible(self, character: str) -> bool:
        """Only alphabetic characters are fed."""
        return character.isalpha()

    def feed(self, character: str) -> None:
        """Count the letter, and count it as accentuated when is_accentuated says so."""
        if is_accentuated(character):
            self._accentuated_count += 1

        self._character_count += 1

    def reset(self) -> None:  # pragma: no cover
        """Clear both counters."""
        self._accentuated_count = 0
        self._character_count = 0

    @property
    def ratio(self) -> float:
        """Accentuated share; 0.0 with fewer than 8 letters or a share below 0.35."""
        if self._character_count < 8:
            return 0.0

        accentuated_share: float = self._accentuated_count / self._character_count

        if accentuated_share >= 0.35:
            return accentuated_share
        return 0.0
|
||||
|
||||
|
||||
class UnprintablePlugin(MessDetectorPlugin):
    """
    Flags content that contains characters reported by is_unprintable.
    Each of them weighs 8 times a regular character in the ratio.
    """

    def __init__(self) -> None:
        self._unprintable_count: int = 0
        self._character_count: int = 0

    def eligible(self, character: str) -> bool:
        """Every character is fed."""
        return True

    def feed(self, character: str) -> None:
        """Count the character, and count it as unprintable when it is one."""
        if is_unprintable(character):
            self._unprintable_count += 1
        self._character_count += 1

    def reset(self) -> None:  # pragma: no cover
        """Clear both counters."""
        self._unprintable_count = 0
        # The total must be cleared as well, otherwise the ratio computed after
        # a reset is diluted by the characters seen before it.
        self._character_count = 0

    @property
    def ratio(self) -> float:
        """Unprintable count times 8 over the total count (0.0 when nothing was fed)."""
        if self._character_count == 0:
            return 0.0

        return (self._unprintable_count * 8) / self._character_count
|
||||
|
||||
|
||||
class SuspiciousDuplicateAccentPlugin(MessDetectorPlugin):
    """
    Flags latin letters where two accentuated characters directly follow
    each other.
    """

    def __init__(self) -> None:
        self._successive_count: int = 0
        self._character_count: int = 0

        self._last_latin_character: Optional[str] = None

    def eligible(self, character: str) -> bool:
        """Only alphabetic latin characters are fed."""
        return character.isalpha() and is_latin(character)

    def feed(self, character: str) -> None:
        """Compare the character with the previously fed one and score accent pairs."""
        previous = self._last_latin_character

        self._character_count += 1

        if (
            previous is not None
            and is_accentuated(character)
            and is_accentuated(previous)
        ):
            if character.isupper() and previous.isupper():
                self._successive_count += 1
            # Worse if its the same char duplicated with different accent.
            if remove_accent(character) == remove_accent(previous):
                self._successive_count += 1

        self._last_latin_character = character

    def reset(self) -> None:  # pragma: no cover
        """Clear the counters and forget the last character."""
        self._last_latin_character = None
        self._character_count = 0
        self._successive_count = 0

    @property
    def ratio(self) -> float:
        """Twice the successive count over the total count (0.0 when nothing was fed)."""
        if self._character_count == 0:
            return 0.0

        return (self._successive_count * 2) / self._character_count
|
||||
|
||||
|
||||
class SuspiciousRange(MessDetectorPlugin):
    """
    Flags neighbouring printable characters whose Unicode ranges are judged
    suspicious by is_suspiciously_successive_range.
    """

    def __init__(self) -> None:
        self._suspicious_successive_range_count: int = 0
        self._character_count: int = 0
        self._last_printable_seen: Optional[str] = None

    def eligible(self, character: str) -> bool:
        """Only printable characters are fed."""
        return character.isprintable()

    def feed(self, character: str) -> None:
        """Count the character and compare its range with the previous one, if any."""
        self._character_count += 1

        # Spaces, punctuation and safe ASCII characters break the chain.
        breaks_chain = (
            character.isspace()
            or is_punctuation(character)
            or character in COMMON_SAFE_ASCII_CHARACTERS
        )
        if breaks_chain:
            self._last_printable_seen = None
            return

        previous = self._last_printable_seen
        self._last_printable_seen = character

        # First character of a chain: nothing to compare with yet.
        if previous is None:
            return

        range_of_previous: Optional[str] = unicode_range(previous)
        range_of_current: Optional[str] = unicode_range(character)

        if is_suspiciously_successive_range(range_of_previous, range_of_current):
            self._suspicious_successive_range_count += 1

    def reset(self) -> None:  # pragma: no cover
        """Clear the counters and forget the last character."""
        self._last_printable_seen = None
        self._suspicious_successive_range_count = 0
        self._character_count = 0

    @property
    def ratio(self) -> float:
        """Twice the suspicious count over the total; 0.0 up to 24 characters."""
        if self._character_count <= 24:
            return 0.0

        return (
            self._suspicious_successive_range_count * 2
        ) / self._character_count
|
||||
|
||||
|
||||
class SuperWeirdWordPlugin(MessDetectorPlugin):
    """
    Accumulates alphabetic characters into a word buffer and, each time a word
    ends, decides whether that word looks bad. The ratio is the share of
    characters that belong to bad words.
    """

    def __init__(self) -> None:
        self._word_count: int = 0
        self._bad_word_count: int = 0
        self._foreign_long_count: int = 0

        self._is_current_word_bad: bool = False
        self._foreign_long_watch: bool = False

        self._character_count: int = 0
        self._bad_character_count: int = 0

        self._buffer: str = ""
        self._buffer_accent_count: int = 0

    def eligible(self, character: str) -> bool:
        """Every character is fed."""
        return True

    def feed(self, character: str) -> None:
        """Grow the current word, or close and evaluate it on a word boundary."""
        if character.isalpha():
            self._buffer += character
            if is_accentuated(character):
                self._buffer_accent_count += 1
            if (
                self._foreign_long_watch is False
                and (is_latin(character) is False or is_accentuated(character))
                and is_cjk(character) is False
                and is_hangul(character) is False
                and is_katakana(character) is False
                and is_hiragana(character) is False
                and is_thai(character) is False
            ):
                self._foreign_long_watch = True
            return

        # Nothing buffered: a non-alphabetic character cannot end or taint a word.
        if not self._buffer:
            return

        if character.isspace() or is_punctuation(character) or is_separator(character):
            self._word_count += 1
            buffer_length: int = len(self._buffer)

            self._character_count += buffer_length

            if buffer_length >= 4:
                if self._buffer_accent_count / buffer_length > 0.34:
                    self._is_current_word_bad = True
                # Word/Buffer ending with an upper case accentuated letter are so rare,
                # that we will consider them all as suspicious. Same weight as foreign_long suspicious.
                if (
                    is_accentuated(self._buffer[-1])
                    and self._buffer[-1].isupper()
                    and all(_.isupper() for _ in self._buffer) is False
                ):
                    self._foreign_long_count += 1
                    self._is_current_word_bad = True
            if buffer_length >= 24 and self._foreign_long_watch:
                camel_case_dst = [
                    i
                    for c, i in zip(self._buffer, range(0, buffer_length))
                    if c.isupper()
                ]
                probable_camel_cased: bool = False

                if camel_case_dst and (len(camel_case_dst) / buffer_length <= 0.3):
                    probable_camel_cased = True

                if not probable_camel_cased:
                    self._foreign_long_count += 1
                    self._is_current_word_bad = True

            if self._is_current_word_bad:
                self._bad_word_count += 1
                self._bad_character_count += len(self._buffer)
                self._is_current_word_bad = False

            self._foreign_long_watch = False
            self._buffer = ""
            self._buffer_accent_count = 0
        elif (
            character not in {"<", ">", "-", "=", "~", "|", "_"}
            and character.isdigit() is False
            and is_symbol(character)
        ):
            # A symbol glued inside a word taints the whole word.
            self._is_current_word_bad = True
            self._buffer += character

    def reset(self) -> None:  # pragma: no cover
        """Restore every attribute to the value set by __init__."""
        self._buffer = ""
        # Must be cleared together with the buffer, otherwise accents counted
        # before the reset leak into the first word seen after it.
        self._buffer_accent_count = 0
        self._is_current_word_bad = False
        self._foreign_long_watch = False
        self._bad_word_count = 0
        self._word_count = 0
        self._character_count = 0
        self._bad_character_count = 0
        self._foreign_long_count = 0

    @property
    def ratio(self) -> float:
        """Share of characters in bad words; 0.0 up to 10 words without foreign-long hit."""
        if self._word_count <= 10 and self._foreign_long_count == 0:
            return 0.0

        return self._bad_character_count / self._character_count
|
||||
|
||||
|
||||
class CjkInvalidStopPlugin(MessDetectorPlugin):
    """
    GB(Chinese) based encoding often render the stop incorrectly when the content does not fit and
    can be easily detected. Searching for the overuse of '丅' and '丄'.
    """

    def __init__(self) -> None:
        self._wrong_stop_count: int = 0
        self._cjk_character_count: int = 0

    def eligible(self, character: str) -> bool:
        """Every character is fed."""
        return True

    def feed(self, character: str) -> None:
        """Count wrong stops, and count every other character reported by is_cjk."""
        if character in {"丅", "丄"}:
            self._wrong_stop_count += 1
        elif is_cjk(character):
            self._cjk_character_count += 1

    def reset(self) -> None:  # pragma: no cover
        """Clear both counters."""
        self._cjk_character_count = 0
        self._wrong_stop_count = 0

    @property
    def ratio(self) -> float:
        """Wrong stops over CJK characters; 0.0 with fewer than 16 CJK characters."""
        if self._cjk_character_count < 16:
            return 0.0

        return self._wrong_stop_count / self._cjk_character_count
|
||||
|
||||
|
||||
class ArchaicUpperLowerPlugin(MessDetectorPlugin):
    """
    Flags chunks of case-variable letters in which the case keeps alternating
    from one letter to the next.
    """

    def __init__(self) -> None:
        self._buf: bool = False

        self._character_count_since_last_sep: int = 0

        self._successive_upper_lower_count: int = 0
        self._successive_upper_lower_count_final: int = 0

        self._character_count: int = 0

        self._last_alpha_seen: Optional[str] = None
        self._current_ascii_only: bool = True

    def eligible(self, character: str) -> bool:
        """Every character is fed."""
        return True

    def feed(self, character: str) -> None:
        """Track case flips inside a chunk and settle the chunk on a separator."""
        chunk_sep = (character.isalpha() and is_case_variable(character)) is False

        if chunk_sep and self._character_count_since_last_sep > 0:
            # The chunk score is only kept for chunks of at most 64 characters
            # that contain a non-ASCII character and are not closed by a digit.
            if (
                self._character_count_since_last_sep <= 64
                and not character.isdigit()
                and not self._current_ascii_only
            ):
                self._successive_upper_lower_count_final += (
                    self._successive_upper_lower_count
                )

            self._character_count += 1

            # Start over for the next chunk.
            self._successive_upper_lower_count = 0
            self._character_count_since_last_sep = 0
            self._last_alpha_seen = None
            self._buf = False
            self._current_ascii_only = True
            return

        if self._current_ascii_only and not character.isascii():
            self._current_ascii_only = False

        previous = self._last_alpha_seen

        if previous is not None:
            case_flipped = (character.isupper() and previous.islower()) or (
                character.islower() and previous.isupper()
            )

            if case_flipped and self._buf:
                # Second flip in a row: score it and re-arm.
                self._successive_upper_lower_count += 2
                self._buf = False
            else:
                self._buf = case_flipped

        self._character_count += 1
        self._character_count_since_last_sep += 1
        self._last_alpha_seen = character

    def reset(self) -> None:  # pragma: no cover
        """Restore every attribute to the value set by __init__."""
        self._buf = False
        self._character_count_since_last_sep = 0
        self._successive_upper_lower_count = 0
        self._successive_upper_lower_count_final = 0
        self._character_count = 0
        self._last_alpha_seen = None
        self._current_ascii_only = True

    @property
    def ratio(self) -> float:
        """Retained chunk scores over the total count (0.0 when nothing was fed)."""
        if self._character_count == 0:
            return 0.0

        return self._successive_upper_lower_count_final / self._character_count
|
||||
|
||||
|
||||
class ArabicIsolatedFormPlugin(MessDetectorPlugin):
    """
    Measures the share of arabic characters that are reported by
    is_arabic_isolated_form.
    """

    def __init__(self) -> None:
        self._character_count: int = 0
        self._isolated_form_count: int = 0

    def eligible(self, character: str) -> bool:
        """Only characters reported by is_arabic are fed."""
        return is_arabic(character)

    def feed(self, character: str) -> None:
        """Count the character, and count it as isolated form when it is one."""
        if is_arabic_isolated_form(character):
            self._isolated_form_count += 1

        self._character_count += 1

    def reset(self) -> None:  # pragma: no cover
        """Clear both counters."""
        self._isolated_form_count = 0
        self._character_count = 0

    @property
    def ratio(self) -> float:
        """Isolated form share; 0.0 with fewer than 8 characters."""
        if self._character_count < 8:
            return 0.0

        return self._isolated_form_count / self._character_count
|
||||
|
||||
|
||||
@lru_cache(maxsize=1024)
def is_suspiciously_successive_range(
    unicode_range_a: Optional[str], unicode_range_b: Optional[str]
) -> bool:
    """
    Determine if two Unicode range seen next to each other can be considered as suspicious.

    :param unicode_range_a: Range name of the first character, None when unknown.
    :param unicode_range_b: Range name of the second character, None when unknown.
    :return: True when the pair is suspicious, False when one of the known
        legitimate combinations below applies.
    """
    # An unknown range is always suspicious.
    if unicode_range_a is None or unicode_range_b is None:
        return True

    if unicode_range_a == unicode_range_b:
        return False

    both = (unicode_range_a, unicode_range_b)

    def any_contains(keyword: str) -> bool:
        return keyword in unicode_range_a or keyword in unicode_range_b

    if "Latin" in unicode_range_a and "Latin" in unicode_range_b:
        return False

    if any_contains("Emoticons"):
        return False

    # Latin characters can be accompanied with a combining diacritical mark
    # eg. Vietnamese.
    if any_contains("Latin") and any_contains("Combining"):
        return False

    # Two ranges sharing a (non secondary) keyword belong together.
    keywords_range_b = unicode_range_b.split(" ")

    for keyword in unicode_range_a.split(" "):
        if keyword in UNICODE_SECONDARY_RANGE_KEYWORD:
            continue
        if keyword in keywords_range_b:
            return False

    # Japanese Exception
    japanese_ranges = ("Hiragana", "Katakana")
    range_a_is_jp = unicode_range_a in japanese_ranges
    range_b_is_jp = unicode_range_b in japanese_ranges

    has_cjk = any_contains("CJK")
    has_basic_latin = "Basic Latin" in both

    if (range_a_is_jp or range_b_is_jp) and has_cjk:
        return False
    if range_a_is_jp and range_b_is_jp:
        return False

    if any_contains("Hangul") and (has_cjk or has_basic_latin):
        return False

    # Chinese/Japanese use dedicated range for punctuation and/or separators.
    if has_cjk or (range_a_is_jp and range_b_is_jp):
        if any_contains("Punctuation") or any_contains("Forms") or has_basic_latin:
            return False

    return True
|
||||
|
||||
|
||||
@lru_cache(maxsize=2048)
def mess_ratio(
    decoded_sequence: str, maximum_threshold: float = 0.2, debug: bool = False
) -> float:
    """
    Compute a mess ratio given a decoded bytes sequence. The maximum threshold does stop the computation earlier.

    One instance of every direct MessDetectorPlugin subclass is fed with each
    character of the sequence (plus a trailing line feed). The ratios of all
    detectors are summed at regular intervals and at the very last character.

    :param decoded_sequence: The decoded text to evaluate.
    :param maximum_threshold: Summed ratio from which the analysis is interrupted.
    :param debug: When True, details are logged at TRACE level on the
        "charset_normalizer" logger.
    :return: The last computed sum of ratios, rounded to 3 digits.
    """
    detectors: List[MessDetectorPlugin] = []
    for md_class in MessDetectorPlugin.__subclasses__():
        detectors.append(md_class())

    length: int = len(decoded_sequence) + 1
    last_index: int = length - 1

    mean_mess_ratio: float = 0.0

    # The longer the sequence, the less often the intermediary sum is computed.
    if length < 512:
        intermediary_mean_mess_ratio_calc: int = 32
    elif length <= 1024:
        intermediary_mean_mess_ratio_calc = 64
    else:
        intermediary_mean_mess_ratio_calc = 128

    for index, character in enumerate(decoded_sequence + "\n"):
        for detector in detectors:
            if detector.eligible(character):
                detector.feed(character)

        at_checkpoint = index > 0 and index % intermediary_mean_mess_ratio_calc == 0

        if at_checkpoint or index == last_index:
            mean_mess_ratio = sum(dt.ratio for dt in detectors)

            if mean_mess_ratio >= maximum_threshold:
                break

    if debug:
        logger = getLogger("charset_normalizer")

        logger.log(
            TRACE,
            "Mess-detector extended-analysis start. "
            f"intermediary_mean_mess_ratio_calc={intermediary_mean_mess_ratio_calc} mean_mess_ratio={mean_mess_ratio} "
            f"maximum_threshold={maximum_threshold}",
        )

        if len(decoded_sequence) > 16:
            logger.log(TRACE, f"Starting with: {decoded_sequence[:16]}")
            logger.log(TRACE, f"Ending with: {decoded_sequence[-16::]}")

        for dt in detectors:  # pragma: nocover
            logger.log(TRACE, f"{dt.__class__}: {dt.ratio}")

    return round(mean_mess_ratio, 3)
|
Binary file not shown.
@ -1,340 +0,0 @@
|
||||
from encodings.aliases import aliases
|
||||
from hashlib import sha256
|
||||
from json import dumps
|
||||
from typing import Any, Dict, Iterator, List, Optional, Tuple, Union
|
||||
|
||||
from .constant import TOO_BIG_SEQUENCE
|
||||
from .utils import iana_name, is_multi_byte_encoding, unicode_range
|
||||
|
||||
|
||||
class CharsetMatch:
    """
    One candidate interpretation of a byte payload: the guessed encoding, its
    mess ("chaos") ratio, its language coherence results and, lazily, the
    decoded text.
    """

    def __init__(
        self,
        payload: bytes,
        guessed_encoding: str,
        mean_mess_ratio: float,
        has_sig_or_bom: bool,
        languages: "CoherenceMatches",
        decoded_payload: Optional[str] = None,
    ):
        # Original, untouched bytes.
        self._payload: bytes = payload

        self._encoding: str = guessed_encoding
        self._mean_mess_ratio: float = mean_mess_ratio
        # Sequence of (language, ratio) pairs; the first entry is read as the best one.
        self._languages: CoherenceMatches = languages
        self._has_sig_or_bom: bool = has_sig_or_bom
        # Cache filled on first access of `alphabets`.
        self._unicode_ranges: Optional[List[str]] = None

        # Other matches registered through add_submatch().
        self._leaves: List[CharsetMatch] = []
        self._mean_coherence_ratio: float = 0.0

        # Cache for output(): last target encoding and the re-encoded bytes.
        self._output_payload: Optional[bytes] = None
        self._output_encoding: Optional[str] = None

        # Decoded text; None means "decode lazily in __str__".
        self._string: Optional[str] = decoded_payload

    def __eq__(self, other: object) -> bool:
        """
        Two matches are equal when they share the same encoding and the same
        fingerprint. Comparing with any other type yields NotImplemented so that
        Python falls back to its default (``match == None`` is simply False)
        instead of raising.
        """
        if not isinstance(other, CharsetMatch):
            return NotImplemented
        return self.encoding == other.encoding and self.fingerprint == other.fingerprint

    def __lt__(self, other: object) -> bool:
        """
        Implemented to make sorted available upon CharsetMatches items.
        Raises ValueError when `other` is not a CharsetMatch.
        """
        if not isinstance(other, CharsetMatch):
            raise ValueError

        chaos_difference: float = abs(self.chaos - other.chaos)
        coherence_difference: float = abs(self.coherence - other.coherence)

        # Below 1% difference --> Use Coherence
        if chaos_difference < 0.01 and coherence_difference > 0.02:
            return self.coherence > other.coherence
        elif chaos_difference < 0.01 and coherence_difference <= 0.02:
            # When having a difficult decision, use the result that decoded as many multi-byte as possible.
            # preserve RAM usage!
            if len(self._payload) >= TOO_BIG_SEQUENCE:
                return self.chaos < other.chaos
            return self.multi_byte_usage > other.multi_byte_usage

        return self.chaos < other.chaos

    @property
    def multi_byte_usage(self) -> float:
        """
        1.0 minus the ratio "decoded characters / raw bytes".
        An empty payload has no multi-byte usage and yields 0.0 (avoids a division by zero).
        """
        raw_length: int = len(self.raw)
        if raw_length == 0:
            return 0.0
        return 1.0 - (len(str(self)) / raw_length)

    def __str__(self) -> str:
        # Lazy Str Loading
        if self._string is None:
            self._string = str(self._payload, self._encoding, "strict")
        return self._string

    def __repr__(self) -> str:
        return "<CharsetMatch '{}' bytes({})>".format(self.encoding, self.fingerprint)

    def add_submatch(self, other: "CharsetMatch") -> None:
        """
        Register `other` as a submatch of this match.
        Raises ValueError when `other` is not a CharsetMatch or is equal to this match.
        """
        if not isinstance(other, CharsetMatch) or other == self:
            raise ValueError(
                "Unable to add instance <{}> as a submatch of a CharsetMatch".format(
                    other.__class__
                )
            )

        other._string = None  # Unload RAM usage; dirty trick.
        self._leaves.append(other)

    @property
    def encoding(self) -> str:
        return self._encoding

    @property
    def encoding_aliases(self) -> List[str]:
        """
        Encodings are known by many names; using this could help when searching for IBM855 when it's listed as CP855.
        """
        also_known_as: List[str] = []
        for u, p in aliases.items():
            if self.encoding == u:
                also_known_as.append(p)
            elif self.encoding == p:
                also_known_as.append(u)
        return also_known_as

    @property
    def bom(self) -> bool:
        return self._has_sig_or_bom

    @property
    def byte_order_mark(self) -> bool:
        return self._has_sig_or_bom

    @property
    def languages(self) -> List[str]:
        """
        Return the complete list of possible languages found in decoded sequence.
        Usually not really useful. Returned list may be empty even if 'language' property return something != 'Unknown'.
        """
        return [e[0] for e in self._languages]

    @property
    def language(self) -> str:
        """
        Most probable language found in decoded sequence. If none were detected or inferred, the property will return
        "Unknown".
        """
        if not self._languages:
            # Trying to infer the language based on the given encoding
            # Its either English or we should not pronounce ourselves in certain cases.
            if "ascii" in self.could_be_from_charset:
                return "English"

            # doing it there to avoid circular import
            from charset_normalizer.cd import encoding_languages, mb_encoding_languages

            languages = (
                mb_encoding_languages(self.encoding)
                if is_multi_byte_encoding(self.encoding)
                else encoding_languages(self.encoding)
            )

            if len(languages) == 0 or "Latin Based" in languages:
                return "Unknown"

            return languages[0]

        return self._languages[0][0]

    @property
    def chaos(self) -> float:
        return self._mean_mess_ratio

    @property
    def coherence(self) -> float:
        if not self._languages:
            return 0.0
        return self._languages[0][1]

    @property
    def percent_chaos(self) -> float:
        return round(self.chaos * 100, ndigits=3)

    @property
    def percent_coherence(self) -> float:
        return round(self.coherence * 100, ndigits=3)

    @property
    def raw(self) -> bytes:
        """
        Original untouched bytes.
        """
        return self._payload

    @property
    def submatch(self) -> List["CharsetMatch"]:
        return self._leaves

    @property
    def has_submatch(self) -> bool:
        return len(self._leaves) > 0

    @property
    def alphabets(self) -> List[str]:
        """
        Sorted, de-duplicated names of the Unicode ranges found in the decoded text (cached after first use).
        """
        if self._unicode_ranges is not None:
            return self._unicode_ranges
        # list detected ranges
        detected_ranges: List[Optional[str]] = [
            unicode_range(char) for char in str(self)
        ]
        # filter and sort
        self._unicode_ranges = sorted(list({r for r in detected_ranges if r}))
        return self._unicode_ranges

    @property
    def could_be_from_charset(self) -> List[str]:
        """
        The complete list of encoding that output the exact SAME str result and therefore could be the originating
        encoding.
        This list does include the encoding available in property 'encoding'.
        """
        return [self._encoding] + [m.encoding for m in self._leaves]

    def output(self, encoding: str = "utf_8") -> bytes:
        """
        Method to get re-encoded bytes payload using given target encoding. Default to UTF-8.
        Characters that cannot be represented in the target encoding are substituted by the
        encoder (error handler "replace"); they are neither dropped nor do they raise.
        The result is cached until another target encoding is requested.
        """
        if self._output_encoding is None or self._output_encoding != encoding:
            self._output_encoding = encoding
            self._output_payload = str(self).encode(encoding, "replace")

        return self._output_payload  # type: ignore

    @property
    def fingerprint(self) -> str:
        """
        Retrieve the unique SHA256 computed using the transformed (re-encoded) payload. Not the original one.
        """
        return sha256(self.output()).hexdigest()
|
||||
|
||||
|
||||
class CharsetMatches:
    """
    Container with every CharsetMatch items ordered by default from most probable to the less one.
    Act like a list(iterable) but does not implements all related methods.
    """

    def __init__(self, results: Optional[List[CharsetMatch]] = None):
        # Kept sorted at all times (ordering is defined by CharsetMatch.__lt__).
        self._results: List[CharsetMatch] = sorted(results) if results else []

    def __iter__(self) -> Iterator[CharsetMatch]:
        yield from self._results

    def __getitem__(self, item: Union[int, str]) -> CharsetMatch:
        """
        Retrieve a single item either by its position or encoding name (alias may be used here).
        Raise IndexError upon an out-of-range position.
        Raise KeyError when the encoding is not present in results, or when `item` is
        neither an int nor a str.
        """
        if isinstance(item, int):
            return self._results[item]
        if isinstance(item, str):
            # Non-strict: an unknown name is kept as-is and simply will not match anything.
            item = iana_name(item, False)
            for result in self._results:
                if item in result.could_be_from_charset:
                    return result
        raise KeyError

    def __len__(self) -> int:
        return len(self._results)

    def __bool__(self) -> bool:
        return len(self._results) > 0

    def append(self, item: CharsetMatch) -> None:
        """
        Insert a single match. Will be inserted accordingly to preserve sort.
        Can be inserted as a submatch.
        Raises ValueError when `item` is not a CharsetMatch.
        """
        if not isinstance(item, CharsetMatch):
            raise ValueError(
                "Cannot append instance '{}' to CharsetMatches".format(
                    str(item.__class__)
                )
            )
        # We should disable the submatch factoring when the input file is too heavy (conserve RAM usage)
        # Strict "<" so the boundary agrees with CharsetMatch.__lt__, which treats
        # len(payload) >= TOO_BIG_SEQUENCE as "too big".
        if len(item.raw) < TOO_BIG_SEQUENCE:
            for match in self._results:
                if match.fingerprint == item.fingerprint and match.chaos == item.chaos:
                    match.add_submatch(item)
                    return
        self._results.append(item)
        self._results = sorted(self._results)

    def best(self) -> Optional["CharsetMatch"]:
        """
        Simply return the first match, or None when the container is empty.
        """
        if not self._results:
            return None
        return self._results[0]

    def first(self) -> Optional["CharsetMatch"]:
        """
        Redundant method, call the method best(). Kept for BC reasons.
        """
        return self.best()
|
||||
|
||||
|
||||
# A single coherence result: a (str, float) pair.
CoherenceMatch = Tuple[str, float]
# A list of coherence results.
CoherenceMatches = List[CoherenceMatch]
|
||||
|
||||
|
||||
class CliDetectionResult:
    """
    Plain record describing the detection outcome for one path, serialisable to JSON.
    """

    def __init__(
        self,
        path: str,
        encoding: Optional[str],
        encoding_aliases: List[str],
        alternative_encodings: List[str],
        language: str,
        alphabets: List[str],
        has_sig_or_bom: bool,
        chaos: float,
        coherence: float,
        unicode_path: Optional[str],
        is_preferred: bool,
    ):
        # Location of the analysed input.
        self.path: str = path
        self.unicode_path: Optional[str] = unicode_path

        # Detected encoding and its alternatives.
        self.encoding: Optional[str] = encoding
        self.encoding_aliases: List[str] = encoding_aliases
        self.alternative_encodings: List[str] = alternative_encodings

        # Content description.
        self.language: str = language
        self.alphabets: List[str] = alphabets
        self.has_sig_or_bom: bool = has_sig_or_bom

        # Scores and ranking flag.
        self.chaos: float = chaos
        self.coherence: float = coherence
        self.is_preferred: bool = is_preferred

    @property
    def __dict__(self) -> Dict[str, Any]:  # type: ignore
        """Mapping of every public field, in the order used for JSON output."""
        exported_fields = (
            "path",
            "encoding",
            "encoding_aliases",
            "alternative_encodings",
            "language",
            "alphabets",
            "has_sig_or_bom",
            "chaos",
            "coherence",
            "unicode_path",
            "is_preferred",
        )
        return {field: getattr(self, field) for field in exported_fields}

    def to_json(self) -> str:
        """Serialise the record as ASCII-only JSON indented by four spaces."""
        serialisable = self.__dict__
        return dumps(serialisable, ensure_ascii=True, indent=4)
|
@ -1,421 +0,0 @@
|
||||
import importlib
|
||||
import logging
|
||||
import unicodedata
|
||||
from codecs import IncrementalDecoder
|
||||
from encodings.aliases import aliases
|
||||
from functools import lru_cache
|
||||
from re import findall
|
||||
from typing import Generator, List, Optional, Set, Tuple, Union
|
||||
|
||||
from _multibytecodec import MultibyteIncrementalDecoder
|
||||
|
||||
from .constant import (
|
||||
ENCODING_MARKS,
|
||||
IANA_SUPPORTED_SIMILAR,
|
||||
RE_POSSIBLE_ENCODING_INDICATION,
|
||||
UNICODE_RANGES_COMBINED,
|
||||
UNICODE_SECONDARY_RANGE_KEYWORD,
|
||||
UTF8_MAXIMAL_ALLOCATION,
|
||||
)
|
||||
|
||||
|
||||
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_accentuated(character: str) -> bool:
    """
    Tell whether the Unicode name of `character` mentions one of the accent
    marks listed below. Characters without a Unicode name yield False.
    """
    try:
        description: str = unicodedata.name(character)
    except ValueError:
        return False

    accent_markers = (
        "WITH GRAVE",
        "WITH ACUTE",
        "WITH CEDILLA",
        "WITH DIAERESIS",
        "WITH CIRCUMFLEX",
        "WITH TILDE",
        "WITH MACRON",
        "WITH RING ABOVE",
    )
    return any(marker in description for marker in accent_markers)
|
||||
|
||||
|
||||
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def remove_accent(character: str) -> str:
    """
    Return the first code point of the canonical decomposition of `character`.

    The character is returned unchanged when it has no decomposition, or when
    the decomposition is a tagged (compatibility) mapping such as
    "<compat> 0066 0069": the leading "<tag>" is not a hexadecimal code point
    and used to make int() raise ValueError.
    """
    decomposed: str = unicodedata.decomposition(character)
    if not decomposed:
        return character

    codes: List[str] = decomposed.split(" ")

    # Tagged mappings start with "<...>"; only canonical ones are used here.
    if codes[0].startswith("<"):
        return character

    return chr(int(codes[0], 16))
|
||||
|
||||
|
||||
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def unicode_range(character: str) -> Optional[str]:
    """
    Look up the name of the first entry of UNICODE_RANGES_COMBINED whose range
    contains the code point of `character`; None when no entry contains it.
    """
    code_point: int = ord(character)

    return next(
        (
            name
            for name, span in UNICODE_RANGES_COMBINED.items()
            if code_point in span
        ),
        None,
    )
|
||||
|
||||
|
||||
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_latin(character: str) -> bool:
    """True when the Unicode name of `character` contains "LATIN"; unnamed characters yield False."""
    try:
        return "LATIN" in unicodedata.name(character)
    except ValueError:
        # No Unicode name is registered for this character.
        return False
|
||||
|
||||
|
||||
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_punctuation(character: str) -> bool:
    """
    True when the general category of `character` contains "P", or when the name
    of its Unicode range (as given by unicode_range) contains "Punctuation".
    """
    if "P" in unicodedata.category(character):
        return True

    block_name: Optional[str] = unicode_range(character)

    return block_name is not None and "Punctuation" in block_name
|
||||
|
||||
|
||||
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_symbol(character: str) -> bool:
    """
    True when the general category of `character` contains "S" or "N", or when
    its Unicode range name contains "Forms" and its category is not "Lo".
    """
    category: str = unicodedata.category(character)

    if any(flag in category for flag in ("S", "N")):
        return True

    block_name: Optional[str] = unicode_range(character)

    if block_name is None:
        return False

    return category != "Lo" and "Forms" in block_name
|
||||
|
||||
|
||||
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_emoticon(character: str) -> bool:
    """True when the Unicode range name of `character` contains "Emoticons" or "Pictographs"."""
    block_name: Optional[str] = unicode_range(character)

    if block_name is None:
        return False

    return any(keyword in block_name for keyword in ("Emoticons", "Pictographs"))
|
||||
|
||||
|
||||
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_separator(character: str) -> bool:
    """
    True for whitespace, for one of "|", "+", "<", ">", for any character whose
    general category contains "Z", and for the categories "Po", "Pd" and "Pc".
    """
    if character.isspace():
        return True
    if character in {"|", "+", "<", ">"}:
        return True

    category: str = unicodedata.category(character)

    if "Z" in category:
        return True
    return category in {"Po", "Pd", "Pc"}
|
||||
|
||||
|
||||
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_case_variable(character: str) -> bool:
    """True when exactly one of islower() / isupper() holds for `character`."""
    lower: bool = character.islower()
    upper: bool = character.isupper()
    return lower is not upper
|
||||
|
||||
|
||||
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_cjk(character: str) -> bool:
    """True when the Unicode name of `character` contains "CJK"; unnamed characters yield False."""
    # The empty default stands in for characters that have no Unicode name.
    return "CJK" in unicodedata.name(character, "")
|
||||
|
||||
|
||||
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_hiragana(character: str) -> bool:
    """True when the Unicode name of `character` contains "HIRAGANA"; unnamed characters yield False."""
    # The empty default stands in for characters that have no Unicode name.
    return "HIRAGANA" in unicodedata.name(character, "")
|
||||
|
||||
|
||||
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_katakana(character: str) -> bool:
    """True when the Unicode name of `character` contains "KATAKANA"; unnamed characters yield False."""
    # The empty default stands in for characters that have no Unicode name.
    return "KATAKANA" in unicodedata.name(character, "")
|
||||
|
||||
|
||||
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_hangul(character: str) -> bool:
    """True when the Unicode name of `character` contains "HANGUL"; unnamed characters yield False."""
    # The empty default stands in for characters that have no Unicode name.
    return "HANGUL" in unicodedata.name(character, "")
|
||||
|
||||
|
||||
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_thai(character: str) -> bool:
    """True when the Unicode name of `character` contains "THAI"; unnamed characters yield False."""
    # The empty default stands in for characters that have no Unicode name.
    return "THAI" in unicodedata.name(character, "")
|
||||
|
||||
|
||||
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_arabic(character: str) -> bool:
    """True when the Unicode name of `character` contains "ARABIC"; unnamed characters yield False."""
    # The empty default stands in for characters that have no Unicode name.
    return "ARABIC" in unicodedata.name(character, "")
|
||||
|
||||
|
||||
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_arabic_isolated_form(character: str) -> bool:
    """
    True when the Unicode name of `character` contains both "ARABIC" and
    "ISOLATED FORM"; unnamed characters yield False.
    """
    # The empty default stands in for characters that have no Unicode name.
    unicode_name = unicodedata.name(character, "")

    if "ARABIC" not in unicode_name:
        return False
    return "ISOLATED FORM" in unicode_name
|
||||
|
||||
|
||||
@lru_cache(maxsize=len(UNICODE_RANGES_COMBINED))
def is_unicode_range_secondary(range_name: str) -> bool:
    """True when `range_name` contains at least one entry of UNICODE_SECONDARY_RANGE_KEYWORD."""
    for keyword in UNICODE_SECONDARY_RANGE_KEYWORD:
        if keyword in range_name:
            return True
    return False
|
||||
|
||||
|
||||
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_unprintable(character: str) -> bool:
    """
    True when `character` is neither whitespace nor printable, with two
    exceptions that are never reported as unprintable: "\\x1A" and "\\ufeff".
    """
    # Whitespace includes \n \t \r \v
    if character.isspace() or character.isprintable():
        return False

    # "\x1A": the ASCII substitute character.
    if character == "\x1A":
        return False

    # "\ufeff": bug discovered in Python,
    # Zero Width No-Break Space located in Arabic Presentation Forms-B, Unicode 1.1 not acknowledged as space.
    return character != "\ufeff"
|
||||
|
||||
|
||||
def any_specified_encoding(sequence: bytes, search_zone: int = 8192) -> Optional[str]:
    """
    Scan the first `search_zone` bytes, decoded as ASCII with undecodable bytes
    dropped, for an encoding declaration and return the matching codec name
    from the standard alias table. Returns None when nothing usable is found.
    Raises TypeError when `sequence` is not a bytes object.
    """
    if not isinstance(sequence, bytes):
        raise TypeError

    head: bytes = sequence[: min(len(sequence), search_zone)]

    declared: List[str] = findall(
        RE_POSSIBLE_ENCODING_INDICATION,
        head.decode("ascii", errors="ignore"),
    )

    for candidate in declared:
        normalized: str = candidate.lower().replace("-", "_")

        encoding_alias: str
        encoding_iana: str

        # First alias-table entry whose key or value equals the declaration wins.
        for encoding_alias, encoding_iana in aliases.items():
            if normalized in (encoding_alias, encoding_iana):
                return encoding_iana

    return None
|
||||
|
||||
|
||||
@lru_cache(maxsize=128)
def is_multi_byte_encoding(name: str) -> bool:
    """
    Tell whether the codec called `name` is a multi-byte one.

    The UTF family listed below is answered directly; for any other name the
    module ``encodings.<name>`` is imported and its IncrementalDecoder is
    checked against MultibyteIncrementalDecoder.
    """
    unicode_transformation_formats = {
        "utf_8",
        "utf_8_sig",
        "utf_16",
        "utf_16_be",
        "utf_16_le",
        "utf_32",
        "utf_32_le",
        "utf_32_be",
        "utf_7",
    }
    if name in unicode_transformation_formats:
        return True

    codec_module = importlib.import_module("encodings.{}".format(name))
    return issubclass(codec_module.IncrementalDecoder, MultibyteIncrementalDecoder)
|
||||
|
||||
|
||||
def identify_sig_or_bom(sequence: bytes) -> Tuple[Optional[str], bytes]:
    """
    Find the first entry of ENCODING_MARKS one of whose marks `sequence` starts
    with. Returns (encoding name, matched mark), or (None, b"") when none match.
    """
    for iana_encoding in ENCODING_MARKS:
        declared: Union[bytes, List[bytes]] = ENCODING_MARKS[iana_encoding]

        # An entry is either a single mark or a list of marks.
        candidates: List[bytes] = [declared] if isinstance(declared, bytes) else declared

        for mark in candidates:
            if sequence.startswith(mark):
                return iana_encoding, mark

    return None, b""
|
||||
|
||||
|
||||
def should_strip_sig_or_bom(iana_encoding: str) -> bool:
    """False for "utf_16" and "utf_32", True for every other encoding name."""
    mark_is_kept = {"utf_32", "utf_16"}
    if iana_encoding in mark_is_kept:
        return False
    return True
|
||||
|
||||
|
||||
def iana_name(cp_name: str, strict: bool = True) -> str:
    """
    Normalise `cp_name` (lower-case, "-" replaced by "_") and resolve it through
    the standard alias table, returning the table's target name.

    When nothing matches, raise ValueError if `strict`, otherwise return the
    normalised name unchanged.
    """
    normalized: str = cp_name.lower().replace("-", "_")

    encoding_alias: str
    encoding_iana: str

    for encoding_alias, encoding_iana in aliases.items():
        if normalized == encoding_alias or normalized == encoding_iana:
            return encoding_iana

    if not strict:
        return normalized

    raise ValueError("Unable to retrieve IANA for '{}'".format(normalized))
|
||||
|
||||
|
||||
def range_scan(decoded_sequence: str) -> List[str]:
    """
    Collect the distinct Unicode range names (as given by unicode_range) of the
    characters in `decoded_sequence`; characters without a range are skipped.
    """
    resolved = (unicode_range(character) for character in decoded_sequence)
    ranges: Set[str] = {name for name in resolved if name is not None}
    return list(ranges)
|
||||
|
||||
|
||||
def cp_similarity(iana_name_a: str, iana_name_b: str) -> float:
    """
    Ratio (0.0 to 1.0) of single byte values that both code pages decode to the
    same text. Returns 0.0 as soon as either encoding is a multi-byte one.

    Every byte value 0x00-0xFF is compared and the count is divided by the
    number of values compared. The previous form compared 255 values but
    divided by 254, so identical code pages scored above 1.0 and 0xFF was
    never looked at.
    """
    if is_multi_byte_encoding(iana_name_a) or is_multi_byte_encoding(iana_name_b):
        return 0.0

    decoder_a = importlib.import_module(
        "encodings.{}".format(iana_name_a)
    ).IncrementalDecoder
    decoder_b = importlib.import_module(
        "encodings.{}".format(iana_name_b)
    ).IncrementalDecoder

    # errors="ignore": a byte that is undefined in a code page decodes to "".
    id_a: IncrementalDecoder = decoder_a(errors="ignore")
    id_b: IncrementalDecoder = decoder_b(errors="ignore")

    byte_value_count: int = 256
    character_match_count: int = 0

    for i in range(byte_value_count):
        to_be_decoded: bytes = bytes([i])
        if id_a.decode(to_be_decoded) == id_b.decode(to_be_decoded):
            character_match_count += 1

    return character_match_count / byte_value_count
|
||||
|
||||
|
||||
def is_cp_similar(iana_name_a: str, iana_name_b: str) -> bool:
    """
    Determine if two code page are at least 80% similar. IANA_SUPPORTED_SIMILAR dict was generated using
    the function cp_similarity.
    """
    if iana_name_a not in IANA_SUPPORTED_SIMILAR:
        return False
    return iana_name_b in IANA_SUPPORTED_SIMILAR[iana_name_a]
|
||||
|
||||
|
||||
def set_logging_handler(
    name: str = "charset_normalizer",
    level: int = logging.INFO,
    format_string: str = "%(asctime)s | %(levelname)s | %(message)s",
) -> None:
    """
    Set `level` on the logger called `name` and attach a new StreamHandler
    that formats records with `format_string`. Each call adds one more handler.
    """
    target_logger = logging.getLogger(name)
    target_logger.setLevel(level)

    formatter = logging.Formatter(format_string)
    stream_handler = logging.StreamHandler()
    stream_handler.setFormatter(formatter)

    target_logger.addHandler(stream_handler)
|
||||
|
||||
|
||||
def cut_sequence_chunks(
    sequences: bytes,
    encoding_iana: str,
    offsets: range,
    chunk_size: int,
    bom_or_sig_available: bool,
    strip_sig_or_bom: bool,
    sig_payload: bytes,
    is_multi_byte_decoder: bool,
    decoded_payload: Optional[str] = None,
) -> Generator[str, None, None]:
    """
    Yield one decoded text chunk of at most `chunk_size` units per entry of `offsets`.

    When `decoded_payload` is available and the decoder is not multi-byte, the
    chunks are plain slices of `decoded_payload`. Otherwise each chunk is cut
    from the raw `sequences` bytes and decoded with `encoding_iana` (strictly
    for single-byte decoders, so a decoding error propagates to the caller).
    """
    if decoded_payload and is_multi_byte_decoder is False:
        # Fast path: slice the already decoded text; stop at the first empty slice.
        for i in offsets:
            chunk = decoded_payload[i : i + chunk_size]
            if not chunk:
                break
            yield chunk
    else:
        for i in offsets:
            chunk_end = i + chunk_size
            # Skip offsets whose chunk would end more than 8 bytes past the payload.
            if chunk_end > len(sequences) + 8:
                continue

            cut_sequence = sequences[i : i + chunk_size]

            # The mark is put back in front of the slice when it is not meant to be stripped.
            if bom_or_sig_available and strip_sig_or_bom is False:
                cut_sequence = sig_payload + cut_sequence

            chunk = cut_sequence.decode(
                encoding_iana,
                errors="ignore" if is_multi_byte_decoder else "strict",
            )

            # multi-byte bad cutting detector and adjustment
            # not the cleanest way to perform that fix but clever enough for now.
            if is_multi_byte_decoder and i > 0:
                # Only the first characters (at most 16) of the chunk are checked.
                chunk_partial_size_chk: int = min(chunk_size, 16)

                if (
                    decoded_payload
                    and chunk[:chunk_partial_size_chk] not in decoded_payload
                ):
                    # Re-cut starting at i, i-1, i-2, i-3 until the chunk prefix is
                    # found in the full decoded text; the last attempt is kept otherwise.
                    # NOTE(review): for i < 3 the start index j becomes negative and the
                    # slice is then taken relative to the end of `sequences` - confirm
                    # callers never pass such offsets.
                    for j in range(i, i - 4, -1):
                        cut_sequence = sequences[j:chunk_end]

                        if bom_or_sig_available and strip_sig_or_bom is False:
                            cut_sequence = sig_payload + cut_sequence

                        chunk = cut_sequence.decode(encoding_iana, errors="ignore")

                        if chunk[:chunk_partial_size_chk] in decoded_payload:
                            break

            yield chunk
|
@ -1,6 +0,0 @@
|
||||
"""
Expose version
"""

# Version as a dotted string.
__version__ = "3.3.2"
# The same version split on "." into a list of strings (components are not converted to int).
VERSION = __version__.split(".")
|
@ -1 +0,0 @@
|
||||
import os; var = 'SETUPTOOLS_USE_DISTUTILS'; enabled = os.environ.get(var, 'local') == 'local'; enabled and __import__('_distutils_hack').add_shim();
|
@ -1 +0,0 @@
|
||||
pip
|
@ -1,31 +0,0 @@
|
||||
BSD 3-Clause License
|
||||
|
||||
Copyright (c) 2013-2023, Kim Davies and contributors.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
3. Neither the name of the copyright holder nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
||||
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
@ -1,243 +0,0 @@
|
||||
Metadata-Version: 2.1
|
||||
Name: idna
|
||||
Version: 3.6
|
||||
Summary: Internationalized Domain Names in Applications (IDNA)
|
||||
Author-email: Kim Davies <kim+pypi@gumleaf.org>
|
||||
Requires-Python: >=3.5
|
||||
Description-Content-Type: text/x-rst
|
||||
Classifier: Development Status :: 5 - Production/Stable
|
||||
Classifier: Intended Audience :: Developers
|
||||
Classifier: Intended Audience :: System Administrators
|
||||
Classifier: License :: OSI Approved :: BSD License
|
||||
Classifier: Operating System :: OS Independent
|
||||
Classifier: Programming Language :: Python
|
||||
Classifier: Programming Language :: Python :: 3
|
||||
Classifier: Programming Language :: Python :: 3 :: Only
|
||||
Classifier: Programming Language :: Python :: 3.5
|
||||
Classifier: Programming Language :: Python :: 3.6
|
||||
Classifier: Programming Language :: Python :: 3.7
|
||||
Classifier: Programming Language :: Python :: 3.8
|
||||
Classifier: Programming Language :: Python :: 3.9
|
||||
Classifier: Programming Language :: Python :: 3.10
|
||||
Classifier: Programming Language :: Python :: 3.11
|
||||
Classifier: Programming Language :: Python :: 3.12
|
||||
Classifier: Programming Language :: Python :: Implementation :: CPython
|
||||
Classifier: Programming Language :: Python :: Implementation :: PyPy
|
||||
Classifier: Topic :: Internet :: Name Service (DNS)
|
||||
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
||||
Classifier: Topic :: Utilities
|
||||
Project-URL: Changelog, https://github.com/kjd/idna/blob/master/HISTORY.rst
|
||||
Project-URL: Issue tracker, https://github.com/kjd/idna/issues
|
||||
Project-URL: Source, https://github.com/kjd/idna
|
||||
|
||||
Internationalized Domain Names in Applications (IDNA)
|
||||
=====================================================
|
||||
|
||||
Support for the Internationalized Domain Names in
|
||||
Applications (IDNA) protocol as specified in `RFC 5891
|
||||
<https://tools.ietf.org/html/rfc5891>`_. This is the latest version of
|
||||
the protocol and is sometimes referred to as “IDNA 2008”.
|
||||
|
||||
This library also provides support for Unicode Technical
|
||||
Standard 46, `Unicode IDNA Compatibility Processing
|
||||
<https://unicode.org/reports/tr46/>`_.
|
||||
|
||||
This acts as a suitable replacement for the “encodings.idna”
|
||||
module that comes with the Python standard library, but which
|
||||
only supports the older superseded IDNA specification (`RFC 3490
|
||||
<https://tools.ietf.org/html/rfc3490>`_).
|
||||
|
||||
Basic functions are simply executed:
|
||||
|
||||
.. code-block:: pycon
|
||||
|
||||
>>> import idna
|
||||
>>> idna.encode('ドメイン.テスト')
|
||||
b'xn--eckwd4c7c.xn--zckzah'
|
||||
>>> print(idna.decode('xn--eckwd4c7c.xn--zckzah'))
|
||||
ドメイン.テスト
|
||||
|
||||
|
||||
Installation
|
||||
------------
|
||||
|
||||
This package is available for installation from PyPI:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ python3 -m pip install idna
|
||||
|
||||
|
||||
Usage
|
||||
-----
|
||||
|
||||
For typical usage, the ``encode`` and ``decode`` functions will take a
|
||||
domain name argument and perform a conversion to A-labels or U-labels
|
||||
respectively.
|
||||
|
||||
.. code-block:: pycon
|
||||
|
||||
>>> import idna
|
||||
>>> idna.encode('ドメイン.テスト')
|
||||
b'xn--eckwd4c7c.xn--zckzah'
|
||||
>>> print(idna.decode('xn--eckwd4c7c.xn--zckzah'))
|
||||
ドメイン.テスト
|
||||
|
||||
You may use the codec encoding and decoding methods using the
|
||||
``idna.codec`` module:
|
||||
|
||||
.. code-block:: pycon
|
||||
|
||||
>>> import idna.codec
|
||||
>>> print('домен.испытание'.encode('idna2008'))
|
||||
b'xn--d1acufc.xn--80akhbyknj4f'
|
||||
>>> print(b'xn--d1acufc.xn--80akhbyknj4f'.decode('idna2008'))
|
||||
домен.испытание
|
||||
|
||||
Conversions can be applied at a per-label basis using the ``ulabel`` or
|
||||
``alabel`` functions if necessary:
|
||||
|
||||
.. code-block:: pycon
|
||||
|
||||
>>> idna.alabel('测试')
|
||||
b'xn--0zwm56d'
|
||||
|
||||
Compatibility Mapping (UTS #46)
|
||||
+++++++++++++++++++++++++++++++
|
||||
|
||||
As described in `RFC 5895 <https://tools.ietf.org/html/rfc5895>`_, the
|
||||
IDNA specification does not normalize input from different potential
|
||||
ways a user may input a domain name. This functionality, known as
|
||||
a “mapping”, is considered by the specification to be a local
|
||||
user-interface issue distinct from IDNA conversion functionality.
|
||||
|
||||
This library provides one such mapping that was developed by the
|
||||
Unicode Consortium. Known as `Unicode IDNA Compatibility Processing
|
||||
<https://unicode.org/reports/tr46/>`_, it provides for both a regular
|
||||
mapping for typical applications, as well as a transitional mapping to
|
||||
help migrate from older IDNA 2003 applications.
|
||||
|
||||
For example, “Königsgäßchen” is not a permissible label as *LATIN
|
||||
CAPITAL LETTER K* is not allowed (nor are capital letters in general).
|
||||
UTS 46 will convert this into lower case prior to applying the IDNA
|
||||
conversion.
|
||||
|
||||
.. code-block:: pycon
|
||||
|
||||
>>> import idna
|
||||
>>> idna.encode('Königsgäßchen')
|
||||
...
|
||||
idna.core.InvalidCodepoint: Codepoint U+004B at position 1 of 'Königsgäßchen' not allowed
|
||||
>>> idna.encode('Königsgäßchen', uts46=True)
|
||||
b'xn--knigsgchen-b4a3dun'
|
||||
>>> print(idna.decode('xn--knigsgchen-b4a3dun'))
|
||||
königsgäßchen
|
||||
|
||||
Transitional processing provides conversions to help transition from
|
||||
the older 2003 standard to the current standard. For example, in the
|
||||
original IDNA specification, the *LATIN SMALL LETTER SHARP S* (ß) was
|
||||
converted into two *LATIN SMALL LETTER S* (ss), whereas in the current
|
||||
IDNA specification this conversion is not performed.
|
||||
|
||||
.. code-block:: pycon
|
||||
|
||||
>>> idna.encode('Königsgäßchen', uts46=True, transitional=True)
|
||||
b'xn--knigsgsschen-lcb0w'
|
||||
|
||||
Implementers should use transitional processing with caution, only in
|
||||
rare cases where conversion from legacy labels to current labels must be
|
||||
performed (i.e. IDNA implementations that pre-date 2008). For typical
|
||||
applications that just need to convert labels, transitional processing
|
||||
is unlikely to be beneficial and could produce unexpected incompatible
|
||||
results.
|
||||
|
||||
``encodings.idna`` Compatibility
|
||||
++++++++++++++++++++++++++++++++
|
||||
|
||||
Function calls from the Python built-in ``encodings.idna`` module are
|
||||
mapped to their IDNA 2008 equivalents using the ``idna.compat`` module.
|
||||
Simply substitute the ``import`` clause in your code to refer to the new
|
||||
module name.
|
||||
|
||||
Exceptions
|
||||
----------
|
||||
|
||||
All errors raised during the conversion following the specification
|
||||
should raise an exception derived from the ``idna.IDNAError`` base
|
||||
class.
|
||||
|
||||
More specific exceptions that may be generated are ``idna.IDNABidiError``
|
||||
when the error reflects an illegal combination of left-to-right and
|
||||
right-to-left characters in a label; ``idna.InvalidCodepoint`` when
|
||||
a specific codepoint is an illegal character in an IDN label (i.e.
|
||||
INVALID); and ``idna.InvalidCodepointContext`` when the codepoint is
|
||||
illegal based on its positional context (i.e. it is CONTEXTO or CONTEXTJ
|
||||
but the contextual requirements are not satisfied).
|
||||
|
||||
Building and Diagnostics
|
||||
------------------------
|
||||
|
||||
The IDNA and UTS 46 functionality relies upon pre-calculated lookup
|
||||
tables for performance. These tables are derived from computing against
|
||||
eligibility criteria in the respective standards. These tables are
|
||||
computed using the command-line script ``tools/idna-data``.
|
||||
|
||||
This tool will fetch relevant codepoint data from the Unicode repository
|
||||
and perform the required calculations to identify eligibility. There are
|
||||
three main modes:
|
||||
|
||||
* ``idna-data make-libdata``. Generates ``idnadata.py`` and
|
||||
``uts46data.py``, the pre-calculated lookup tables used for IDNA and
|
||||
UTS 46 conversions. Implementers who wish to track this library against
|
||||
a different Unicode version may use this tool to manually generate a
|
||||
different version of the ``idnadata.py`` and ``uts46data.py`` files.
|
||||
|
||||
* ``idna-data make-table``. Generates a table of the IDNA disposition
|
||||
(e.g. PVALID, CONTEXTJ, CONTEXTO) in the format found in Appendix
|
||||
B.1 of RFC 5892 and the pre-computed tables published by `IANA
|
||||
<https://www.iana.org/>`_.
|
||||
|
||||
* ``idna-data U+0061``. Prints debugging output on the various
|
||||
properties associated with an individual Unicode codepoint (in this
|
||||
case, U+0061), that are used to assess the IDNA and UTS 46 status of a
|
||||
codepoint. This is helpful in debugging or analysis.
|
||||
|
||||
The tool accepts a number of arguments, described using ``idna-data
|
||||
-h``. Most notably, the ``--version`` argument allows the specification
|
||||
of the version of Unicode to be used in computing the table data. For
|
||||
example, ``idna-data --version 9.0.0 make-libdata`` will generate
|
||||
library data against Unicode 9.0.0.
|
||||
|
||||
|
||||
Additional Notes
|
||||
----------------
|
||||
|
||||
* **Packages**. The latest tagged release version is published in the
|
||||
`Python Package Index <https://pypi.org/project/idna/>`_.
|
||||
|
||||
* **Version support**. This library supports Python 3.5 and higher.
|
||||
As this library serves as a low-level toolkit for a variety of
|
||||
applications, many of which strive for broad compatibility with older
|
||||
Python versions, there is no rush to remove older interpreter support.
|
||||
Removing support for older versions should be well justified in that the
|
||||
maintenance burden has become too high.
|
||||
|
||||
* **Python 2**. Python 2 is supported by version 2.x of this library.
|
||||
While active development of the version 2.x series has ended, notable
|
||||
issues being corrected may be backported to 2.x. Use "idna<3" in your
|
||||
requirements file if you need this library for a Python 2 application.
|
||||
|
||||
* **Testing**. The library has a test suite based on each rule of the
|
||||
IDNA specification, as well as tests that are provided as part of the
|
||||
Unicode Technical Standard 46, `Unicode IDNA Compatibility Processing
|
||||
<https://unicode.org/reports/tr46/>`_.
|
||||
|
||||
* **Emoji**. It is an occasional request to support emoji domains in
|
||||
this library. Encoding of symbols like emoji is expressly prohibited by
|
||||
the technical standard IDNA 2008 and emoji domains are broadly phased
|
||||
out across the domain industry due to associated security risks. For
|
||||
now, applications that need to support these non-compliant labels
|
||||
may wish to consider trying the encode/decode operation in this library
|
||||
first, and then falling back to using ``encodings.idna``. See `the GitHub
|
||||
project <https://github.com/kjd/idna/issues/18>`_ for more discussion.
|
||||
|
@ -1,22 +0,0 @@
|
||||
idna-3.6.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
|
||||
idna-3.6.dist-info/LICENSE.md,sha256=yy-vDKGMbTh-x8tm8yGTn7puZ-nawJ0xR3y52NP-aJk,1541
|
||||
idna-3.6.dist-info/METADATA,sha256=N93B509dkvvkd_Y0E_VxCHPkVkrD6InxoyfXvX4egds,9888
|
||||
idna-3.6.dist-info/RECORD,,
|
||||
idna-3.6.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
|
||||
idna/__init__.py,sha256=KJQN1eQBr8iIK5SKrJ47lXvxG0BJ7Lm38W4zT0v_8lk,849
|
||||
idna/__pycache__/__init__.cpython-311.pyc,,
|
||||
idna/__pycache__/codec.cpython-311.pyc,,
|
||||
idna/__pycache__/compat.cpython-311.pyc,,
|
||||
idna/__pycache__/core.cpython-311.pyc,,
|
||||
idna/__pycache__/idnadata.cpython-311.pyc,,
|
||||
idna/__pycache__/intranges.cpython-311.pyc,,
|
||||
idna/__pycache__/package_data.cpython-311.pyc,,
|
||||
idna/__pycache__/uts46data.cpython-311.pyc,,
|
||||
idna/codec.py,sha256=PS6m-XmdST7Wj7J7ulRMakPDt5EBJyYrT3CPtjh-7t4,3426
|
||||
idna/compat.py,sha256=0_sOEUMT4CVw9doD3vyRhX80X19PwqFoUBs7gWsFME4,321
|
||||
idna/core.py,sha256=Bxz9L1rH0N5U-yukGfPuDRTxR2jDUl96NCq1ql3YAUw,12908
|
||||
idna/idnadata.py,sha256=9u3Ec_GRrhlcbs7QM3pAZ2ObEQzPIOm99FaVOm91UGg,44351
|
||||
idna/intranges.py,sha256=YBr4fRYuWH7kTKS2tXlFjM24ZF1Pdvcir-aywniInqg,1881
|
||||
idna/package_data.py,sha256=y-iv-qJdmHsWVR5FszYwsMo1AQg8qpdU2aU5nT-S2oQ,21
|
||||
idna/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
||||
idna/uts46data.py,sha256=1KuksWqLuccPXm2uyRVkhfiFLNIhM_H2m4azCcnOqEU,206503
|
@ -1,4 +0,0 @@
|
||||
Wheel-Version: 1.0
|
||||
Generator: flit 3.9.0
|
||||
Root-Is-Purelib: true
|
||||
Tag: py3-none-any
|
@ -1,44 +0,0 @@
|
||||
from .package_data import __version__
|
||||
from .core import (
|
||||
IDNABidiError,
|
||||
IDNAError,
|
||||
InvalidCodepoint,
|
||||
InvalidCodepointContext,
|
||||
alabel,
|
||||
check_bidi,
|
||||
check_hyphen_ok,
|
||||
check_initial_combiner,
|
||||
check_label,
|
||||
check_nfc,
|
||||
decode,
|
||||
encode,
|
||||
ulabel,
|
||||
uts46_remap,
|
||||
valid_contextj,
|
||||
valid_contexto,
|
||||
valid_label_length,
|
||||
valid_string_length,
|
||||
)
|
||||
from .intranges import intranges_contain
|
||||
|
||||
__all__ = [
|
||||
"IDNABidiError",
|
||||
"IDNAError",
|
||||
"InvalidCodepoint",
|
||||
"InvalidCodepointContext",
|
||||
"alabel",
|
||||
"check_bidi",
|
||||
"check_hyphen_ok",
|
||||
"check_initial_combiner",
|
||||
"check_label",
|
||||
"check_nfc",
|
||||
"decode",
|
||||
"encode",
|
||||
"intranges_contain",
|
||||
"ulabel",
|
||||
"uts46_remap",
|
||||
"valid_contextj",
|
||||
"valid_contexto",
|
||||
"valid_label_length",
|
||||
"valid_string_length",
|
||||
]
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user