From 6795454af1ec9f3a01e054d4b35e6a833468480a Mon Sep 17 00:00:00 2001 From: Fredrik Eriksson Date: Sat, 2 May 2020 10:31:03 +0200 Subject: [PATCH] fix #1 - UTF-8 decode problems when using en_US. Apparently the en_US dictionary shipped with Ubuntu cannot be decoded as UTF-8, but works fine with ISO-8859-1. As a workaround, this adds another configuration and command-line option: encoding. If left empty, the encoding is autodetected from the environment; otherwise it should name the encoding that Python should use to read the dictionary. --- bin/pwgen | 3 +++ pwgen/__init__.py | 10 ++++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/bin/pwgen b/bin/pwgen index 5bf7562..99e13bf 100644 --- a/bin/pwgen +++ b/bin/pwgen @@ -22,6 +22,9 @@ def main(): parser.add_argument( '--myspell-dir', '-i', help='Directory containing myspell dictionaries') + parser.add_argument( + '--encoding', '-e', + help="Character encoding of the directory") parser.add_argument( '--lang', '-l', diff --git a/pwgen/__init__.py b/pwgen/__init__.py index 088333d..5922e37 100644 --- a/pwgen/__init__.py +++ b/pwgen/__init__.py @@ -4,6 +4,7 @@ import math import os import sys import subprocess +import locale if sys.version_info[0] < 3: import ConfigParser as configparser @@ -36,6 +37,7 @@ def update_config( word_min_char=2, word_max_char=0, unmunch_bin='', + encoding='', words=4, capitalize='random', @@ -65,6 +67,7 @@ def update_config( set_if_defined(conf, 'dictionary', 'word_min_char', word_min_char) set_if_defined(conf, 'dictionary', 'word_max_char', word_max_char) set_if_defined(conf, 'dictionary', 'unmunch_bin', unmunch_bin) + set_if_defined(conf, 'dictionary', 'encoding', encoding) if not conf.has_section('passwords'): conf.add_section('passwords') @@ -93,6 +96,9 @@ def _read_dictionary(conf): dict_file = os.path.join(conf.get('dictionary', 'myspell_dir'), '{}.dic'.format(conf.get('dictionary', 'lang'))) aff_file = os.path.join(conf.get('dictionary', 'myspell_dir'), 
'{}.aff'.format(conf.get('dictionary', 'lang'))) unmunch_bin = conf.get('dictionary', 'unmunch_bin') + encoding = conf.get('dictionary', 'encoding') + if not encoding: + encoding = locale.getpreferredencoding(False) words = set() chars = 0 if os.path.exists(aff_file) and unmunch_bin: @@ -106,7 +112,7 @@ def _read_dictionary(conf): if proc.returncode != 0: raise DictReadError('Unmunching dictionaries failed') for word in out.splitlines(): - save = word.strip().decode('utf-8') + save = word.strip().decode(encoding) if not save: continue first_char = save[:1] @@ -122,7 +128,7 @@ def _read_dictionary(conf): words.add(save) chars += len(save) else: - with open(dict_file, 'r') as f: + with open(dict_file, encoding=encoding, mode='r') as f: for line in f: if not line: continue