fix #1 - UTF-8 decode problems when using en_US.
Apparently the en_US dictionary shipped with Ubuntu cannot be decoded as UTF-8, but it decodes fine as ISO-8859-1. As a workaround, this adds a new configuration and command-line option: encoding. If left empty, the encoding is auto-detected from the environment (the locale's preferred encoding); otherwise it must be the name of an encoding that Python should use to read the dictionary.
This commit is contained in:
parent
b34c41281a
commit
6795454af1
@ -22,6 +22,9 @@ def main():
|
||||
parser.add_argument(
|
||||
'--myspell-dir', '-i',
|
||||
help='Directory containing myspell dictionaries')
|
||||
parser.add_argument(
|
||||
'--encoding', '-e',
|
||||
help="Character encoding of the directory")
|
||||
|
||||
parser.add_argument(
|
||||
'--lang', '-l',
|
||||
|
@ -4,6 +4,7 @@ import math
|
||||
import os
|
||||
import sys
|
||||
import subprocess
|
||||
import locale
|
||||
|
||||
if sys.version_info[0] < 3:
|
||||
import ConfigParser as configparser
|
||||
@ -36,6 +37,7 @@ def update_config(
|
||||
word_min_char=2,
|
||||
word_max_char=0,
|
||||
unmunch_bin='',
|
||||
encoding='',
|
||||
|
||||
words=4,
|
||||
capitalize='random',
|
||||
@ -65,6 +67,7 @@ def update_config(
|
||||
set_if_defined(conf, 'dictionary', 'word_min_char', word_min_char)
|
||||
set_if_defined(conf, 'dictionary', 'word_max_char', word_max_char)
|
||||
set_if_defined(conf, 'dictionary', 'unmunch_bin', unmunch_bin)
|
||||
set_if_defined(conf, 'dictionary', 'encoding', encoding)
|
||||
|
||||
if not conf.has_section('passwords'):
|
||||
conf.add_section('passwords')
|
||||
@ -93,6 +96,9 @@ def _read_dictionary(conf):
|
||||
dict_file = os.path.join(conf.get('dictionary', 'myspell_dir'), '{}.dic'.format(conf.get('dictionary', 'lang')))
|
||||
aff_file = os.path.join(conf.get('dictionary', 'myspell_dir'), '{}.aff'.format(conf.get('dictionary', 'lang')))
|
||||
unmunch_bin = conf.get('dictionary', 'unmunch_bin')
|
||||
encoding = conf.get('dictionary', 'encoding')
|
||||
if not encoding:
|
||||
encoding = locale.getpreferredencoding(False)
|
||||
words = set()
|
||||
chars = 0
|
||||
if os.path.exists(aff_file) and unmunch_bin:
|
||||
@ -106,7 +112,7 @@ def _read_dictionary(conf):
|
||||
if proc.returncode != 0:
|
||||
raise DictReadError('Unmunching dictionaries failed')
|
||||
for word in out.splitlines():
|
||||
save = word.strip().decode('utf-8')
|
||||
save = word.strip().decode(encoding)
|
||||
if not save:
|
||||
continue
|
||||
first_char = save[:1]
|
||||
@ -122,7 +128,7 @@ def _read_dictionary(conf):
|
||||
words.add(save)
|
||||
chars += len(save)
|
||||
else:
|
||||
with open(dict_file, 'r') as f:
|
||||
with open(dict_file, encoding=encoding, mode='r') as f:
|
||||
for line in f:
|
||||
if not line:
|
||||
continue
|
||||
|
Loading…
Reference in New Issue
Block a user