From 354cc3244ce3da512baad0a55c73cb4bfd490460 Mon Sep 17 00:00:00 2001 From: Thijs van Dijk Date: Sun, 22 Mar 2015 16:34:58 +0100 Subject: [PATCH] Fix improper charset handling in PY2 path of u(x) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Knowing full well that I may have just added another layer of impropriety, this change fixed the charset errors I was getting. I'll illustrate this change with an example string `Charšet`, entered (e.g. through stdin) in UTF-8 encoding. To the best of my knowledge, the previous version would first have encoded this string to `Char\xc5\xa1et` (i.e., it encoded each byte outside the ASCII range as a hex escape code), and then have parsed this string to `CharÅ¡et` (i.e. after "r" it sees the Unicode code points U+00C5 and U+00A1). My version simply takes this str for what it is: a UTF-8 representation of the Unicode string `Charšet`. --- jrnl/util.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/jrnl/util.py b/jrnl/util.py index a2216a2b..dd5931f7 100644 --- a/jrnl/util.py +++ b/jrnl/util.py @@ -67,12 +67,7 @@ def set_keychain(journal_name, password): def u(s): """Mock unicode function for python 2 and 3 compatibility.""" - if PY3: - return str(s) - elif isinstance(s, basestring) and type(s) is not unicode: - return unicode(s.encode('string-escape'), "unicode_escape") - return unicode(s) - + return s if PY3 or type(s) is unicode else s.decode("utf-8") def py2encode(s): """Encode in Python 2, but not in python 3."""