From 354cc3244ce3da512baad0a55c73cb4bfd490460 Mon Sep 17 00:00:00 2001
From: Thijs van Dijk <github@inurbanus.nl>
Date: Sun, 22 Mar 2015 16:34:58 +0100
Subject: [PATCH] Fix improper charset handling in PY2 path of u(x)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

I know full well that I may have just added another layer of impropriety, but this change fixed the charset errors I was getting.

I'll illustrate this change with an example string `Charšet`, entered (e.g. through stdin) in UTF-8 encoding.
To the best of my knowledge, the previous version would first have encoded this string to `Char\xc5\xa1et` (i.e., it encoded each byte outside the ASCII range as a hex escape code), and then have parsed this string to `CharÅ¡et` (i.e., after "r" it sees the two Unicode code points U+00C5 and U+00A1, rather than the single character U+0161 that those two UTF-8 bytes encode).

My version simply takes this str for what it is: a UTF-8 representation of the Unicode string `Charšet`.
---
 jrnl/util.py | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/jrnl/util.py b/jrnl/util.py
index a2216a2b..dd5931f7 100644
--- a/jrnl/util.py
+++ b/jrnl/util.py
@@ -67,12 +67,7 @@ def set_keychain(journal_name, password):
 
 def u(s):
     """Mock unicode function for python 2 and 3 compatibility."""
-    if PY3:
-        return str(s)
-    elif isinstance(s, basestring) and type(s) is not unicode:
-        return unicode(s.encode('string-escape'), "unicode_escape")
-    return unicode(s)
-
+    return s if PY3 or type(s) is unicode else s.decode("utf-8")
 
 def py2encode(s):
     """Encode in Python 2, but not in python 3."""