From 8a78c349175da90c7c79e3bec81698a6b910b821 Mon Sep 17 00:00:00 2001 From: eshrh <16175276+eshrh@users.noreply.github.com> Date: Sat, 23 Jan 2021 18:29:43 -0500 Subject: [PATCH] Support title splitting for fullwidth CJK terminals (#1163) * Split by fullwidth terminals without spaces. * Add test * Update write.feature --- features/write.feature | 14 ++++++++++++++ jrnl/Entry.py | 13 ++++++++----- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/features/write.feature b/features/write.feature index efa26201..eb22e480 100644 --- a/features/write.feature +++ b/features/write.feature @@ -28,6 +28,20 @@ Feature: Writing new entries. | basic_folder | | basic_dayone | + Scenario Outline: CJK entry should be split at fullwidth period without following space. + Given we use the config "<config_file>.yaml" + And we use the password "test" if prompted + When we run "jrnl 七転び。八起き" + And we run "jrnl -1" + Then the output should contain "| 八起き" + + Examples: configs + | config_file | + | basic_onefile | + | basic_encrypted | + | basic_folder | + | basic_dayone | + Scenario Outline: Writing an entry from command line should store the entry Given we use the config "<config_file>.yaml" And we use the password "bad doggie no biscuit" if prompted diff --git a/jrnl/Entry.py b/jrnl/Entry.py index 2a85e015..67ba84f3 100755 --- a/jrnl/Entry.py +++ b/jrnl/Entry.py @@ -204,14 +204,17 @@ class Entry: # https://github.com/fnl/segtok SENTENCE_SPLITTER = re.compile( r""" -( # A sentence ends at one of two sequences: - [.!?\u2026\u203C\u203D\u2047\u2048\u2049\u22EF\u3002\uFE52\uFE57\uFF01\uFF0E\uFF1F\uFF61] # Either, a sequence starting with a sentence terminal, + ( + [.!?\u2026\u203C\u203D\u2047\u2048\u2049\u22EF\uFE52\uFE57] # Sequence starting with a sentence terminal, [\'\u2019\"\u201D]? # an optional right quote, - [\]\)]* # optional closing brackets and - \s+ # a sequence of required spaces. -)""", + [\]\)]* # optional closing bracket + \s+ # AND a sequence of required spaces. 
+ ) + |[\uFF01\uFF0E\uFF1F\uFF61\u3002] # CJK full/half width terminals usually do not have following spaces. + """, re.VERBOSE, ) + SENTENCE_SPLITTER_ONLY_NEWLINE = re.compile("\n")