mirror of
https://github.com/jrnl-org/jrnl.git
synced 2025-05-20 04:58:32 +02:00
Split by fullwidth terminals without spaces.
This commit is contained in:
parent
f80f033d5d
commit
5947bd3bda
1 changed file with 8 additions and 5 deletions
|
@@ -204,14 +204,17 @@ class Entry:
|
||||||
# https://github.com/fnl/segtok
|
# https://github.com/fnl/segtok
|
||||||
SENTENCE_SPLITTER = re.compile(
|
SENTENCE_SPLITTER = re.compile(
|
||||||
r"""
|
r"""
|
||||||
( # A sentence ends at one of two sequences:
|
(
|
||||||
[.!?\u2026\u203C\u203D\u2047\u2048\u2049\u22EF\u3002\uFE52\uFE57\uFF01\uFF0E\uFF1F\uFF61] # Either, a sequence starting with a sentence terminal,
|
[.!?\u2026\u203C\u203D\u2047\u2048\u2049\u22EF\uFE52\uFE57] # Sequence starting with a sentence terminal,
|
||||||
[\'\u2019\"\u201D]? # an optional right quote,
|
[\'\u2019\"\u201D]? # an optional right quote,
|
||||||
[\]\)]* # optional closing brackets and
|
[\]\)]* # optional closing bracket
|
||||||
\s+ # a sequence of required spaces.
|
\s+ # AND a sequence of required spaces.
|
||||||
)""",
|
)
|
||||||
|
|[\uFF01\uFF0E\uFF1F\uFF61\u3002] # CJK full/half width terminals usually do not have following spaces.
|
||||||
|
""",
|
||||||
re.VERBOSE,
|
re.VERBOSE,
|
||||||
)
|
)
|
||||||
|
|
||||||
SENTENCE_SPLITTER_ONLY_NEWLINE = re.compile("\n")
|
SENTENCE_SPLITTER_ONLY_NEWLINE = re.compile("\n")
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue