Separation of title and entry is now purely virtual.

Fixes #360
2025-07-08 01:06:12 +02:00 · 2015-12-28 21:24:39 -08:00 · 2015-12-28 21:24:39 -08:00 · 95d399d5c3
commit 95d399d5c3
parent 849dc89557
5 changed files with 90 additions and 83 deletions
--- a/jrnl/DayOneJournal.py
+++ b/jrnl/DayOneJournal.py
@ -48,12 +48,9 @@ class DayOne(Journal.Journal):
                        timezone = tzlocal.get_localzone()
                    date = dict_entry['Creation Date']
                    date = date + timezone.utcoffset(date, is_dst=False)
-                    raw = dict_entry['Entry Text']
-                    sep = re.search("\n|[\?!.]+ +\n?", raw)
-                    title, body = (raw[:sep.end()], raw[sep.end():]) if sep else (raw, "")
-                    entry = Entry.Entry(self, date, title, body, starred=dict_entry["Starred"])
+                    entry = Entry.Entry(self, date, text=dict_entry['Entry Text'], starred=dict_entry["Starred"])
                    entry.uuid = dict_entry["UUID"]
-                    entry.tags = [self.config['tagsymbols'][0] + tag for tag in dict_entry.get("Tags", [])]
+                    entry._tags = [self.config['tagsymbols'][0] + tag for tag in dict_entry.get("Tags", [])]

                    self.entries.append(entry)
        self.sort()
@ -129,7 +126,7 @@ class DayOne(Journal.Journal):

        # Now, update our current entries if they changed
        for entry in entries:
-            entry.parse_tags()
+            entry._parse_text()
            matched_entries = [e for e in self.entries if e.uuid.lower() == entry.uuid]
            if matched_entries:
                # This entry is an existing entry
--- a/jrnl/Entry.py
+++ b/jrnl/Entry.py
@ -5,33 +5,58 @@ from __future__ import unicode_literals
 import re
 import textwrap
 from datetime import datetime
+from .util import split_title


 class Entry:
-    def __init__(self, journal, date=None, title="", body="", starred=False):
-        self.journal = journal  # Reference to journal mainly to access it's config
+    def __init__(self, journal, date=None, text="", starred=False):
+        self.journal = journal  # Reference to journal mainly to access its config
        self.date = date or datetime.now()
-        self.title = title.rstrip("\n ")
-        self.body = body.rstrip("\n ")
-        self.tags = self.parse_tags()
+        self.text = text
+        self._title = self._body = self._tags = None
        self.starred = starred
        self.modified = False

    @property
    def fulltext(self):
        return self.title + " " + self.body
+
+    def _parse_text(self):
+        raw_text = self.text
+        lines = raw_text.splitlines()
+        if lines[0].strip().endswith("*"):
+            self.starred = True
+            raw_text = lines[0].strip("\n *") + "\n" + "\n".join(lines[1:])
+        self._title, self._body = split_title(raw_text)
+        if self._tags is None:
+            self._tags = list(self._parse_tags())
+
+    @property
+    def title(self):
+        if self._title is None:
+            self._parse_text()
+        return self._title
+
+    @property
+    def body(self):
+        if self._body is None:
+            self._parse_text()
+        return self._body
+
+    @property
+    def tags(self):
+        if self._tags is None:
+            self._parse_text()
+        return self._tags
    
    @staticmethod
    def tag_regex(tagsymbols):
-        pattern = r'(?u)\s([{tags}][-+*#/\w]+)'.format(tags=tagsymbols)
+        pattern = r'(?u)(?:^|\s)([{tags}][-+*#/\w]+)'.format(tags=tagsymbols)
        return re.compile(pattern, re.UNICODE)

-    def parse_tags(self):
-        fulltext = " " + " ".join([self.title, self.body]).lower()
+    def _parse_tags(self):
        tagsymbols = self.journal.config['tagsymbols']
-        tags = re.findall(Entry.tag_regex(tagsymbols), fulltext)
-        self.tags = tags
-        return set(tags)
+        return set(tag.lower() for tag in re.findall(Entry.tag_regex(tagsymbols), self.text))

    def __unicode__(self):
        """Returns a string representation of the entry to be written into a journal file."""
--- a/jrnl/Journal.py
+++ b/jrnl/Journal.py
@ -88,40 +88,22 @@ class Journal(object):
        """Parses a journal that's stored in a string and returns a list of entries"""
        # Initialise our current entry
        entries = []
-        current_entry = None
-        date_blob_re = re.compile("^\[[^\\]]+\] ")
-        for line in journal_txt.splitlines():
-            line = line.rstrip()
-            date_blob = date_blob_re.findall(line)
-            if date_blob:
-                date_blob = date_blob[0]
-                new_date = time.parse(date_blob.strip(" []"))
-                if new_date:
-                    # Found a date at the start of the line: This is a new entry.
-                    if current_entry:
-                        entries.append(current_entry)
+        date_blob_re = re.compile("(?:^|\n)\[([^\\]]+)\] ")
+        last_entry_pos = 0
+        for match in date_blob_re.finditer(journal_txt):
+            date_blob = match.groups()[0]
+            new_date = time.parse(date_blob)
+            if new_date:
+                if entries:
+                    entries[-1].text = journal_txt[last_entry_pos:match.start()]
+                last_entry_pos = match.end()
+                entries.append(Entry.Entry(self, date=new_date))
+        # Finish the last entry
+        if entries:
+            entries[-1].text = journal_txt[last_entry_pos:]

-                    if line.endswith("*"):
-                        starred = True
-                        line = line[:-1]
-                    else:
-                        starred = False
-
-                    current_entry = Entry.Entry(
-                        self,
-                        date=new_date,
-                        title=line[len(date_blob):],
-                        starred=starred
-                    )
-            elif current_entry:
-                # Didn't find a date - keep on feeding to current entry.
-                current_entry.body += line + "\n"
-
-        # Append last entry
-        if current_entry:
-            entries.append(current_entry)
        for entry in entries:
-            entry.parse_tags()
+            entry._parse_text()
        return entries

    def __unicode__(self):
@ -183,20 +165,7 @@ class Journal(object):
            and (not start_date or entry.date >= start_date)
            and (not end_date or entry.date <= end_date)
        ]
-        if short:
-            if tags:
-                for e in self.entries:
-                    res = []
-                    for tag in tags:
-                        matches = [m for m in re.finditer(tag, e.body)]
-                        for m in matches:
-                            date = e.date.strftime(self.config['timeformat'])
-                            excerpt = e.body[m.start():min(len(e.body), m.end() + 60)]
-                            res.append('{0} {1} ..'.format(date, excerpt))
-                    e.body = "\n".join(res)
-            else:
-                for e in self.entries:
-                    e.body = ''
+
        self.entries = result

    def new_entry(self, raw, date=None, sort=True):
@ -207,23 +176,20 @@ class Journal(object):
        starred = False
        # Split raw text into title and body
        sep = re.search("\n|[\?!.]+ +\n?", raw)
-        title, body = (raw[:sep.end()], raw[sep.end():]) if sep else (raw, "")
+        first_line = raw[:sep.end()].strip() if sep else raw
        starred = False
+
        if not date:
-            if title.find(": ") > 0:
-                starred = "*" in title[:title.find(": ")]
-                date = time.parse(title[:title.find(": ")], default_hour=self.config['default_hour'], default_minute=self.config['default_minute'])
-                if date or starred:  # Parsed successfully, strip that from the raw text
-                    title = title[title.find(": ") + 1:].strip()
-            elif title.strip().startswith("*"):
-                starred = True
-                title = title[1:].strip()
-            elif title.strip().endswith("*"):
-                starred = True
-                title = title[:-1].strip()
+            colon_pos = first_line.find(": ")
+            if colon_pos > 0:
+                date = time.parse(raw[:colon_pos], default_hour=self.config['default_hour'], default_minute=self.config['default_minute'])
+                if date:  # Parsed successfully, strip that from the raw text
+                    starred = raw[:colon_pos].strip().endswith("*")
+                    raw = raw[colon_pos + 1:].strip()
+        starred = starred or first_line.startswith("*") or first_line.endswith("*")
        if not date:  # Still nothing? Meh, just live in the moment.
            date = time.parse("now")
-        entry = Entry.Entry(self, date, title, body, starred=starred)
+        entry = Entry.Entry(self, date, raw, starred=starred)
        entry.modified = True
        self.entries.append(entry)
        if sort:
@ -264,8 +230,7 @@ class PlainJournal(Journal):
 class LegacyJournal(Journal):
    """Legacy class to support opening journals formatted with the jrnl 1.x
    standard. Main difference here is that in 1.x, timestamps were not cuddled
-    by square brackets, and the line break between the title and the rest of
-    the entry was not enforced. You'll not be able to save these journals anymore."""
+    by square brackets. You'll not be able to save these journals anymore."""
    def _load(self, filename):
        with codecs.open(filename, "r", "utf-8") as f:
            return f.read()
@ -295,18 +260,18 @@ class LegacyJournal(Journal):
                else:
                    starred = False

-                current_entry = Entry.Entry(self, date=new_date, title=line[date_length + 1:], starred=starred)
+                current_entry = Entry.Entry(self, date=new_date, text=line[date_length + 1:], starred=starred)
            except ValueError:
                # Happens when we can't parse the start of the line as an date.
                # In this case, just append line to our body.
                if current_entry:
-                    current_entry.body += line + u"\n"
+                    current_entry.text += line + u"\n"

        # Append last entry
        if current_entry:
            entries.append(current_entry)
        for entry in entries:
-            entry.parse_tags()
+            entry._parse_text()
        return entries


--- a/jrnl/plugins/markdown_exporter.py
+++ b/jrnl/plugins/markdown_exporter.py
@ -5,7 +5,7 @@ from __future__ import absolute_import, unicode_literals, print_function
 from .text_exporter import TextExporter
 import re
 import sys
-from ..util import WARNING_COLOR, ERROR_COLOR, RESET_COLOR
+from ..util import WARNING_COLOR, RESET_COLOR


 class MarkdownExporter(TextExporter):
@ -52,7 +52,7 @@ class MarkdownExporter(TextExporter):
        if warn_on_heading_level is True:
            print("{}WARNING{}: Headings increased past H6 on export - {} {}".format(WARNING_COLOR, RESET_COLOR, date_str, entry.title), file=sys.stderr)

-        return "{md} {date} {title} {body} {space}".format(
+        return "{md} {date} {title}\n{body} {space}".format(
            md=heading,
            date=date_str,
            title=entry.title,
--- a/jrnl/util.py
+++ b/jrnl/util.py
@ -30,6 +30,18 @@ WARNING_COLOR = "\033[33m"
 ERROR_COLOR = "\033[31m"
 RESET_COLOR = "\033[0m"

+# Based on Segtok by Florian Leitner
+# https://github.com/fnl/segtok
+SENTENCE_SPLITTER = re.compile(r"""
+(                       # A sentence ends at one of two sequences:
+    [.!?\u203C\u203D\u2047\u2048\u2049\u3002\uFE52\uFE57\uFF01\uFF0E\uFF1F\uFF61]                # Either, a sequence starting with a sentence terminal,
+    [\'\u2019\"\u201D]? # an optional right quote,
+    [\]\)]*             # optional closing brackets and
+    \s+                 # a sequence of required spaces.
+|                       # Otherwise,
+    \n                  # a sentence also terminates newlines.
+)""", re.UNICODE | re.VERBOSE)
+

 def getpass(prompt="Password: "):
    if not TEST:
@ -186,3 +198,11 @@ def byte2int(b):
    """Converts a byte to an integer.
    This is equivalent to ord(bs[0]) on Python 2 and bs[0] on Python 3."""
    return ord(b)if PY2 else b
+
+
+def split_title(text):
+    """Splits the first sentence off from a text."""
+    punkt = SENTENCE_SPLITTER.search(text)
+    if not punkt:
+        return text, ""
+    return text[:punkt.end()].rstrip(), text[punkt.end():].lstrip()