diff --git a/jrnl/DayOneJournal.py b/jrnl/DayOneJournal.py
index a3e53826..8c5ea427 100644
--- a/jrnl/DayOneJournal.py
+++ b/jrnl/DayOneJournal.py
@@ -48,12 +48,9 @@ class DayOne(Journal.Journal):
                         timezone = tzlocal.get_localzone()
                     date = dict_entry['Creation Date']
                     date = date + timezone.utcoffset(date, is_dst=False)
-                    raw = dict_entry['Entry Text']
-                    sep = re.search("\n|[\?!.]+ +\n?", raw)
-                    title, body = (raw[:sep.end()], raw[sep.end():]) if sep else (raw, "")
-                    entry = Entry.Entry(self, date, title, body, starred=dict_entry["Starred"])
+                    entry = Entry.Entry(self, date, text=dict_entry['Entry Text'], starred=dict_entry["Starred"])
                     entry.uuid = dict_entry["UUID"]
-                    entry.tags = [self.config['tagsymbols'][0] + tag for tag in dict_entry.get("Tags", [])]
+                    entry._tags = [self.config['tagsymbols'][0] + tag for tag in dict_entry.get("Tags", [])]
 
                     self.entries.append(entry)
         self.sort()
@@ -129,7 +126,7 @@ class DayOne(Journal.Journal):
 
         # Now, update our current entries if they changed
         for entry in entries:
-            entry.parse_tags()
+            entry._parse_text()
             matched_entries = [e for e in self.entries if e.uuid.lower() == entry.uuid]
             if matched_entries:
                 # This entry is an existing entry
diff --git a/jrnl/Entry.py b/jrnl/Entry.py
index b773e747..b4a54fa3 100755
--- a/jrnl/Entry.py
+++ b/jrnl/Entry.py
@@ -5,33 +5,58 @@ from __future__ import unicode_literals
 import re
 import textwrap
 from datetime import datetime
+from .util import split_title
 
 
 class Entry:
-    def __init__(self, journal, date=None, title="", body="", starred=False):
-        self.journal = journal  # Reference to journal mainly to access it's config
+    def __init__(self, journal, date=None, text="", starred=False):
+        self.journal = journal  # Reference to journal mainly to access its config
         self.date = date or datetime.now()
-        self.title = title.rstrip("\n ")
-        self.body = body.rstrip("\n ")
-        self.tags = self.parse_tags()
+        self.text = text
+        self._title = self._body = self._tags = None
         self.starred = starred
         self.modified = False
 
     @property
     def fulltext(self):
         return self.title + " " + self.body
+
+    def _parse_text(self):
+        raw_text = self.text
+        lines = raw_text.splitlines()
+        if lines and lines[0].strip().endswith("*"):  # `lines` is empty when the text is empty
+            self.starred = True
+            raw_text = lines[0].strip("\n *") + "\n" + "\n".join(lines[1:])
+        self._title, self._body = split_title(raw_text)
+        if self._tags is None:
+            self._tags = list(self._parse_tags())
+
+    @property
+    def title(self):
+        if self._title is None:
+            self._parse_text()
+        return self._title
+
+    @property
+    def body(self):
+        if self._body is None:
+            self._parse_text()
+        return self._body
+
+    @property
+    def tags(self):
+        if self._tags is None:
+            self._parse_text()
+        return self._tags
 
     @staticmethod
     def tag_regex(tagsymbols):
-        pattern = r'(?u)\s([{tags}][-+*#/\w]+)'.format(tags=tagsymbols)
+        pattern = r'(?u)(?:^|\s)([{tags}][-+*#/\w]+)'.format(tags=tagsymbols)
         return re.compile(pattern, re.UNICODE)
 
-    def parse_tags(self):
-        fulltext = " " + " ".join([self.title, self.body]).lower()
+    def _parse_tags(self):
         tagsymbols = self.journal.config['tagsymbols']
-        tags = re.findall(Entry.tag_regex(tagsymbols), fulltext)
-        self.tags = tags
-        return set(tags)
+        return set(tag.lower() for tag in re.findall(Entry.tag_regex(tagsymbols), self.text))
 
     def __unicode__(self):
         """Returns a string representation of the entry to be written into a journal file."""
diff --git a/jrnl/Journal.py b/jrnl/Journal.py
index cbbcbf54..517076ba 100644
--- a/jrnl/Journal.py
+++ b/jrnl/Journal.py
@@ -88,40 +88,22 @@ class Journal(object):
         """Parses a journal that's stored in a string and returns a list of entries"""
         # Initialise our current entry
         entries = []
-        current_entry = None
-        date_blob_re = re.compile("^\[[^\\]]+\] ")
-        for line in journal_txt.splitlines():
-            line = line.rstrip()
-            date_blob = date_blob_re.findall(line)
-            if date_blob:
-                date_blob = date_blob[0]
-                new_date = time.parse(date_blob.strip(" []"))
-                if new_date:
-                    # Found a date at the start of the line: This is a new entry.
-                    if current_entry:
-                        entries.append(current_entry)
+        date_blob_re = re.compile(r"(?:^|\n)\[([^\]]+)\] ")
+        last_entry_pos = 0
+        for match in date_blob_re.finditer(journal_txt):
+            date_blob = match.groups()[0]
+            new_date = time.parse(date_blob)
+            if new_date:
+                if entries:
+                    entries[-1].text = journal_txt[last_entry_pos:match.start()]
+                last_entry_pos = match.end()
+                entries.append(Entry.Entry(self, date=new_date))
+        # Finish the last entry
+        if entries:
+            entries[-1].text = journal_txt[last_entry_pos:]
 
-                    if line.endswith("*"):
-                        starred = True
-                        line = line[:-1]
-                    else:
-                        starred = False
-
-                    current_entry = Entry.Entry(
-                        self,
-                        date=new_date,
-                        title=line[len(date_blob):],
-                        starred=starred
-                    )
-            elif current_entry:
-                # Didn't find a date - keep on feeding to current entry.
-                current_entry.body += line + "\n"
-
-        # Append last entry
-        if current_entry:
-            entries.append(current_entry)
         for entry in entries:
-            entry.parse_tags()
+            entry._parse_text()
         return entries
 
     def __unicode__(self):
@@ -183,20 +165,7 @@ class Journal(object):
             and (not start_date or entry.date >= start_date)
             and (not end_date or entry.date <= end_date)
         ]
-        if short:
-            if tags:
-                for e in self.entries:
-                    res = []
-                    for tag in tags:
-                        matches = [m for m in re.finditer(tag, e.body)]
-                        for m in matches:
-                            date = e.date.strftime(self.config['timeformat'])
-                            excerpt = e.body[m.start():min(len(e.body), m.end() + 60)]
-                            res.append('{0} {1} ..'.format(date, excerpt))
-                    e.body = "\n".join(res)
-            else:
-                for e in self.entries:
-                    e.body = ''
+
         self.entries = result
 
     def new_entry(self, raw, date=None, sort=True):
@@ -207,23 +176,20 @@ class Journal(object):
         starred = False
         # Split raw text into title and body
         sep = re.search("\n|[\?!.]+ +\n?", raw)
-        title, body = (raw[:sep.end()], raw[sep.end():]) if sep else (raw, "")
+        first_line = raw[:sep.end()].strip() if sep else raw
         starred = False
+
         if not date:
-            if title.find(": ") > 0:
-                starred = "*" in title[:title.find(": ")]
-                date = time.parse(title[:title.find(": ")], default_hour=self.config['default_hour'], default_minute=self.config['default_minute'])
-                if date or starred:  # Parsed successfully, strip that from the raw text
-                    title = title[title.find(": ") + 1:].strip()
-            elif title.strip().startswith("*"):
-                starred = True
-                title = title[1:].strip()
-            elif title.strip().endswith("*"):
-                starred = True
-                title = title[:-1].strip()
+            colon_pos = first_line.find(": ")
+            if colon_pos > 0:
+                date = time.parse(raw[:colon_pos], default_hour=self.config['default_hour'], default_minute=self.config['default_minute'])
+                if date:  # Parsed successfully, strip that from the raw text
+                    starred = raw[:colon_pos].strip().endswith("*")
+                    raw = raw[colon_pos + 1:].strip()
+        starred = starred or first_line.startswith("*") or first_line.endswith("*")
         if not date:  # Still nothing? Meh, just live in the moment.
             date = time.parse("now")
-        entry = Entry.Entry(self, date, title, body, starred=starred)
+        entry = Entry.Entry(self, date, raw, starred=starred)
         entry.modified = True
         self.entries.append(entry)
         if sort:
@@ -264,8 +230,7 @@ class PlainJournal(Journal):
 class LegacyJournal(Journal):
     """Legacy class to support opening journals formatted with the jrnl 1.x
     standard. Main difference here is that in 1.x, timestamps were not cuddled
-    by square brackets, and the line break between the title and the rest of
-    the entry was not enforced. You'll not be able to save these journals anymore."""
+    by square brackets. You'll not be able to save these journals anymore."""
     def _load(self, filename):
         with codecs.open(filename, "r", "utf-8") as f:
             return f.read()
@@ -295,18 +260,18 @@ class LegacyJournal(Journal):
                 else:
                     starred = False
 
-                current_entry = Entry.Entry(self, date=new_date, title=line[date_length + 1:], starred=starred)
+                current_entry = Entry.Entry(self, date=new_date, text=line[date_length + 1:], starred=starred)
             except ValueError:
                 # Happens when we can't parse the start of the line as an date.
                 # In this case, just append line to our body.
                 if current_entry:
-                    current_entry.body += line + u"\n"
+                    current_entry.text += line + u"\n"
 
         # Append last entry
         if current_entry:
             entries.append(current_entry)
         for entry in entries:
-            entry.parse_tags()
+            entry._parse_text()
         return entries
 
 
diff --git a/jrnl/plugins/markdown_exporter.py b/jrnl/plugins/markdown_exporter.py
index 346a5a37..147e8ac5 100644
--- a/jrnl/plugins/markdown_exporter.py
+++ b/jrnl/plugins/markdown_exporter.py
@@ -5,7 +5,7 @@ from __future__ import absolute_import, unicode_literals, print_function
 from .text_exporter import TextExporter
 import re
 import sys
-from ..util import WARNING_COLOR, ERROR_COLOR, RESET_COLOR
+from ..util import WARNING_COLOR, RESET_COLOR
 
 
 class MarkdownExporter(TextExporter):
@@ -52,7 +52,7 @@ class MarkdownExporter(TextExporter):
         if warn_on_heading_level is True:
             print("{}WARNING{}: Headings increased past H6 on export - {} {}".format(WARNING_COLOR, RESET_COLOR, date_str, entry.title), file=sys.stderr)
 
-        return "{md} {date} {title} {body} {space}".format(
+        return "{md} {date} {title}\n{body} {space}".format(
             md=heading,
             date=date_str,
             title=entry.title,
diff --git a/jrnl/util.py b/jrnl/util.py
index e7cedae7..a10d685f 100644
--- a/jrnl/util.py
+++ b/jrnl/util.py
@@ -30,6 +30,18 @@ WARNING_COLOR = "\033[33m"
 ERROR_COLOR = "\033[31m"
 RESET_COLOR = "\033[0m"
 
+# Based on Segtok by Florian Leitner
+# https://github.com/fnl/segtok
+SENTENCE_SPLITTER = re.compile(r"""
+(                       # A sentence ends at one of two sequences:
+    [.!?\u203C\u203D\u2047\u2048\u2049\u3002\uFE52\uFE57\uFF01\uFF0E\uFF1F\uFF61]  # Either, a sequence starting with a sentence terminal,
+    [\'\u2019\"\u201D]? # an optional right quote,
+    [\]\)]*             # optional closing brackets and
+    \s+                 # a sequence of required spaces.
+|                       # Otherwise,
+    \n                  # a sentence also terminates newlines.
+)""", re.UNICODE | re.VERBOSE)
+
 
 def getpass(prompt="Password: "):
     if not TEST:
@@ -186,3 +198,11 @@ def byte2int(b):
     """Converts a byte to an integer.
     This is equivalent to ord(bs[0]) on Python 2 and bs[0] on Python 3."""
     return ord(b)if PY2 else b
+
+
+def split_title(text):
+    """Splits the first sentence off from a text."""
+    punkt = SENTENCE_SPLITTER.search(text)
+    if not punkt:
+        return text, ""
+    return text[:punkt.end()].rstrip(), text[punkt.end():].lstrip()