Improves dateutil parsing

Closes #133
Fixes #183, #185, #228
This commit is contained in:
Manuel Ebert 2014-07-21 13:30:10 +09:00
parent c814ba9bc2
commit 6f1dd6077e
4 changed files with 58 additions and 51 deletions

View file

@ -4,12 +4,10 @@
from __future__ import absolute_import from __future__ import absolute_import
from . import Entry from . import Entry
from . import util from . import util
from . import time
import codecs import codecs
try: import parsedatetime.parsedatetime_consts as pdt
except ImportError: import parsedatetime as pdt
import re import re
from datetime import datetime from datetime import datetime
import dateutil
import sys import sys
try: try:
from Crypto.Cipher import AES from Crypto.Cipher import AES
@ -34,9 +32,6 @@ class Journal(object):
} }
self.config.update(kwargs) self.config.update(kwargs)
# Set up date parser # Set up date parser
consts = pdt.Constants(usePyICU=False)
consts.DOWParseStyle = -1 # "Monday" will be either today or the last Monday
self.dateparse = pdt.Calendar(consts)
self.key = None # used to decrypt and encrypt the journal self.key = None # used to decrypt and encrypt the journal
self.search_tags = None # Store tags we're highlighting self.search_tags = None # Store tags we're highlighting
self.name = name self.name = name
@ -212,8 +207,9 @@ class Journal(object):
If strict is True, all tags must be present in an entry. If false, the If strict is True, all tags must be present in an entry. If false, the
entry is kept if any tag is present.""" entry is kept if any tag is present."""
self.search_tags = set([tag.lower() for tag in tags]) self.search_tags = set([tag.lower() for tag in tags])
end_date = self.parse_date(end_date) end_date = time.parse(end_date, inclusive=True)
start_date = self.parse_date(start_date) start_date = time.parse(start_date)
# If strict mode is on, all tags have to be present in entry # If strict mode is on, all tags have to be present in entry
tagged = self.search_tags.issubset if strict else self.search_tags.intersection tagged = self.search_tags.issubset if strict else self.search_tags.intersection
result = [ result = [
@ -239,43 +235,6 @@ class Journal(object):
e.body = '' e.body = ''
self.entries = result self.entries = result
def parse_date(self, date_str):
    """Parse a fuzzy date string and return a datetime.datetime object.

    Parsing is attempted with dateutil first; if that fails, the
    parsedatetime calendar stored on ``self.dateparse`` is tried, and as a
    last resort the string is interpreted as a bare year.

    Args:
        date_str: The text to parse. A ``datetime`` instance is returned
            unchanged; any falsy value yields ``None``.

    Returns:
        A ``datetime``, or ``None`` when ``date_str`` is empty or cannot be
        parsed by any of the strategies above.
    """
    if not date_str:
        return None
    if isinstance(date_str, datetime):
        return date_str

    # ``import dateutil`` alone does not guarantee that the ``parser``
    # submodule is loaded; import it explicitly so the dateutil path is
    # actually exercised instead of silently falling through.
    import dateutil.parser

    try:
        date = dateutil.parser.parse(date_str)
        # dateutil gives no "time was present" flag, so midnight is treated
        # as "date only" (1); anything else as "has a time" (2).
        flag = 1 if date.hour == 0 and date.minute == 0 else 2
        date = date.timetuple()
    except Exception:  # was a bare except, which also trapped KeyboardInterrupt
        date, flag = self.dateparse.parse(date_str)
        if not flag:  # Oops, unparsable.
            try:  # Try and parse this as a single year
                return datetime(int(date_str), 1, 1)
            except (ValueError, TypeError):
                return None

    if flag == 1:  # Date found, but no time. Use the default time.
        date = datetime(*date[:3],
                        hour=self.config['default_hour'],
                        minute=self.config['default_minute'])
    else:
        date = datetime(*date[:6])

    # Ugly heuristic: if the date is more than 4 weeks in the future, we got the year wrong.
    # Rather than this, we would like to see parsedatetime patched so we can tell it to prefer
    # past dates
    if (datetime.now() - date).days < -28:
        try:
            date = date.replace(year=date.year - 1)
        except ValueError:
            # Feb 29 does not exist in the previous year; clamp to Feb 28.
            date = date.replace(year=date.year - 1, day=28)
    return date
def new_entry(self, raw, date=None, sort=True): def new_entry(self, raw, date=None, sort=True):
"""Constructs a new entry from some raw text input. """Constructs a new entry from some raw text input.
If a date is given, it will parse and use this, otherwise scan for a date in the input first.""" If a date is given, it will parse and use this, otherwise scan for a date in the input first."""
@ -289,7 +248,7 @@ class Journal(object):
if not date: if not date:
if title.find(": ") > 0: if title.find(": ") > 0:
starred = "*" in title[:title.find(": ")] starred = "*" in title[:title.find(": ")]
date = self.parse_date(title[:title.find(": ")]) date = time.parse(title[:title.find(": ")], default_hour=self.config['default_hour'], default_minute=self.config['default_minute'])
if date or starred: # Parsed successfully, strip that from the raw text if date or starred: # Parsed successfully, strip that from the raw text
title = title[title.find(": ")+1:].strip() title = title[title.find(": ")+1:].strip()
elif title.strip().startswith("*"): elif title.strip().startswith("*"):
@ -299,7 +258,7 @@ class Journal(object):
starred = True starred = True
title = title[:-1].strip() title = title[:-1].strip()
if not date: # Still nothing? Meh, just live in the moment. if not date: # Still nothing? Meh, just live in the moment.
date = self.parse_date("now") date = time.parse("now")
entry = Entry.Entry(self, date, title, body, starred=starred) entry = Entry.Entry(self, date, title, body, starred=starred)
entry.modified = True entry.modified = True
self.entries.append(entry) self.entries.append(entry)

49
jrnl/time.py Normal file
View file

@ -0,0 +1,49 @@
from datetime import datetime
from dateutil.parser import parse as dateparse
try: import parsedatetime.parsedatetime_consts as pdt
except ImportError: import parsedatetime as pdt
# Fallback datetimes passed to dateutil as ``default=`` in parse(): the last
# second of the current year when ``inclusive`` is set, otherwise the first
# instant of the current year.
# NOTE: "current year" is evaluated once, when this module is imported.
DEFAULT_FUTURE = datetime(datetime.now().year, 12, 31, 23, 59, 59)
DEFAULT_PAST = datetime(datetime.now().year, 1, 1, 0, 0)
# Shared parsedatetime calendar, used by parse() when dateutil raises ValueError.
consts = pdt.Constants(usePyICU=False)
consts.DOWParseStyle = -1 # "Monday" will be either today or the last Monday
CALENDAR = pdt.Calendar(consts)
def parse(date_str, inclusive=False, default_hour=None, default_minute=None):
    """Parse a fuzzy date string and return a datetime.datetime object.

    dateutil is tried first; if it raises ``ValueError`` the module-level
    parsedatetime ``CALENDAR`` is used, and as a last resort the string is
    interpreted as a bare year.

    Args:
        date_str: The text to parse. A ``datetime`` instance is returned
            unchanged; any falsy value yields ``None``.
        inclusive: When true, dateutil is given ``DEFAULT_FUTURE`` as its
            ``default`` instead of ``DEFAULT_PAST``.
        default_hour: Hour applied when only a date (no time) was found;
            ``None`` is treated as 0.
        default_minute: Minute applied when only a date (no time) was found;
            ``None`` is treated as 0.

    Returns:
        A ``datetime``, or ``None`` when ``date_str`` is empty or cannot be
        parsed by any of the strategies above.
    """
    if not date_str:
        return None
    if isinstance(date_str, datetime):
        return date_str

    try:
        date = dateparse(date_str, default=DEFAULT_FUTURE if inclusive else DEFAULT_PAST)
        # dateutil gives no "time was present" flag, so midnight is treated
        # as "date only" (1); anything else as "has a time" (2).
        flag = 1 if date.hour == date.minute == 0 else 2
        date = date.timetuple()
    except ValueError:
        date, flag = CALENDAR.parse(date_str)
        if not flag:  # Oops, unparsable.
            try:  # Try and parse this as a single year
                return datetime(int(date_str), 1, 1)
            except (ValueError, TypeError):
                return None

    # Compare by value: ``flag is 1`` only worked through CPython's small-int
    # caching and is a SyntaxWarning on modern interpreters.
    if flag == 1:  # Date found, but no time. Use the default time.
        date = datetime(*date[:3], hour=default_hour or 0, minute=default_minute or 0)
    else:
        date = datetime(*date[:6])

    # Ugly heuristic: if the date is more than 4 weeks in the future, we got the year wrong.
    # Rather than this, we would like to see parsedatetime patched so we can tell it to prefer
    # past dates
    if (datetime.now() - date).days < -28:
        try:
            date = date.replace(year=date.year - 1)
        except ValueError:
            # Feb 29 does not exist in the previous year; clamp to Feb 28.
            date = date.replace(year=date.year - 1, day=28)
    return date

View file

@ -2,10 +2,8 @@
# encoding: utf-8 # encoding: utf-8
import sys import sys
import os import os
from tzlocal import get_localzone
import getpass as gp import getpass as gp
import keyring import keyring
import pytz
import json import json
if "win32" in sys.platform: if "win32" in sys.platform:
import colorama import colorama
@ -24,12 +22,14 @@ STDOUT = sys.stdout
TEST = False TEST = False
__cached_tz = None __cached_tz = None
def getpass(prompt="Password: "): def getpass(prompt="Password: "):
if not TEST: if not TEST:
return gp.getpass(prompt) return gp.getpass(prompt)
else: else:
return py23_input(prompt) return py23_input(prompt)
def get_password(validator, keychain=None, max_attempts=3): def get_password(validator, keychain=None, max_attempts=3):
pwd_from_keychain = keychain and get_keychain(keychain) pwd_from_keychain = keychain and get_keychain(keychain)
password = pwd_from_keychain or getpass() password = pwd_from_keychain or getpass()
@ -150,4 +150,3 @@ def byte2int(b):
"""Converts a byte to an integer. """Converts a byte to an integer.
This is equivalent to ord(bs[0]) on Python 2 and bs[0] on Python 3.""" This is equivalent to ord(bs[0]) on Python 2 and bs[0] on Python 3."""
return ord(b)if PY2 else b return ord(b)if PY2 else b

View file

@ -84,7 +84,7 @@ setup(
"six>=1.6.1", "six>=1.6.1",
"tzlocal>=1.1", "tzlocal>=1.1",
"keyring>=3.3", "keyring>=3.3",
"python-dateutil>=2.2" "python-dateutil==1.5"
] + [p for p, cond in conditional_dependencies.items() if cond], ] + [p for p, cond in conditional_dependencies.items() if cond],
extras_require = { extras_require = {
"encrypted": "pycrypto>=2.6" "encrypted": "pycrypto>=2.6"