Only read text files that look like entries when opening folder journal (#1697)

* Add a text file that should be ignored to the basic test folder journal (this makes many tests fail)
* Add additional files that should be ignored by FolderJournal
* Ignore all files in folder journal except year/month/day.txt
* Completely remake get_files in FolderJournal:
- move get_files into FolderJournal class and add underscore prefix
- create iterables to get for year/month folders and day files
- make year/month/day file reading strict: of all the month and day names the patterns can match, accept only the exact expected ones
* Restore accidentally-deleted self.sort() line
* Use match instead of string comparison to be os-agnostic
* Explicitly declare static methods
* Filter with glob first for max performance
* Explicitly check for valid dates in FolderJournal and add unit test
* Remove unneeded jrnl import
* Clean up method comment and add type hints
* Add is_valid_date unit test
* Elucidate comment

Co-authored-by: Jonathan Wren <jonathan@nowandwren.com>
This commit is contained in:
Micah Jerome Ellison 2023-04-29 15:49:41 -07:00 committed by GitHub
parent 88aa2491b0
commit 95836a7dd1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 153 additions and 16 deletions

View file

@ -2,8 +2,8 @@
# License: https://www.gnu.org/licenses/gpl-3.0.html
import codecs
import fnmatch
import os
import pathlib
from typing import TYPE_CHECKING
from jrnl import time
@ -13,14 +13,11 @@ from .Journal import Journal
if TYPE_CHECKING:
from jrnl.journals import Entry
def get_files(journal_config: str) -> list[str]:
    """Walk the directory tree rooted at journal_config and return the path of every "*.txt" file in it"""
    return [
        os.path.join(folder, name)
        for folder, _subfolders, names in os.walk(journal_config)
        for name in fnmatch.filter(names, "*.txt")
    ]
# glob search patterns for folder/file structure (<year>/<month>/<day>.txt)
# These are deliberately coarse pre-filters handed to pathlib's glob; the
# folder/file helpers on Folder narrow the matches further.
# A character class matching exactly one digit 0-9.
DIGIT_PATTERN = "[0123456789]"
# Year folder name: exactly four digits.
YEAR_PATTERN = DIGIT_PATTERN * 4
# Month folder name: two digits, the first being 0 or 1 (so 00-19 match here).
MONTH_PATTERN = "[01]" + DIGIT_PATTERN
# Day file name: two digits, the first being 0-3 (so 00-39 match here), plus ".txt".
DAY_PATTERN = "[0123]" + DIGIT_PATTERN + ".txt"
class Folder(Journal):
@ -35,12 +32,15 @@ class Folder(Journal):
def open(self) -> "Folder":
# Load the entry files found under self.config["journal"] into self.entries,
# sort them, and return self.
# NOTE(review): this span is a rendered diff hunk without +/- markers, so it
# appears to contain both the pre-change body (module-level get_files) and the
# post-change body (Folder._get_files guarded by os.path.exists) — confirm
# against the real file before relying on the statement order shown here.
filenames = []
self.entries = []
# presumably the pre-change lookup: every "*.txt" file below the journal folder
filenames = get_files(self.config["journal"])
for filename in filenames:
# each file is decoded as UTF-8 and its whole text is parsed into entries
with codecs.open(filename, "r", "utf-8") as f:
journal = f.read()
self.entries.extend(self._parse(journal))
self.sort()
# presumably the post-change body: skip loading when the journal folder does
# not exist, and only read files that Folder._get_files reports
if os.path.exists(self.config["journal"]):
filenames = Folder._get_files(self.config["journal"])
for filename in filenames:
with codecs.open(filename, "r", "utf-8") as f:
journal = f.read()
self.entries.extend(self._parse(journal))
self.sort()
return self
def write(self) -> None:
@ -81,7 +81,7 @@ class Folder(Journal):
journal_file.write(journal)
# look for and delete empty files
filenames = []
filenames = get_files(self.config["journal"])
filenames = Folder._get_files(self.config["journal"])
for filename in filenames:
if os.stat(filename).st_size <= 0:
os.remove(filename)
@ -119,3 +119,39 @@ class Folder(Journal):
self.increment_change_counts_by_edit(mod_entries)
self.entries = mod_entries
@staticmethod
def _get_files(journal_path: str) -> list[str]:
    """Collect the paths of all files under journal_path that look like entries.

    Only files laid out as <year>/<month>/<day>.txt are considered; the
    filtering of each level is delegated to _get_year_folders,
    _get_month_folders and _get_day_files.

    :param journal_path: root folder of the journal
    :return: entry file paths as strings, in directory-scan order (unsorted)
    """
    # Build a real list (not a generator) so the result matches the declared
    # return type and is a complete snapshot before a caller acts on, or
    # deletes, any of the files.
    return [
        day_file
        for year_folder in Folder._get_year_folders(pathlib.Path(journal_path))
        for month_folder in Folder._get_month_folders(year_folder)
        for day_file in Folder._get_day_files(month_folder)
    ]
@staticmethod
def _get_year_folders(path: pathlib.Path) -> list[pathlib.Path]:
    """Return the direct children of path that are directories named with exactly four digits.

    :param path: root folder of the journal
    :return: matching year folders, in directory-scan order (unsorted)
    """
    # YEAR_PATTERN only constrains the name; is_dir() rejects regular files
    # that happen to be named like a year. A list is returned so the value
    # matches the declared return type.
    return [child for child in path.glob(YEAR_PATTERN) if child.is_dir()]
@staticmethod
def _get_month_folders(path: pathlib.Path) -> list[pathlib.Path]:
    """Return the direct children of path that are directories named 01 through 12.

    :param path: a year folder
    :return: matching month folders, in directory-scan order (unsorted)
    """
    return [
        child
        for child in path.glob(MONTH_PATTERN)
        # MONTH_PATTERN also matches 00 and 13-19, so narrow to real months.
        # The directory check must be made on the candidate itself: checking
        # the parent year folder would let a regular file named e.g. "05"
        # through as if it were a month folder.
        if 1 <= int(child.name) <= 12 and child.is_dir()
    ]
@staticmethod
def _get_day_files(path: pathlib.Path) -> list[str]:
    """Return the entry files directly inside a month folder.

    A file qualifies when it is named <day>.txt, the day number is 1-31, and
    <year>/<month>/<day> (year and month taken from the names of the parent
    folder and of path) forms a real calendar date.

    :param path: a month folder located inside a year folder
    :return: matching file paths as strings, in directory-scan order (unsorted)
    """
    day_files = []
    for child in path.glob(DAY_PATTERN):
        day = int(child.stem)
        # DAY_PATTERN also matches 00 and 32-39; drop those cheaply first.
        if not 1 <= day <= 31:
            continue
        # Folder names are only parsed once a candidate day file exists, so a
        # folder without candidates never triggers the int() conversions.
        if not time.is_valid_date(
            year=int(path.parent.name),
            month=int(path.name),
            day=day,
        ):
            continue
        # Touch the filesystem last; it rejects directories named like "05.txt".
        if child.is_file():
            day_files.append(str(child))
    # A list is returned so the value matches the declared return type.
    return day_files

View file

@ -89,3 +89,11 @@ def parse(
if dt.days < -28 and not year_present:
date = date.replace(date.year - 1)
return date
def is_valid_date(year: int, month: int, day: int) -> bool:
    """Return True when year/month/day form a real calendar date, False otherwise.

    Validity is decided by whether datetime.datetime accepts the values, so
    impossible dates such as February 30th or month 0 give False.
    """
    try:
        datetime.datetime(year, month, day)
    except (ValueError, OverflowError):
        # ValueError: a component is outside the range datetime supports.
        # OverflowError: a component is too large for the C-level constructor;
        # that is still "not a valid date" rather than an error for the caller.
        return False
    else:
        return True