Only read text files that look like entries when opening folder journal (#1697)

* Add text file that should be ignored to basic test folder journal. Makes tons of tests fail
* Add additional files that should be ignored by FolderJournal
* Ignore all files in folder journal except year/month/day.txt
* Completely remake get_files in FolderJournal:
- move get_files into FolderJournal class and add underscore prefix
- create iterables that yield the year/month folders and the day files
- make year/month/day file reading strict: only exact expected months and days out of all possible months and days
* Restore accidentally-deleted self.sort() line
* Use match instead of string comparison to be os-agnostic
* Explicitly declare static methods
* Filter with glob first for max performance
* Explicitly check for valid dates in FolderJournal and add unit test
* Remove unneeded jrnl import
* Clean up method comment and add type hints
* Add is_valid_date unit test
* Elucidate comment

Co-authored-by: Jonathan Wren <jonathan@nowandwren.com>
This commit is contained in:
Micah Jerome Ellison 2023-04-29 15:49:41 -07:00 committed by GitHub
parent 88aa2491b0
commit 95836a7dd1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 153 additions and 16 deletions

View file

@ -2,8 +2,8 @@
# License: https://www.gnu.org/licenses/gpl-3.0.html
import codecs
import fnmatch
import os
import pathlib
from typing import TYPE_CHECKING
from jrnl import time
@ -13,14 +13,11 @@ from .Journal import Journal
if TYPE_CHECKING:
from jrnl.journals import Entry
def get_files(journal_config: str) -> list[str]:
    """Collect the path of every ``*.txt`` file found beneath journal_config.

    Walks journal_config and all of its subdirectories with os.walk and
    returns each matching file name joined onto the directory it was found in.
    """
    return [
        os.path.join(folder, text_file)
        for folder, _subfolders, names in os.walk(journal_config)
        for text_file in fnmatch.filter(names, "*.txt")
    ]
# Glob fragments describing the year/month/day.txt layout of a folder journal
DIGIT_PATTERN = "[0123456789]"
YEAR_PATTERN = "".join([DIGIT_PATTERN] * 4)  # four digits
MONTH_PATTERN = f"[01]{DIGIT_PATTERN}"  # two digits, the first being 0 or 1
DAY_PATTERN = f"[0123]{DIGIT_PATTERN}.txt"  # two digits (first is 0-3) plus ".txt"
class Folder(Journal):
@ -35,12 +32,15 @@ class Folder(Journal):
def open(self) -> "Folder":
    """Load every entry file found under the configured journal folder.

    Each file returned by Folder._get_files is read as UTF-8, parsed with
    self._parse, and its entries are appended to self.entries, which is
    sorted once all files have been read.

    Returns:
        self, after self.entries has been rebuilt.
    """
    self.entries = []
    journal_path = self.config["journal"]

    # A journal folder that does not exist is treated as an empty journal.
    filenames = []
    if os.path.exists(journal_path):
        filenames = Folder._get_files(journal_path)

    for filename in filenames:
        with codecs.open(filename, "r", "utf-8") as f:
            journal = f.read()
            self.entries.extend(self._parse(journal))

    self.sort()
    return self
def write(self) -> None:
@ -81,7 +81,7 @@ class Folder(Journal):
journal_file.write(journal)
# look for and delete empty files
filenames = []
filenames = get_files(self.config["journal"])
filenames = Folder._get_files(self.config["journal"])
for filename in filenames:
if os.stat(filename).st_size <= 0:
os.remove(filename)
@ -119,3 +119,39 @@ class Folder(Journal):
self.increment_change_counts_by_edit(mod_entries)
self.entries = mod_entries
@staticmethod
def _get_files(journal_path: str) -> list[str]:
    """Return the paths of all files under journal_path that look like entries.

    Only files laid out as <year>/<month>/<day>.txt are included; the
    filtering itself is delegated to _get_year_folders, _get_month_folders
    and _get_day_files.

    The result is built eagerly so that it really is the list the signature
    promises, and so that a caller may delete the returned files while
    looping over them without a directory scan still being in progress.
    """
    entry_files: list[str] = []
    for year_folder in Folder._get_year_folders(pathlib.Path(journal_path)):
        for month_folder in Folder._get_month_folders(year_folder):
            entry_files.extend(Folder._get_day_files(month_folder))
    return entry_files
@staticmethod
def _get_year_folders(path: pathlib.Path) -> list[pathlib.Path]:
    """Return the directories directly under path whose names match YEAR_PATTERN.

    Entries that match the pattern but are not directories are left out.
    The matches are returned as a list, as the signature declares.
    """
    return [child for child in path.glob(YEAR_PATTERN) if child.is_dir()]
@staticmethod
def _get_month_folders(path: pathlib.Path) -> list[pathlib.Path]:
    """Return the month directories (01 through 12) directly under path.

    A child qualifies when its name matches MONTH_PATTERN, the name read as
    an integer lies between 1 and 12, and the child itself is a directory.
    The matches are returned as a list, as the signature declares.
    """
    return [
        child
        for child in path.glob(MONTH_PATTERN)
        # The directory test must be made on the child: testing the parent
        # would let a regular file named like a month slip through.
        if 1 <= int(child.name) <= 12 and child.is_dir()
    ]
@staticmethod
def _get_day_files(path: pathlib.Path) -> list[str]:
    """Return the day files under the month folder path, as strings.

    path is expected to be a <year>/<month> folder: the year is read from
    the name of path's parent and the month from the name of path itself.
    A child qualifies when its name matches DAY_PATTERN, its stem read as an
    integer lies between 1 and 31, time.is_valid_date accepts the resulting
    year/month/day, and the child is a regular file.
    The matches are returned as a list, as the signature declares.
    """
    day_files: list[str] = []
    for child in path.glob(DAY_PATTERN):
        day = int(child.stem)
        if (
            1 <= day <= 31
            and time.is_valid_date(
                year=int(path.parent.name),
                month=int(path.name),
                day=day,
            )
            and child.is_file()
        ):
            day_files.append(str(child))
    return day_files

View file

@ -89,3 +89,11 @@ def parse(
if dt.days < -28 and not year_present:
date = date.replace(date.year - 1)
return date
def is_valid_date(year: int, month: int, day: int) -> bool:
    """Report whether year, month and day form a date datetime can represent.

    Returns:
        True when datetime.datetime(year, month, day) can be constructed,
        False when any component is out of range for that constructor.
    """
    try:
        datetime.datetime(year, month, day)
    except (ValueError, OverflowError):
        # ValueError: a component is outside the calendar range.
        # OverflowError: a component is too large for the C constructor.
        return False
    return True

View file

@ -0,0 +1,4 @@
[2022-03-02 9:25:00 AM] This file should be ignored (month)
This text file is in a folder journal's month directory ("2020/09"), but it's not in the file name format used by jrnl for folder journal entries, so it should be ignored.
This file should not ever appear in a test.

View file

@ -0,0 +1,4 @@
[2022-03-02 9:25:00 AM] This file should be ignored (year)
This text file is in a folder journal's year directory ("2020"), but it's not in the file name format used by jrnl for folder journal entries, so it should be ignored.
This file should not ever appear in a test.

View file

@ -0,0 +1,4 @@
[2022-03-02 9:25:00 AM] This file should be ignored (root)
This text file is in a folder journal's root directory, but it's not in the file name format used by jrnl for folder journal entries, so it should be ignored.
This file should not ever appear in a test.

View file

@ -0,0 +1,59 @@
# Copyright © 2012-2023 jrnl contributors
# License: https://www.gnu.org/licenses/gpl-3.0.html
import pathlib
from unittest import mock
import pytest
from jrnl.journals.FolderJournal import Folder
@pytest.mark.parametrize(
    "inputs_and_outputs",
    [
        [
            "/2020/01",
            ["02.txt", "03.txt", "31.txt"],
            ["/2020/01/02.txt", "/2020/01/03.txt", "/2020/01/31.txt"],
        ],
        [
            "/2020/02",  # leap year
            ["02.txt", "03.txt", "28.txt", "29.txt", "31.txt", "39.txt"],
            [
                "/2020/02/02.txt",
                "/2020/02/03.txt",
                "/2020/02/28.txt",
                "/2020/02/29.txt",
            ],
        ],
        [
            "/2100/02",  # not a leap year
            ["01.txt", "28.txt", "29.txt", "39.txt"],
            ["/2100/02/01.txt", "/2100/02/28.txt"],
        ],
        [
            "/2023/04",
            ["29.txt", "30.txt", "31.txt", "39.txt"],
            ["/2023/04/29.txt", "/2023/04/30.txt"],
        ],
    ],
)
def test_get_day_files_expected_filtering(inputs_and_outputs):
    """Check that _get_day_files keeps only files naming a real calendar day.

    Each case supplies a year/month folder, the file names the patched glob
    reports inside it, and the paths expected to survive the filtering.
    """
    folder, candidate_names, wanted_paths = inputs_and_outputs
    month_folder = pathlib.Path(folder)
    candidates = (month_folder / name for name in candidate_names)
    wanted = sorted(str(pathlib.PurePath(item)) for item in wanted_paths)

    # Stand in for the filesystem: glob reports the candidates and every
    # candidate claims to be a regular file.
    glob_patch = mock.patch("pathlib.Path.glob", return_value=candidates)
    is_file_patch = mock.patch.object(pathlib.Path, "is_file", return_value=True)
    with glob_patch, is_file_patch:
        found = sorted(Folder._get_day_files(month_folder))

    assert found == wanted

View file

@ -3,6 +3,8 @@
import datetime
import pytest
from jrnl import time
@ -20,3 +22,23 @@ def test_default_minute_is_added():
default_minute=30,
bracketed=False,
) == datetime.datetime(2020, 6, 20, 0, 30)
@pytest.mark.parametrize(
    "inputs",
    [
        [2000, 2, 29, True],
        [2023, 1, 0, False],
        [2023, 1, 1, True],
        [2023, 4, 31, False],
        [2023, 12, 31, True],
        [2023, 12, 32, False],
        [2023, 13, 1, False],
        [2100, 2, 27, True],
        [2100, 2, 28, True],
        [2100, 2, 29, False],
    ],
)
def test_is_valid_date(inputs):
    """Check time.is_valid_date against year, month, day, expected rows."""
    *date_parts, expected_result = inputs
    assert time.is_valid_date(*date_parts) == expected_result