mirror of
https://github.com/jrnl-org/jrnl.git
synced 2025-06-27 21:16:14 +02:00
Only read text files that look like entries when opening folder journal (#1697)
* Add text file that should be ignored to basic test folder journal. Makes tons of tests fail
* Add additional files that should be ignored by FolderJournal
* Ignore all files in folder journal except year/month/day.txt
* Completely remake get_files in FolderJournal:
  - move get_files into FolderJournal class and add underscore prefix
  - create iterables to get for year/month folders and day files
  - make year/month/day file reading strict: only exact expected months and days out of all possible months and days
* Restore accidentally-deleted self.sort() line
* Use match instead of string comparison to be os-agnostic
* Explicitly declare static methods
* Filter with glob first for max performance
* Explicitly check for valid dates in FolderJournal and add unit test
* Remove unneeded jrnl import
* Clean up method comment and add type hints
* Add is_valid_date unit test
* Elucidate comment

Co-authored-by: Jonathan Wren <jonathan@nowandwren.com>
This commit is contained in:
parent
88aa2491b0
commit
95836a7dd1
7 changed files with 153 additions and 16 deletions
|
@ -2,8 +2,8 @@
|
|||
# License: https://www.gnu.org/licenses/gpl-3.0.html
|
||||
|
||||
import codecs
import fnmatch
import os
import pathlib
from collections.abc import Iterator
from typing import TYPE_CHECKING

from jrnl import time
|
||||
|
@ -13,14 +13,11 @@ from .Journal import Journal
|
|||
if TYPE_CHECKING:
|
||||
from jrnl.journals import Entry
|
||||
|
||||
|
||||
def get_files(journal_config: str) -> list[str]:
    """Collect every ``*.txt`` file found anywhere below ``journal_config``.

    The whole directory tree is walked and each file whose name matches
    ``*.txt`` is returned as the walk root joined with the file name.
    A path that does not exist produces an empty list, because the walk
    simply yields nothing.
    """
    return [
        os.path.join(folder, name)
        for folder, _subfolders, names in os.walk(journal_config)
        for name in fnmatch.filter(names, "*.txt")
    ]
|
||||
# Glob patterns for the folder journal layout <year>/<month>/<day>.txt.
# They only constrain the *shape* of a name (digit count and leading digit),
# so e.g. "19" still matches MONTH_PATTERN and "39.txt" matches DAY_PATTERN;
# the numeric range checks are done by the code that consumes the matches.
DIGIT_PATTERN = "[0123456789]"
# Four digits.
YEAR_PATTERN = "".join([DIGIT_PATTERN] * 4)
# Two digits, the first being 0 or 1.
MONTH_PATTERN = f"[01]{DIGIT_PATTERN}"
# Two digits, the first being 0-3, followed by the ".txt" extension.
DAY_PATTERN = f"[0123]{DIGIT_PATTERN}.txt"
|
||||
|
||||
|
||||
class Folder(Journal):
|
||||
|
@ -35,12 +32,15 @@ class Folder(Journal):
|
|||
def open(self) -> "Folder":
|
||||
filenames = []
|
||||
self.entries = []
|
||||
filenames = get_files(self.config["journal"])
|
||||
|
||||
if os.path.exists(self.config["journal"]):
|
||||
filenames = Folder._get_files(self.config["journal"])
|
||||
for filename in filenames:
|
||||
with codecs.open(filename, "r", "utf-8") as f:
|
||||
journal = f.read()
|
||||
self.entries.extend(self._parse(journal))
|
||||
self.sort()
|
||||
|
||||
return self
|
||||
|
||||
def write(self) -> None:
|
||||
|
@ -81,7 +81,7 @@ class Folder(Journal):
|
|||
journal_file.write(journal)
|
||||
# look for and delete empty files
|
||||
filenames = []
|
||||
filenames = get_files(self.config["journal"])
|
||||
filenames = Folder._get_files(self.config["journal"])
|
||||
for filename in filenames:
|
||||
if os.stat(filename).st_size <= 0:
|
||||
os.remove(filename)
|
||||
|
@ -119,3 +119,39 @@ class Folder(Journal):
|
|||
|
||||
self.increment_change_counts_by_edit(mod_entries)
|
||||
self.entries = mod_entries
|
||||
|
||||
@staticmethod
def _get_files(journal_path: str) -> Iterator[str]:
    """Lazily yield the path of every file under ``journal_path`` that looks like an entry.

    Only files reached through a year folder, then a month folder, then a
    day file are produced; any other file in the tree is never visited.

    This is a generator, so the result can be iterated only once. Wrap it
    in ``list()`` if it has to be traversed again or measured.

    :param journal_path: root directory of the folder journal
    :return: iterator over entry file paths, as strings
    """
    for year_folder in Folder._get_year_folders(pathlib.Path(journal_path)):
        for month_folder in Folder._get_month_folders(year_folder):
            yield from Folder._get_day_files(month_folder)
|
||||
|
||||
@staticmethod
def _get_year_folders(path: pathlib.Path) -> Iterator[pathlib.Path]:
    """Yield the direct children of ``path`` that are directories matching ``YEAR_PATTERN``.

    :param path: root directory of the folder journal
    :return: iterator over year directories
    """
    for child in path.glob(YEAR_PATTERN):
        # The glob only checks the name; a regular file could carry it too.
        if child.is_dir():
            yield child
|
||||
|
||||
@staticmethod
def _get_month_folders(path: pathlib.Path) -> Iterator[pathlib.Path]:
    """Yield the direct children of ``path`` that are month directories.

    A child qualifies when its name matches ``MONTH_PATTERN``, its numeric
    value is between 1 and 12 inclusive, and it is itself a directory.

    :param path: a year directory of the folder journal
    :return: iterator over month directories
    """
    for child in path.glob(MONTH_PATTERN):
        # MONTH_PATTERN also matches "00" and "13".."19", so range-check the
        # number, and test the *child* (not the parent year folder) for being
        # a directory so that a stray file named like a month is skipped.
        if 1 <= int(child.name) <= 12 and child.is_dir():
            yield child
|
||||
|
||||
@staticmethod
def _get_day_files(path: pathlib.Path) -> Iterator[str]:
    """Yield, as strings, the day files in ``path`` whose location spells a real date.

    ``path`` is read as ``<year>/<month>``: the year is taken from
    ``path.parent.name`` and the month from ``path.name``. A child matching
    ``DAY_PATTERN`` is yielded only when its stem is a day from 1 to 31,
    the year/month/day combination passes ``time.is_valid_date`` and the
    child is a regular file.

    :param path: a month directory of the folder journal
    :return: iterator over entry file paths, as strings
    """
    for child in path.glob(DAY_PATTERN):
        day = int(child.stem)
        if (
            # Cheap range check first: DAY_PATTERN also matches "00" and
            # "32".."39", and this avoids parsing the folder names for them.
            1 <= day <= 31
            and time.is_valid_date(
                year=int(path.parent.name),
                month=int(path.name),
                day=day,
            )
            and child.is_file()
        ):
            yield str(child)
|
||||
|
|
|
@ -89,3 +89,11 @@ def parse(
|
|||
if dt.days < -28 and not year_present:
|
||||
date = date.replace(date.year - 1)
|
||||
return date
|
||||
|
||||
|
||||
def is_valid_date(year: int, month: int, day: int) -> bool:
    """Tell whether ``year``/``month``/``day`` can be built into a ``datetime``.

    Returns ``True`` when ``datetime.datetime(year, month, day)`` succeeds
    and ``False`` when it raises ``ValueError``. Any other exception is
    not handled here.
    """
    try:
        datetime.datetime(year, month, day)
    except ValueError:
        return False
    else:
        return True
|
||||
|
|
|
@ -0,0 +1,4 @@
|
|||
[2022-03-02 9:25:00 AM] This file should be ignored (month)
|
||||
This text file is in a folder journal's month directory ("2020/09"), but it's not in the file name format used by jrnl for folder journal entries, so it should be ignored.
|
||||
|
||||
This file should not ever appear in a test.
|
|
@ -0,0 +1,4 @@
|
|||
[2022-03-02 9:25:00 AM] This file should be ignored (year)
|
||||
This text file is in a folder journal's year directory ("2020"), but it's not in the file name format used by jrnl for folder journal entries, so it should be ignored.
|
||||
|
||||
This file should not ever appear in a test.
|
4
tests/data/journals/basic_folder/should-be-ignored.txt
Normal file
4
tests/data/journals/basic_folder/should-be-ignored.txt
Normal file
|
@ -0,0 +1,4 @@
|
|||
[2022-03-02 9:25:00 AM] This file should be ignored (root)
|
||||
This text file is in a folder journal's root directory, but it's not in the file name format used by jrnl for folder journal entries, so it should be ignored.
|
||||
|
||||
This file should not ever appear in a test.
|
59
tests/unit/test_journals_folder_journal.py
Normal file
59
tests/unit/test_journals_folder_journal.py
Normal file
|
@ -0,0 +1,59 @@
|
|||
# Copyright © 2012-2023 jrnl contributors
|
||||
# License: https://www.gnu.org/licenses/gpl-3.0.html
|
||||
|
||||
import pathlib
|
||||
from unittest import mock
|
||||
|
||||
import pytest
|
||||
|
||||
from jrnl.journals.FolderJournal import Folder
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "inputs_and_outputs",
    [
        [
            "/2020/01",
            ["02.txt", "03.txt", "31.txt"],
            ["/2020/01/02.txt", "/2020/01/03.txt", "/2020/01/31.txt"],
        ],
        [
            "/2020/02",  # leap year
            ["02.txt", "03.txt", "28.txt", "29.txt", "31.txt", "39.txt"],
            [
                "/2020/02/02.txt",
                "/2020/02/03.txt",
                "/2020/02/28.txt",
                "/2020/02/29.txt",
            ],
        ],
        [
            "/2100/02",  # not a leap year
            ["01.txt", "28.txt", "29.txt", "39.txt"],
            ["/2100/02/01.txt", "/2100/02/28.txt"],
        ],
        [
            "/2023/04",
            ["29.txt", "30.txt", "31.txt", "39.txt"],
            ["/2023/04/29.txt", "/2023/04/30.txt"],
        ],
    ],
)
def test_get_day_files_expected_filtering(inputs_and_outputs):
    """Folder._get_day_files keeps only the globbed names that form a real date.

    Each case supplies a year/month folder, the file names the patched
    ``glob`` should report for it, and the paths expected to survive.
    """
    folder, candidate_names, wanted = inputs_and_outputs
    month_dir = pathlib.Path(folder)

    # Paths the patched Path.glob hands back in place of a real directory scan.
    globbed = (month_dir / name for name in candidate_names)
    # Route the expectations through PurePath so separators match the platform.
    wanted_paths = sorted(str(pathlib.PurePath(item)) for item in wanted)

    with (
        mock.patch("pathlib.Path.glob", return_value=globbed),
        mock.patch.object(pathlib.Path, "is_file", return_value=True),
    ):
        found_paths = sorted(Folder._get_day_files(month_dir))

    assert found_paths == wanted_paths
|
|
@ -3,6 +3,8 @@
|
|||
|
||||
import datetime
|
||||
|
||||
import pytest
|
||||
|
||||
from jrnl import time
|
||||
|
||||
|
||||
|
@ -20,3 +22,23 @@ def test_default_minute_is_added():
|
|||
default_minute=30,
|
||||
bracketed=False,
|
||||
) == datetime.datetime(2020, 6, 20, 0, 30)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "inputs",
    [
        [2000, 2, 29, True],
        [2023, 1, 0, False],
        [2023, 1, 1, True],
        [2023, 4, 31, False],
        [2023, 12, 31, True],
        [2023, 12, 32, False],
        [2023, 13, 1, False],
        [2100, 2, 27, True],
        [2100, 2, 28, True],
        [2100, 2, 29, False],
    ],
)
def test_is_valid_date(inputs):
    """time.is_valid_date agrees with the expected flag in each [year, month, day, flag] row."""
    *date_parts, expected_result = inputs
    assert time.is_valid_date(*date_parts) == expected_result
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue