budget/pfbudget/categories.py
Luís Murta deaa71ead4
Categorizing refactored with SQLite DB and YAML
`categorize_data` is the new entry for data categorization and receives
a DBManager. Contains the categorizing logic.
Categorizer configuration now done solely from categories.yaml file.

Ancilliary database methods added to DBManager required for categorizing
transactions.

Adds categorize to command line options.
Removes obsolete restart options and method from runnable.py.
Fixes parse and categorize method, now take cmd line arguments and
`DBManager`.

Removes obsolete tools.py, all functions already rewritten in relevant
modules.

Updated categories.yaml with new keys.
2021-06-11 22:11:07 +01:00

127 lines
4.1 KiB
Python

from collections import namedtuple
import datetime as dt
import logging
import re
import yaml
from .database import DBManager
Options = namedtuple(
"Options",
[
"regex",
"banks",
"regular",
"negative_regex",
"date_fmt",
"vacations",
"timedelta",
],
defaults=[[], [], [], [], "", [], 4],
)
cfg = yaml.safe_load(open("categories.yaml"))
categories = {k: Options(**v) if v else Options() for k, v in cfg.items()}
def categorize_data(db: DBManager):
# 1st) Classifying null transactions, i.e. transfers between banks.
# Will not overwrite previous categories
nulls(db)
# 2nd) Classifying all vacations by vacation dates
# Will not overwrite previous categories
vacations(db)
# 3rd) Classify all else based on regex
transactions = [list(t) for t in db.get_uncategorized_transactions()]
for transaction in transactions:
if not transaction[4]:
for name, category in categories.items():
if matches(transaction, category):
transaction[4] = name
break
db.update_categories(transactions)
# 4th) Manually update categories from the uncategorized transactions
transactions = [list(t) for t in db.get_uncategorized_transactions()]
if transactions:
print(f"Still {len(transactions)} uncategorized transactions left")
for transaction in transactions:
while True:
category = input(f"{transaction} category: ")
if category == "quit" or category == "exit":
return
if category not in categories:
print(
f"Category {category} doesn't exist. Please use one of {categories.keys()}"
)
else:
transaction[4] = category
db.update_category(transaction)
break
def vacations(db: DBManager) -> None:
try:
date_fmt = categories["Travel"].date_fmt
for start, end in categories["Travel"].vacations:
try:
start = dt.datetime.strptime(start, date_fmt).date().isoformat()
end = dt.datetime.strptime(end, date_fmt).date().isoformat()
except ValueError as e:
logging.warning(f"{e} continuing...")
continue
not_vacations = categories["Travel"].negative_regex
if transactions := [
list(t) for t in db.get_daterage_without(start, end, *not_vacations)
]:
for transaction in transactions:
transaction[4] = "Travel"
db.update_categories(transactions)
except KeyError as e:
print(e)
def nulls(db: DBManager) -> None:
null = categories.get("Null", Options())
transactions = [list(t) for t in db.get_uncategorized_transactions()]
matching_transactions = []
for t in transactions:
for cancel in (
cancel
for cancel in transactions
if (
dt.datetime.fromisoformat(t[0]) - dt.timedelta(days=null.timedelta)
<= dt.datetime.fromisoformat(cancel[0])
and dt.datetime.fromisoformat(cancel[0])
<= dt.datetime.fromisoformat(t[0]) + dt.timedelta(days=null.timedelta)
and (matches(t, null) if null.regex else True)
and t[2] != cancel[2]
and t not in matching_transactions
and cancel not in matching_transactions
and cancel != t
and t[3] == -cancel[3]
)
):
t[4] = "Null"
cancel[4] = "Null"
matching_transactions.extend([t, cancel])
break # There will only be one match per null transaction pair
db.update_categories(matching_transactions)
def matches(transaction, category: Options):
if not category.regex:
return False
return any(
re.compile(pattern).search(transaction[1].lower()) for pattern in category.regex
)