[Fix] categorize_data now works w/ Transaction

Updates `categorize_data` to work with the `Transaction` class, which is now
used by the database.
Vacation categorization no longer replaces a previously assigned category.
`DBManager` gained a new method to retrieve the uncategorized transactions
between two dates.
Adds a method to `Transaction` that prepares its fields for a category update in the database.
Transactions returned from database queries are now checked and assigned in
`if` statements using the walrus (`:=`) operator.
This commit is contained in:
Luís Murta 2021-07-03 17:47:40 +01:00
parent 1139dff249
commit d8c9f3f9b9
Signed by: satprog
GPG Key ID: DDF2EFC6179009DC
3 changed files with 80 additions and 38 deletions

View File

@ -1,10 +1,15 @@
from __future__ import annotations
from collections import namedtuple
from typing import TYPE_CHECKING
import datetime as dt
import logging
import re
import yaml
from .database import DBManager
if TYPE_CHECKING:
from pfbudget.database import DBManager
from pfbudget.transactions import Transaction
Options = namedtuple(
@ -36,19 +41,22 @@ def categorize_data(db: DBManager):
vacations(db)
# 3rd) Classify all else based on regex
transactions = [list(t) for t in db.get_uncategorized_transactions()]
if transactions := db.get_uncategorized_transactions():
for transaction in transactions:
if not transaction[4]:
if not transaction.category:
for name, category in categories.items():
if matches(transaction, category):
transaction[4] = name
transaction.category = name
break
db.update_categories(transactions)
db.update_categories(
[transaction for transaction in transactions if transaction.category]
)
# 4th) Manually update categories from the uncategorized transactions
transactions = [list(t) for t in db.get_uncategorized_transactions()]
if transactions:
print(f"Still {len(transactions)} uncategorized transactions left")
if transactions := db.get_uncategorized_transactions():
print(
f"Still {len(transactions)} uncategorized transactions left. Type quit/exit to exit the program."
)
for transaction in transactions:
while True:
category = input(f"{transaction} category: ")
@ -59,7 +67,7 @@ def categorize_data(db: DBManager):
f"Category {category} doesn't exist. Please use one of {categories.keys()}"
)
else:
transaction[4] = category
transaction.category = category
db.update_category(transaction)
break
@ -75,13 +83,18 @@ def vacations(db: DBManager) -> None:
logging.warning(f"{e} continuing...")
continue
not_vacations = categories["Travel"].negative_regex
not_vacations = categories["Travel"].negative_regex # default is []
if transactions := [
list(t) for t in db.get_daterage_without(start, end, *not_vacations)
]:
if transactions := db.get_daterange_uncategorized_transactions(start, end):
for transaction in transactions:
transaction[4] = "Travel"
if not_vacations:
for category in not_vacations:
if not matches(
transaction, categories.get(category, Options())
):
transaction.category = "Travel"
else:
transaction.category = "Travel"
db.update_categories(transactions)
@ -91,36 +104,40 @@ def vacations(db: DBManager) -> None:
def nulls(db: DBManager) -> None:
null = categories.get("Null", Options())
transactions = [list(t) for t in db.get_uncategorized_transactions()]
transactions = db.get_uncategorized_transactions()
if not transactions:
return
matching_transactions = []
for t in transactions:
for cancel in (
cancel
for cancel in transactions
if (
dt.datetime.fromisoformat(t[0]) - dt.timedelta(days=null.timedelta)
<= dt.datetime.fromisoformat(cancel[0])
and dt.datetime.fromisoformat(cancel[0])
<= dt.datetime.fromisoformat(t[0]) + dt.timedelta(days=null.timedelta)
t.date - dt.timedelta(days=null.timedelta)
<= cancel.date
<= t.date + dt.timedelta(days=null.timedelta)
and (matches(t, null) if null.regex else True)
and t[2] != cancel[2]
and t.bank != cancel.bank
and t not in matching_transactions
and cancel not in matching_transactions
and cancel != t
and t[3] == -cancel[3]
and t.value == -cancel.value
)
):
t[4] = "Null"
cancel[4] = "Null"
t.category = "Null"
cancel.category = "Null"
matching_transactions.extend([t, cancel])
break # There will only be one match per null transaction pair
if matching_transactions:
db.update_categories(matching_transactions)
def matches(transaction, category: Options):
def matches(transaction: Transaction, category: Options):
if not category.regex:
return False
return any(
re.compile(pattern).search(transaction[1].lower()) for pattern in category.regex
re.compile(pattern).search(transaction.description.lower())
for pattern in category.regex
)

View File

@ -78,6 +78,13 @@ FROM transactions
WHERE category IS (?)
"""
SELECT_TRANSACTIONS_BETWEEN_DATES_WITH_CATEGORY = """
SELECT *
FROM transactions
WHERE date BETWEEN (?) AND (?)
AND category IS (?)
"""
SELECT_TRANSACTION_BY_PERIOD = """
SELECT EXTRACT((?) FROM date) AS (?), date, description, bank, value
FROM transactions
@ -165,12 +172,13 @@ class DBManager:
def update_category(self, transaction: Transaction):
logger.info(f"Update {transaction} category")
self.__execute(UPDATE_CATEGORY, (transaction[4], *transaction[:4]))
self.__execute(UPDATE_CATEGORY, transaction.update_category())
def update_categories(self, transactions: list[Transaction]):
logger.info(f"Update {len(transactions)} transactions' categories")
self.__executemany(
UPDATE_CATEGORY, [transaction for transaction in transactions]
UPDATE_CATEGORY,
[transaction.update_category() for transaction in transactions],
)
def get_duplicated_transactions(self) -> list[Transaction] | None:
@ -195,12 +203,25 @@ class DBManager:
return None
def get_category(self, value: str) -> list[Transaction] | None:
logger.info(f"Get transaction where category = {value}")
logger.info(f"Get transactions where category = {value}")
transactions = self.__execute(SELECT_TRANSACTIONS_BY_CATEGORY, (value,))
if transactions:
return [Transaction(t) for t in transactions]
return None
def get_daterange_category(
    self, start: datetime, end: datetime, category: str
) -> list[Transaction] | None:
    """Fetch the transactions in a date range that carry a given category.

    Runs ``SELECT_TRANSACTIONS_BETWEEN_DATES_WITH_CATEGORY`` with
    ``(start, end, category)`` as its parameters and wraps every returned
    row in a ``Transaction``.

    Args:
        start: First bound handed to the query's ``BETWEEN`` clause.
        end: Second bound handed to the query's ``BETWEEN`` clause.
        category: Category compared with ``IS`` in the query. Callers in
            this class also pass ``None`` to select uncategorized rows.

    Returns:
        A list of ``Transaction`` objects, or ``None`` when the query
        yields nothing.
    """
    logger.info(
        f"Get transactions from {start} to {end} where category = {category}"
    )
    rows = self.__execute(
        SELECT_TRANSACTIONS_BETWEEN_DATES_WITH_CATEGORY, (start, end, category)
    )
    # Guard clause: an empty/None result is reported to callers as None.
    if not rows:
        return None
    return [Transaction(row) for row in rows]
def get_by_period(self, period: str) -> list[Transaction] | None:
logger.info(f"Get transactions by {period}")
transactions = self.__execute(SELECT_TRANSACTION_BY_PERIOD, period)
@ -209,11 +230,12 @@ class DBManager:
return None
def get_uncategorized_transactions(self) -> list[Transaction] | None:
logger.info("Get uncategorized transactions")
transactions = self.get_category(None)
if transactions:
return [Transaction(t) for t in transactions]
return None
logger.debug("Get uncategorized transactions")
return self.get_category(None)
def get_daterange_uncategorized_transactions(
    self, start: datetime, end: datetime
) -> list[Transaction] | None:
    """Fetch the transactions in a date range that have no category.

    Thin wrapper that delegates to ``get_daterange_category`` with a
    ``None`` category.

    Args:
        start: First bound of the date range.
        end: Second bound of the date range.

    Returns:
        Whatever ``get_daterange_category`` returns for a ``None`` category:
        a list of ``Transaction`` objects, or ``None`` when nothing matches.
    """
    # The message must be an f-string; without the prefix the literal text
    # "{start}" and "{end}" was logged instead of the actual dates.
    logger.debug(f"Get uncategorized transactions from {start} to {end}")
    return self.get_daterange_category(start, end, None)
def get_daterage_without(
self, start: datetime, end: datetime, *categories: str

View File

@ -46,6 +46,9 @@ class Transaction:
def to_list(self):
return [self.date, self.description, self.bank, self.value, self.category]
def update_category(self):
    """Build the tuple used to update this transaction's category.

    The category comes first, followed by the date, description, bank and
    value. ``DBManager`` passes this tuple as the parameters of its
    ``UPDATE_CATEGORY`` statement.

    Returns:
        ``(category, date, description, bank, value)``.
    """
    new_category = self.category
    identifying_fields = (self.date, self.description, self.bank, self.value)
    return (new_category, *identifying_fields)
@property
def category(self):
return self._category