[Fix] categorize_data now works w/ Transaction

Updates `categorize_data` to work with the `Transaction` class, which is now
used by the database.
Vacation categorization no longer overwrites a previously assigned category.
`DBManager` gained a new method to retrieve transactions between two dates
that have no category assigned.
Adds a method to `Transaction` that prepares the parameters for a category
update in the database.
Transactions returned from database queries are checked and assigned in
`if` statements with the walrus (`:=`) operator.
This commit is contained in:
Luís Murta 2021-07-03 17:47:40 +01:00
parent 1139dff249
commit d8c9f3f9b9
Signed by: satprog
GPG Key ID: DDF2EFC6179009DC
3 changed files with 80 additions and 38 deletions

View File

@ -1,10 +1,15 @@
from __future__ import annotations
from collections import namedtuple from collections import namedtuple
from typing import TYPE_CHECKING
import datetime as dt import datetime as dt
import logging import logging
import re import re
import yaml import yaml
from .database import DBManager
if TYPE_CHECKING:
from pfbudget.database import DBManager
from pfbudget.transactions import Transaction
Options = namedtuple( Options = namedtuple(
@ -36,19 +41,22 @@ def categorize_data(db: DBManager):
vacations(db) vacations(db)
# 3rd) Classify all else based on regex # 3rd) Classify all else based on regex
transactions = [list(t) for t in db.get_uncategorized_transactions()] if transactions := db.get_uncategorized_transactions():
for transaction in transactions: for transaction in transactions:
if not transaction[4]: if not transaction.category:
for name, category in categories.items(): for name, category in categories.items():
if matches(transaction, category): if matches(transaction, category):
transaction[4] = name transaction.category = name
break break
db.update_categories(transactions) db.update_categories(
[transaction for transaction in transactions if transaction.category]
)
# 4th) Manually update categories from the uncategorized transactions # 4th) Manually update categories from the uncategorized transactions
transactions = [list(t) for t in db.get_uncategorized_transactions()] if transactions := db.get_uncategorized_transactions():
if transactions: print(
print(f"Still {len(transactions)} uncategorized transactions left") f"Still {len(transactions)} uncategorized transactions left. Type quit/exit to exit the program."
)
for transaction in transactions: for transaction in transactions:
while True: while True:
category = input(f"{transaction} category: ") category = input(f"{transaction} category: ")
@ -59,7 +67,7 @@ def categorize_data(db: DBManager):
f"Category {category} doesn't exist. Please use one of {categories.keys()}" f"Category {category} doesn't exist. Please use one of {categories.keys()}"
) )
else: else:
transaction[4] = category transaction.category = category
db.update_category(transaction) db.update_category(transaction)
break break
@ -75,13 +83,18 @@ def vacations(db: DBManager) -> None:
logging.warning(f"{e} continuing...") logging.warning(f"{e} continuing...")
continue continue
not_vacations = categories["Travel"].negative_regex not_vacations = categories["Travel"].negative_regex # default is []
if transactions := [ if transactions := db.get_daterange_uncategorized_transactions(start, end):
list(t) for t in db.get_daterage_without(start, end, *not_vacations)
]:
for transaction in transactions: for transaction in transactions:
transaction[4] = "Travel" if not_vacations:
for category in not_vacations:
if not matches(
transaction, categories.get(category, Options())
):
transaction.category = "Travel"
else:
transaction.category = "Travel"
db.update_categories(transactions) db.update_categories(transactions)
@ -91,36 +104,40 @@ def vacations(db: DBManager) -> None:
def nulls(db: DBManager) -> None: def nulls(db: DBManager) -> None:
null = categories.get("Null", Options()) null = categories.get("Null", Options())
transactions = [list(t) for t in db.get_uncategorized_transactions()] transactions = db.get_uncategorized_transactions()
if not transactions:
return
matching_transactions = [] matching_transactions = []
for t in transactions: for t in transactions:
for cancel in ( for cancel in (
cancel cancel
for cancel in transactions for cancel in transactions
if ( if (
dt.datetime.fromisoformat(t[0]) - dt.timedelta(days=null.timedelta) t.date - dt.timedelta(days=null.timedelta)
<= dt.datetime.fromisoformat(cancel[0]) <= cancel.date
and dt.datetime.fromisoformat(cancel[0]) <= t.date + dt.timedelta(days=null.timedelta)
<= dt.datetime.fromisoformat(t[0]) + dt.timedelta(days=null.timedelta)
and (matches(t, null) if null.regex else True) and (matches(t, null) if null.regex else True)
and t[2] != cancel[2] and t.bank != cancel.bank
and t not in matching_transactions and t not in matching_transactions
and cancel not in matching_transactions and cancel not in matching_transactions
and cancel != t and cancel != t
and t[3] == -cancel[3] and t.value == -cancel.value
) )
): ):
t[4] = "Null" t.category = "Null"
cancel[4] = "Null" cancel.category = "Null"
matching_transactions.extend([t, cancel]) matching_transactions.extend([t, cancel])
break # There will only be one match per null transaction pair break # There will only be one match per null transaction pair
if matching_transactions:
db.update_categories(matching_transactions) db.update_categories(matching_transactions)
def matches(transaction, category: Options): def matches(transaction: Transaction, category: Options):
if not category.regex: if not category.regex:
return False return False
return any( return any(
re.compile(pattern).search(transaction[1].lower()) for pattern in category.regex re.compile(pattern).search(transaction.description.lower())
for pattern in category.regex
) )

View File

@ -78,6 +78,13 @@ FROM transactions
WHERE category IS (?) WHERE category IS (?)
""" """
SELECT_TRANSACTIONS_BETWEEN_DATES_WITH_CATEGORY = """
SELECT *
FROM transactions
WHERE date BETWEEN (?) AND (?)
AND category IS (?)
"""
SELECT_TRANSACTION_BY_PERIOD = """ SELECT_TRANSACTION_BY_PERIOD = """
SELECT EXTRACT((?) FROM date) AS (?), date, description, bank, value SELECT EXTRACT((?) FROM date) AS (?), date, description, bank, value
FROM transactions FROM transactions
@ -165,12 +172,13 @@ class DBManager:
def update_category(self, transaction: Transaction): def update_category(self, transaction: Transaction):
logger.info(f"Update {transaction} category") logger.info(f"Update {transaction} category")
self.__execute(UPDATE_CATEGORY, (transaction[4], *transaction[:4])) self.__execute(UPDATE_CATEGORY, transaction.update_category())
def update_categories(self, transactions: list[Transaction]): def update_categories(self, transactions: list[Transaction]):
logger.info(f"Update {len(transactions)} transactions' categories") logger.info(f"Update {len(transactions)} transactions' categories")
self.__executemany( self.__executemany(
UPDATE_CATEGORY, [transaction for transaction in transactions] UPDATE_CATEGORY,
[transaction.update_category() for transaction in transactions],
) )
def get_duplicated_transactions(self) -> list[Transaction] | None: def get_duplicated_transactions(self) -> list[Transaction] | None:
@ -195,12 +203,25 @@ class DBManager:
return None return None
def get_category(self, value: str) -> list[Transaction] | None: def get_category(self, value: str) -> list[Transaction] | None:
logger.info(f"Get transaction where category = {value}") logger.info(f"Get transactions where category = {value}")
transactions = self.__execute(SELECT_TRANSACTIONS_BY_CATEGORY, (value,)) transactions = self.__execute(SELECT_TRANSACTIONS_BY_CATEGORY, (value,))
if transactions: if transactions:
return [Transaction(t) for t in transactions] return [Transaction(t) for t in transactions]
return None return None
def get_daterange_category(
self, start: datetime, end: datetime, category: str
) -> list[Transaction] | None:
logger.info(
f"Get transactions from {start} to {end} where category = {category}"
)
transactions = self.__execute(
SELECT_TRANSACTIONS_BETWEEN_DATES_WITH_CATEGORY, (start, end, category)
)
if transactions:
return [Transaction(t) for t in transactions]
return None
def get_by_period(self, period: str) -> list[Transaction] | None: def get_by_period(self, period: str) -> list[Transaction] | None:
logger.info(f"Get transactions by {period}") logger.info(f"Get transactions by {period}")
transactions = self.__execute(SELECT_TRANSACTION_BY_PERIOD, period) transactions = self.__execute(SELECT_TRANSACTION_BY_PERIOD, period)
@ -209,11 +230,12 @@ class DBManager:
return None return None
def get_uncategorized_transactions(self) -> list[Transaction] | None: def get_uncategorized_transactions(self) -> list[Transaction] | None:
logger.info("Get uncategorized transactions") logger.debug("Get uncategorized transactions")
transactions = self.get_category(None) return self.get_category(None)
if transactions:
return [Transaction(t) for t in transactions] def get_daterange_uncategorized_transactions(self, start: datetime, end: datetime):
return None logger.debug("Get uncategorized transactions from {start} to {end}")
return self.get_daterange_category(start, end, None)
def get_daterage_without( def get_daterage_without(
self, start: datetime, end: datetime, *categories: str self, start: datetime, end: datetime, *categories: str

View File

@ -46,6 +46,9 @@ class Transaction:
def to_list(self): def to_list(self):
return [self.date, self.description, self.bank, self.value, self.category] return [self.date, self.description, self.bank, self.value, self.category]
def update_category(self):
return (self.category, self.date, self.description, self.bank, self.value)
@property @property
def category(self): def category(self):
return self._category return self._category