budget/pfbudget/transform/categorizer.py
Luís Murta d3c36e30d5
[Refactor] Creates Transform interface
Moves categorizer into transform module.
Puts the categorizer under unit tests.
2023-04-22 22:21:00 +01:00

155 lines
5.5 KiB
Python

from codetiming import Timer
from datetime import timedelta
from typing import Sequence
import pfbudget.db.model as t
class Categorizer:
options = {}
def __init__(self):
self.options["null_days"] = 3
def rules(
self,
transactions: Sequence[t.BankTransaction],
categories: Sequence[t.Category],
tags: Sequence[t.Tag],
nullify: bool = True
):
"""Overarching categorization tool
Receives a list of transactions (by ref) and updates their category according
to the rules defined for each category
Args:
transactions (Sequence[BankTransaction]): uncategorized transactions
categories (Sequence[Category]): available categories
tags (Sequence[Tag]): currently available tags
"""
if nullify:
try:
null = next(cat for cat in categories if cat.name == "null")
print("Nullifying")
self._nullify(transactions, null)
except StopIteration:
print("Null category not defined")
categories = [cat for cat in categories if cat.name != "null"]
self._rule_based_categories(transactions, categories)
self._rule_based_tags(transactions, tags)
@Timer(name="nullify")
def _nullify(self, transactions: Sequence[t.BankTransaction], null: t.Category):
count = 0
matching = []
for transaction in transactions:
for cancel in (
cancel
for cancel in transactions
if (
transaction.date - timedelta(days=self.options["null_days"])
<= cancel.date
<= transaction.date + timedelta(days=self.options["null_days"])
and cancel != transaction
and cancel.bank != transaction.bank
and cancel.amount == -transaction.amount
and transaction not in matching
and cancel not in matching
and all(r.matches(transaction) for r in null.rules)
and all(r.matches(cancel) for r in null.rules)
)
):
transaction.category = t.TransactionCategory(
name="null",
selector=t.CategorySelector(t.Selector_T.nullifier),
)
cancel.category = t.TransactionCategory(
name="null",
selector=t.CategorySelector(t.Selector_T.nullifier),
)
matching.extend([transaction, cancel])
count += 2
break
print(f"Nullified {count} of {len(transactions)} transactions")
@Timer(name="categoryrules")
def _rule_based_categories(
self,
transactions: Sequence[t.BankTransaction],
categories: Sequence[t.Category],
):
print(f"Categorizing {len(transactions)} transactions")
d = {}
for category in [c for c in categories if c.rules]:
for rule in category.rules:
# for transaction in [t for t in transactions if not t.category]:
for transaction in [
t
for t in transactions
if not t.category or t.category.name != "null"
]:
if not rule.matches(transaction):
continue
# passed all conditions, assign category
if transaction.category:
if transaction.category.name == category.name:
continue
if (
input(
f"Overwrite {transaction} with {category.name}? (y/n)"
)
== "y"
):
transaction.category.name = category.name
transaction.category.selector.selector = t.Selector_T.rules
else:
transaction.category = t.TransactionCategory(
category.name, t.CategorySelector(t.Selector_T.rules)
)
if rule in d:
d[rule] += 1
else:
d[rule] = 1
for k, v in d.items():
print(f"{v}: {k}")
@Timer(name="tagrules")
def _rule_based_tags(
self, transactions: Sequence[t.BankTransaction], tags: Sequence[t.Tag]
):
print(f"Tagging {len(transactions)} transactions")
d = {}
for tag in [t for t in tags if len(t.rules) > 0]:
for rule in tag.rules:
# for transaction in [t for t in transactions if not t.category]:
for transaction in [
t
for t in transactions
if tag.name not in [tag.tag for tag in t.tags]
]:
if not rule.matches(transaction):
continue
if not transaction.tags:
transaction.tags = {t.TransactionTag(tag.name)}
else:
transaction.tags.add(t.TransactionTag(tag.name))
if rule in d:
d[rule] += 1
else:
d[rule] = 1
for k, v in d.items():
print(f"{v}: {k}")