From d321481e29a3c98e148021d81ac0d735bf93fe07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Murta?= Date: Sat, 10 Dec 2022 00:22:33 +0000 Subject: [PATCH] Rule based categorizing Uses the rules defined for each category to classify each transaction. Fixes the categorize command, which was broken from previous refactors. Swaps str type on the categories_rules date to date. --- .../7adf89ec8d14_category_rule_date_format.py | 43 ++++++++++++++++++ pfbudget/cli/runnable.py | 8 ++-- pfbudget/core/categorizer.py | 44 +++++++++++++++++-- pfbudget/core/manager.py | 10 ++--- pfbudget/db/client.py | 4 ++ pfbudget/db/model.py | 15 +++---- 6 files changed, 102 insertions(+), 22 deletions(-) create mode 100644 alembic/versions/7adf89ec8d14_category_rule_date_format.py diff --git a/alembic/versions/7adf89ec8d14_category_rule_date_format.py b/alembic/versions/7adf89ec8d14_category_rule_date_format.py new file mode 100644 index 0000000..176734c --- /dev/null +++ b/alembic/versions/7adf89ec8d14_category_rule_date_format.py @@ -0,0 +1,43 @@ +"""Category rule date format + +Revision ID: 7adf89ec8d14 +Revises: 83603bb7ef9c +Create Date: 2022-12-10 00:08:47.535765+00:00 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = "7adf89ec8d14" +down_revision = "83603bb7ef9c" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.alter_column( + "categories_rules", + "date", + existing_type=sa.VARCHAR(), + type_=sa.Date(), + existing_nullable=True, + schema="transactions", + postgresql_using="date::date" + ) + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.alter_column( + "categories_rules", + "date", + existing_type=sa.Date(), + type_=sa.VARCHAR(), + existing_nullable=True, + schema="transactions", + ) + # ### end Alembic commands ### diff --git a/pfbudget/cli/runnable.py b/pfbudget/cli/runnable.py index 49f6f77..9222e6d 100644 --- a/pfbudget/cli/runnable.py +++ b/pfbudget/cli/runnable.py @@ -1,5 +1,6 @@ from pathlib import Path import argparse +import datetime as dt import re from pfbudget.common.types import Operation @@ -109,13 +110,12 @@ def argparser() -> argparse.ArgumentParser: """ Categorizing """ - p_categorize = subparsers.add_parser( + categorize = subparsers.add_parser( "categorize", description="Categorizes the transactions in the selected database", parents=[universal], - formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) - p_categorize.set_defaults(command=Operation.Categorize) + categorize.set_defaults(op=Operation.Categorize) """ Graph @@ -343,7 +343,7 @@ def category(parser: argparse.ArgumentParser, universal: argparse.ArgumentParser rule = commands.add_parser("rule", parents=[universal]) rule.set_defaults(op=Operation.CategoryRule) rule.add_argument("category", nargs="+", type=str) - rule.add_argument("--date", nargs=1, type=str) + rule.add_argument("--date", nargs=1, type=dt.date.fromisoformat) rule.add_argument("--description", nargs=1, type=str) rule.add_argument("--bank", nargs=1, type=str) rule.add_argument("--min", nargs=1, type=float) diff --git a/pfbudget/core/categorizer.py b/pfbudget/core/categorizer.py index 7b4f8e2..793ba59 100644 --- a/pfbudget/core/categorizer.py +++ b/pfbudget/core/categorizer.py @@ -1,4 +1,10 @@ -from pfbudget.db.model import Transaction, TransactionCategory +from pfbudget.db.model import ( + Category, + CategorySelector, + Selector, + Transaction, + TransactionCategory, +) from datetime import timedelta @@ -9,7 +15,7 @@ class Categorizer: def __init__(self): self.options["null_days"] = 4 - def categorize(self, transactions: list[Transaction]): + def categorize(self, transactions: list[Transaction], categories: list[Category]): """Overarching categorization tool Receives a list of transactions (by ref) and updates their category @@ -19,6 +25,7 @@ class Categorizer: """ self._nullify(transactions) + self._rules(transactions, categories) def _nullify(self, transactions: list[Transaction]): count = 0 @@ -38,10 +45,39 @@ class Categorizer: and cancel.amount == -transaction.amount ) ): - transaction.category = TransactionCategory(name="null") - cancel.category = TransactionCategory(name="null") + transaction.category = TransactionCategory( + name="null", selector=CategorySelector(Selector.nullifier) + ) + cancel.category = TransactionCategory( + name="null", selector=CategorySelector(Selector.nullifier) + ) matching.extend([transaction, cancel]) count += 2 break print(f"Nullified {count} transactions") + + def _rules(self, transactions: list[Transaction], categories: list[Category]): + for category in [c for c in categories if c.rules]: + for rule in category.rules: + for transaction in [t for t in transactions if not t.category]: + if rule.date: + if rule.date < transaction.date: + continue + if rule.description: + if rule.description not in transaction.description: + continue + if rule.bank: + if rule.bank != transaction.bank: + continue + if rule.min_amount: + if rule.min_amount > transaction.amount: + continue + if rule.max_amount: + if rule.max_amount <= transaction.amount: + continue + + # passed all conditions, assign category + transaction.category = TransactionCategory( + category.name, CategorySelector(Selector.rules) + ) diff --git a/pfbudget/core/manager.py b/pfbudget/core/manager.py index 2227ccf..d744af8 100644 --- a/pfbudget/core/manager.py +++ b/pfbudget/core/manager.py @@ -28,7 +28,10 @@ class Manager: # TODO this is a monstrosity, remove when possible download(self, self.args) case Operation.Categorize: - self.categorize() + with self.db.session() as session: + uncategorized = session.uncategorized() + categories = session.categories() + Categorizer().categorize(uncategorized, categories) case Operation.Register: # self._db = DbClient(args["database"]) @@ -106,11 +109,6 @@ class Manager: with self.db.session() as session: session.add(transactions) - def categorize(self): - with self.db.session() as session: - uncategorized = session.uncategorized() - Categorizer().categorize(uncategorized) - # def get_bank_by(self, key: str, value: str) -> Bank: # client = DatabaseClient(self.__db) # bank = client.get_bank(key, value) diff --git a/pfbudget/db/client.py b/pfbudget/db/client.py index 28c2b39..5bdd558 100644 --- a/pfbudget/db/client.py +++ b/pfbudget/db/client.py @@ -129,5 +129,9 @@ class DbClient: stmt = select(Transaction).where(~Transaction.category.has()) return self.__session.scalars(stmt).all() + def categories(self) -> list[Category]: + stmt = select(Category) + return self.__session.scalars(stmt).all() + def session(self) -> ClientSession: return self.ClientSession(self.engine) diff --git a/pfbudget/db/model.py b/pfbudget/db/model.py index 3eab450..b70ec0e 100644 --- a/pfbudget/db/model.py +++ b/pfbudget/db/model.py @@ -81,9 +81,7 @@ class Transaction(Base): bank: Mapped[bankfk] amount: Mapped[money] - category: Mapped[Optional[TransactionCategory]] = relationship( - back_populates="original", lazy="joined", default=None - ) + category: Mapped[Optional[TransactionCategory]] = relationship() note: Mapped[Optional[Note]] = relationship(back_populates="original", default=None) tags: Mapped[Optional[set[Tag]]] = relationship( back_populates="original", @@ -138,8 +136,7 @@ class TransactionCategory(Base): id: Mapped[idfk] = mapped_column(primary_key=True, init=False) name: Mapped[str] = mapped_column(ForeignKey(Category.name)) - original: Mapped[Transaction] = relationship(back_populates="category") - selector: Mapped[CategorySelector] = relationship(back_populates="category") + selector: Mapped[CategorySelector] = relationship() def __repr__(self) -> str: return f"Category({self.name})" @@ -182,12 +179,15 @@ class CategoryRule(Base): id: Mapped[idpk] = mapped_column(autoincrement=True, init=False) name: Mapped[catfk] = mapped_column() - date: Mapped[Optional[str]] = mapped_column() + date: Mapped[Optional[dt.date]] = mapped_column() description: Mapped[Optional[str]] = mapped_column() bank: Mapped[Optional[str]] = mapped_column() min_amount: Mapped[Optional[float]] = mapped_column() max_amount: Mapped[Optional[float]] = mapped_column() + def __hash__(self): + return hash(self.id) + class Selector(enum.Enum): unknown = enum.auto() @@ -211,11 +211,10 @@ class CategorySelector(Base): BigInteger, ForeignKey(TransactionCategory.id, ondelete="CASCADE"), primary_key=True, + init=False, ) selector: Mapped[categoryselector] - category: Mapped[TransactionCategory] = relationship(back_populates="selector") - class Period(enum.Enum): daily = "daily"