Rule based categorizing

Uses the rules defined for each category to classify each transaction.
Fixes the categorize command, which had been broken by previous refactors.
Changes the type of the categories_rules date column from str to date.
This commit is contained in:
Luís Murta 2022-12-10 00:22:33 +00:00
parent fed007ff89
commit d321481e29
Signed by: satprog
GPG Key ID: 169EF1BBD7049F94
6 changed files with 102 additions and 22 deletions

View File

@ -0,0 +1,43 @@
"""Category rule date format
Revision ID: 7adf89ec8d14
Revises: 83603bb7ef9c
Create Date: 2022-12-10 00:08:47.535765+00:00
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "7adf89ec8d14"
down_revision = "83603bb7ef9c"
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Change ``transactions.categories_rules.date`` from VARCHAR to DATE."""
    op.alter_column(
        "categories_rules",
        "date",
        schema="transactions",
        existing_type=sa.VARCHAR(),
        existing_nullable=True,
        type_=sa.Date(),
        # USING expression: cast the values already stored in the column
        postgresql_using="date::date",
    )
def downgrade() -> None:
    """Revert ``transactions.categories_rules.date`` from DATE back to VARCHAR."""
    op.alter_column(
        "categories_rules",
        "date",
        schema="transactions",
        existing_type=sa.Date(),
        existing_nullable=True,
        type_=sa.VARCHAR(),
    )

View File

@ -1,5 +1,6 @@
from pathlib import Path
import argparse
import datetime as dt
import re
from pfbudget.common.types import Operation
@ -109,13 +110,12 @@ def argparser() -> argparse.ArgumentParser:
"""
Categorizing
"""
p_categorize = subparsers.add_parser(
categorize = subparsers.add_parser(
"categorize",
description="Categorizes the transactions in the selected database",
parents=[universal],
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
p_categorize.set_defaults(command=Operation.Categorize)
categorize.set_defaults(op=Operation.Categorize)
"""
Graph
@ -343,7 +343,7 @@ def category(parser: argparse.ArgumentParser, universal: argparse.ArgumentParser
rule = commands.add_parser("rule", parents=[universal])
rule.set_defaults(op=Operation.CategoryRule)
rule.add_argument("category", nargs="+", type=str)
rule.add_argument("--date", nargs=1, type=str)
rule.add_argument("--date", nargs=1, type=dt.date.fromisoformat)
rule.add_argument("--description", nargs=1, type=str)
rule.add_argument("--bank", nargs=1, type=str)
rule.add_argument("--min", nargs=1, type=float)

View File

@ -1,4 +1,10 @@
from pfbudget.db.model import Transaction, TransactionCategory
from pfbudget.db.model import (
Category,
CategorySelector,
Selector,
Transaction,
TransactionCategory,
)
from datetime import timedelta
@ -9,7 +15,7 @@ class Categorizer:
def __init__(self):
self.options["null_days"] = 4
def categorize(self, transactions: list[Transaction]):
def categorize(self, transactions: list[Transaction], categories: list[Category]):
"""Overarching categorization tool
Receives a list of transactions (by ref) and updates their category
@ -19,6 +25,7 @@ class Categorizer:
"""
self._nullify(transactions)
self._rules(transactions, categories)
def _nullify(self, transactions: list[Transaction]):
count = 0
@ -38,10 +45,39 @@ class Categorizer:
and cancel.amount == -transaction.amount
)
):
transaction.category = TransactionCategory(name="null")
cancel.category = TransactionCategory(name="null")
transaction.category = TransactionCategory(
name="null", selector=CategorySelector(Selector.nullifier)
)
cancel.category = TransactionCategory(
name="null", selector=CategorySelector(Selector.nullifier)
)
matching.extend([transaction, cancel])
count += 2
break
print(f"Nullified {count} transactions")
def _rules(self, transactions: list[Transaction], categories: list[Category]):
    """Categorize still-uncategorized transactions using the category rules.

    Every rule of every category is tested against each transaction that
    has no category yet. A criterion that is unset on the rule is ignored;
    a transaction must satisfy every criterion that is set:

    * ``date``: the transaction date must not be later than the rule date
    * ``description``: the rule text must occur in the transaction description
    * ``bank``: must equal the transaction bank
    * ``min_amount``: inclusive lower bound on the transaction amount
    * ``max_amount``: exclusive upper bound on the transaction amount

    Transactions are updated in place; the first rule that matches wins,
    because a categorized transaction is skipped by all later rules.

    Args:
        transactions: transactions to inspect; already categorized ones
            are left untouched.
        categories: categories whose ``rules`` are applied.
    """
    for category in categories:
        if not category.rules:
            continue

        for rule in category.rules:
            for transaction in transactions:
                if transaction.category:
                    continue

                if rule.date is not None and rule.date < transaction.date:
                    continue

                # Strings keep the truthiness test so that an empty string
                # still means "criterion not set".
                if rule.description:
                    if rule.description not in transaction.description:
                        continue
                if rule.bank and rule.bank != transaction.bank:
                    continue

                # Compare against None explicitly: 0 is a legitimate bound
                # and must not be mistaken for "no bound".
                if (
                    rule.min_amount is not None
                    and rule.min_amount > transaction.amount
                ):
                    continue
                if (
                    rule.max_amount is not None
                    and rule.max_amount <= transaction.amount
                ):
                    continue

                # passed all conditions, assign category
                transaction.category = TransactionCategory(
                    category.name, CategorySelector(Selector.rules)
                )

View File

@ -28,7 +28,10 @@ class Manager:
# TODO this is a monstrosity, remove when possible
download(self, self.args)
case Operation.Categorize:
self.categorize()
with self.db.session() as session:
uncategorized = session.uncategorized()
categories = session.categories()
Categorizer().categorize(uncategorized, categories)
case Operation.Register:
# self._db = DbClient(args["database"])
@ -106,11 +109,6 @@ class Manager:
with self.db.session() as session:
session.add(transactions)
def categorize(self):
with self.db.session() as session:
uncategorized = session.uncategorized()
Categorizer().categorize(uncategorized)
# def get_bank_by(self, key: str, value: str) -> Bank:
# client = DatabaseClient(self.__db)
# bank = client.get_bank(key, value)

View File

@ -129,5 +129,9 @@ class DbClient:
stmt = select(Transaction).where(~Transaction.category.has())
return self.__session.scalars(stmt).all()
def categories(self) -> list[Category]:
    """Return all ``Category`` rows selected through the underlying session."""
    query = select(Category)
    result = self.__session.scalars(query)
    return result.all()
# Build and return a new ClientSession constructed from this client's engine.
def session(self) -> ClientSession:
    return self.ClientSession(self.engine)

View File

@ -81,9 +81,7 @@ class Transaction(Base):
bank: Mapped[bankfk]
amount: Mapped[money]
category: Mapped[Optional[TransactionCategory]] = relationship(
back_populates="original", lazy="joined", default=None
)
category: Mapped[Optional[TransactionCategory]] = relationship()
note: Mapped[Optional[Note]] = relationship(back_populates="original", default=None)
tags: Mapped[Optional[set[Tag]]] = relationship(
back_populates="original",
@ -138,8 +136,7 @@ class TransactionCategory(Base):
id: Mapped[idfk] = mapped_column(primary_key=True, init=False)
name: Mapped[str] = mapped_column(ForeignKey(Category.name))
original: Mapped[Transaction] = relationship(back_populates="category")
selector: Mapped[CategorySelector] = relationship(back_populates="category")
selector: Mapped[CategorySelector] = relationship()
# Short representation that shows only the category name.
def __repr__(self) -> str:
    return f"Category({self.name})"
@ -182,12 +179,15 @@ class CategoryRule(Base):
id: Mapped[idpk] = mapped_column(autoincrement=True, init=False)
name: Mapped[catfk] = mapped_column()
date: Mapped[Optional[str]] = mapped_column()
date: Mapped[Optional[dt.date]] = mapped_column()
description: Mapped[Optional[str]] = mapped_column()
bank: Mapped[Optional[str]] = mapped_column()
min_amount: Mapped[Optional[float]] = mapped_column()
max_amount: Mapped[Optional[float]] = mapped_column()
# Hash a rule by its ``id`` value, which makes rule instances hashable.
# NOTE(review): ``id`` is declared autoincrement / init=False, so it is
# presumably unset until the row is persisted — confirm rules are not
# hashed before then.
def __hash__(self):
    return hash(self.id)
class Selector(enum.Enum):
unknown = enum.auto()
@ -211,11 +211,10 @@ class CategorySelector(Base):
BigInteger,
ForeignKey(TransactionCategory.id, ondelete="CASCADE"),
primary_key=True,
init=False,
)
selector: Mapped[categoryselector]
category: Mapped[TransactionCategory] = relationship(back_populates="selector")
class Period(enum.Enum):
daily = "daily"