[Fix] Unordered equal transactions wrongly matched

The nullifier algorithm was wrongly assuming that it would received an
ordered sequence of transactions. When combined with the cancelation
search only looking forward, this could cause the algorithm to not only
not throw the `MoreThanOneMatchError`, but also attempt to rewrite a
previously matches transaction.

The fix is applied both in ordering every sequence of transactions, but
also to confirm that a transactions had not previously been found. This
should now be impossible given the correct order, but its there to
prevent future misshaps.

For the specific broken sequence, look at test
TestTransform.test_nullifier_inplace_unordered.
This commit is contained in:
Luís Murta 2024-12-02 23:15:15 +00:00
parent 0d4da4132d
commit 5de3f337cf
Signed by: satprog
GPG Key ID: 169EF1BBD7049F94
2 changed files with 46 additions and 11 deletions

View File

@ -1,6 +1,6 @@
from copy import deepcopy from copy import deepcopy
import datetime as dt import datetime as dt
from typing import Sequence from typing import Iterable, Sequence
from .exceptions import MoreThanOneMatchError from .exceptions import MoreThanOneMatchError
from .transform import Transformer from .transform import Transformer
@ -17,14 +17,14 @@ class Nullifier(Transformer):
def __init__(self, rules=None): def __init__(self, rules=None):
self.rules = rules if rules else [] self.rules = rules if rules else []
def transform(self, transactions: Sequence[Transaction]) -> Sequence[Transaction]: def transform(self, transactions: Iterable[Transaction]) -> Sequence[Transaction]:
"""transform """transform
Find transactions that nullify each others, e.g. transfers between banks or Find transactions that nullify each others, e.g. transfers between banks or
between bank and credit cards. between bank and credit cards.
Args: Args:
transactions (Sequence[Transaction]): ordered sequence of transactions transactions (Sequence[Transaction]): sequence of transactions
Raises: Raises:
MoreThanOneMatchError: if there is more than a match for a single transation MoreThanOneMatchError: if there is more than a match for a single transation
@ -33,7 +33,7 @@ class Nullifier(Transformer):
Sequence[Transaction]: nullified sequence of transactions Sequence[Transaction]: nullified sequence of transactions
""" """
result = deepcopy(transactions) result = sorted(deepcopy(transactions))
for i, transaction in enumerate(result[:-1]): for i, transaction in enumerate(result[:-1]):
if matches := [t for t in result[i + 1 :] if self._cancels(transaction, t)]: if matches := [t for t in result[i + 1 :] if self._cancels(transaction, t)]:
@ -47,29 +47,29 @@ class Nullifier(Transformer):
return result return result
def transform_inplace(self, transactions: Sequence[Transaction]) -> None: def transform_inplace(self, transactions: Iterable[Transaction]) -> None:
"""_summary_ """transform_inplace
Find transactions that nullify each others, e.g. transfers between banks or Find transactions that nullify each others, e.g. transfers between banks or
between bank and credit cards. between bank and credit cards.
Args: Args:
transactions (Sequence[Transaction]): ordered sequence of transactions that transactions (Sequence[Transaction]): sequence of transactions that will be
will be modified inplace modified inplace
Raises: Raises:
MoreThanOneMatchError: if there is more than a match for a single transation MoreThanOneMatchError: if there is more than a match for a single transation
""" """
for transaction in transactions: for transaction in sorted(transactions):
if matches := [t for t in transactions if self._cancels(transaction, t)]: if matches := [t for t in transactions if self._cancels(transaction, t)]:
if len(matches) > 1: if len(matches) > 1:
raise MoreThanOneMatchError(f"{transaction} -> {matches}") raise MoreThanOneMatchError(f"{transaction} -> {matches}")
match = matches[0] match = matches[0]
transaction = self._nullify(transaction) self._nullify(transaction)
match = self._nullify(match) self._nullify(match)
def _cancels(self, transaction: Transaction, cancel: Transaction): def _cancels(self, transaction: Transaction, cancel: Transaction):
return ( return (
@ -79,6 +79,9 @@ class Nullifier(Transformer):
and cancel != transaction and cancel != transaction
and cancel.bank != transaction.bank and cancel.bank != transaction.bank
and cancel.amount == -transaction.amount and cancel.amount == -transaction.amount
# even though this class receives uncategorized transactions, they may have
# already been nullified before reaching here
and not transaction.category
and (not cancel.category or cancel.category.name != "null") and (not cancel.category or cancel.category.name != "null")
and ( and (
any(r.matches(transaction) for r in self.rules) if self.rules else True any(r.matches(transaction) for r in self.rules) if self.rules else True

View File

@ -1,5 +1,6 @@
from datetime import date from datetime import date
from decimal import Decimal from decimal import Decimal
import pytest
import mocks.categories as mock import mocks.categories as mock
@ -12,6 +13,7 @@ from pfbudget.db.model import (
TransactionTag, TransactionTag,
) )
from pfbudget.transform.categorizer import Categorizer from pfbudget.transform.categorizer import Categorizer
from pfbudget.transform.exceptions import MoreThanOneMatchError
from pfbudget.transform.nullifier import Nullifier from pfbudget.transform.nullifier import Nullifier
from pfbudget.transform.tagger import Tagger from pfbudget.transform.tagger import Tagger
from pfbudget.transform.transform import Transformer from pfbudget.transform.transform import Transformer
@ -48,6 +50,36 @@ class TestTransform:
for t in transactions: for t in transactions:
assert t.category == TransactionCategory("null", CategorySelector.nullifier) assert t.category == TransactionCategory("null", CategorySelector.nullifier)
def test_nullifier_inplace_unordered(self):
transactions = [
BankTransaction(date(2023, 1, 2), "A2", Decimal("500"), bank="Bank#2"),
BankTransaction(date(2023, 1, 2), "B1", Decimal("-500"), bank="Bank#1"),
BankTransaction(date(2023, 1, 1), "A1", Decimal("-500"), bank="Bank#1"),
BankTransaction(date(2023, 1, 2), "B2", Decimal("500"), bank="Bank#2"),
]
for t in transactions:
assert not t.category
categorizer: Transformer = Nullifier()
with pytest.raises(MoreThanOneMatchError):
categorizer.transform_inplace(transactions)
def test_nullifier_inplace_repeated(self):
transactions = [
BankTransaction(date(2023, 1, 2), "A1", Decimal("-500"), bank="Bank#1"),
BankTransaction(date(2023, 1, 2), "A2", Decimal("500"), bank="Bank#2"),
BankTransaction(date(2023, 1, 2), "B1", Decimal("-500"), bank="Bank#1"),
BankTransaction(date(2023, 1, 2), "B2", Decimal("500"), bank="Bank#2"),
]
for t in transactions:
assert not t.category
categorizer: Transformer = Nullifier()
with pytest.raises(MoreThanOneMatchError):
categorizer.transform_inplace(transactions)
def test_nullifier_with_rules(self): def test_nullifier_with_rules(self):
transactions = [ transactions = [
BankTransaction(date(2023, 1, 1), "", Decimal("-500"), bank="Bank#1"), BankTransaction(date(2023, 1, 1), "", Decimal("-500"), bank="Bank#1"),