reader.py is removed, and main.py becomes the entry point from which all other functions are called. All categories are now defined in categories.py, each with its own search function and regex. The categorize function is also defined on the base class. The parsers have been cleaned up into a more trimmed-down version. Each parser has its own parse method and its own encoding and separator attributes. The `Transaction` class has been extended with all the comparison methods. It also has methods to read from and write to a file. The transactions.pickle file is no longer used, since changes can be made directly to the parsed data in the data_dir. A manual categorization function has been created to help fill in the gaps; it is interchangeable with directly editing the parsed .csv files. The master record of the data is the set of .csv files present in the data_dir.
188 lines
5.0 KiB
Python
188 lines
5.0 KiB
Python
from datetime import datetime
|
|
from decimal import Decimal, InvalidOperation
|
|
from pathlib import Path
|
|
from transaction import Transaction
|
|
|
|
|
|
class Parser:
    """Base class for the bank statement parsers.

    Subclasses override :meth:`parse`; :meth:`parse_csv` selects the subclass
    to use from the name of the file being parsed.
    """

    def parse(self, file):
        """Parse ``file`` into transactions.

        The base implementation does nothing and returns ``None``; every bank
        parser supplies its own version.
        """
        return None

    @staticmethod
    def _is_int(text):
        """Return True when ``int(text)`` succeeds, False otherwise."""
        try:
            int(text)
        except ValueError:
            return False
        return True

    @staticmethod
    def parse_csv(file: Path, append=False):
        """Parse ``file`` with the parser selected by its file name.

        The file stem is split on ``_`` and only its first two parts are
        inspected: one of them must be an integer and the other one is taken
        as the bank name, in either order (``<bank>_<number>`` or
        ``<number>_<bank>``).

        Args:
            file: Path of the statement to parse.
            append: Unused; kept so existing callers keep working.

        Returns:
            Whatever the selected parser's ``parse`` returns, or ``None``
            (after printing a message) when the bank cannot be derived from
            the file name or no parser is registered for it.
        """
        name = file.stem.split("_")

        # A stem without "_" used to crash with IndexError and a stem with no
        # numeric part with ValueError; both are now reported the same way as
        # an unknown bank instead of aborting the caller.
        if len(name) >= 2 and Parser._is_int(name[1]):
            bank = name[0]
        elif len(name) >= 2 and Parser._is_int(name[0]):
            bank = name[1]
        else:
            print(
                f"Cant find the bank in {name}. "
                "Expected <bank>_<number> or <number>_<bank>"
            )
            return None

        # Built at call time because the parser classes are defined further
        # down the module, after this class.
        p = dict(
            Bank1=Bank1,
            Bank2=Bank2,
            Bank2CC=Bank2CC,
            BANK3=Bank3,
        )

        try:
            parser_class = p[bank]
        except KeyError as e:
            print(f"{e} {bank} parser doesnt exist. Cant parse {name}")
            return None

        return parser_class().parse(file)
|
|
|
|
|
|
class Bank1(Parser):
    """Bank 1 parser

    Bank 1 transcripts are read with the following properties:
        encoding: utf-8
        separator: ;
        header: the first 5 lines are skipped
        date format: %d/%m/%Y

    Fields used from each row (zero-based): 1 is the date, 3 the description
    and 4 the value. The reading order is reversed to go from earlier to
    latest.
    """

    encoding = "utf-8"
    separator = ";"

    def parse(self, file):
        """Read ``file`` and return its rows as a list of ``Transaction``.

        Args:
            file: Path of the Bank 1 statement.

        Returns:
            One ``Transaction`` per row after the header, in reversed file
            order, built from the ISO date, the description with runs of
            whitespace collapsed to single spaces, the literal bank name
            "Bank1" and the value as a ``Decimal``.
        """
        # The context manager closes the file as soon as the rows are read;
        # a bare open() inside the comprehension left the handle to the
        # garbage collector.
        with open(file, encoding=self.encoding) as statement:
            rows = [
                line.rstrip().split(self.separator) for line in statement
            ][5:]

        transactions = []
        for row in reversed(rows):
            fields = [field.rstrip() for field in row]
            date = datetime.strptime(fields[1], "%d/%m/%Y").date()
            # split()/join collapses any internal padding in the description.
            description = " ".join(fields[3].split())
            value = Decimal(fields[4])

            transactions.append(
                Transaction(date.isoformat(), description, "Bank1", value)
            )

        return transactions
|
|
|
|
|
|
class Bank2(Parser):
    """Bank 2 parser

    Bank 2 transcripts are read with the following properties:
        encoding: utf-8
        separator: tab
        date format: %d/%m/%Y
        value: parsed as-is; when that fails every "," is removed and the
            value is parsed again

    Fields used from each row (zero-based): 0 is the date, 2 the description
    and 3 the value.

    NOTE(review): this docstring used to say "decimal separator: ,", but the
    code drops commas instead of turning them into a decimal point, i.e. it
    treats them as grouping characters - confirm against a real statement.
    """

    encoding = "utf-8"
    separator = "\t"

    def parse(self, file):
        """Read ``file`` and return its rows as a list of ``Transaction``.

        Args:
            file: Path of the Bank 2 statement.

        Returns:
            One ``Transaction`` per line, in file order, built from the ISO
            date, the description, the literal bank name "Bank2" and the
            value as a ``Decimal``.
        """
        # The context manager closes the file as soon as the rows are read;
        # a bare open() inside the comprehension left the handle to the
        # garbage collector.
        with open(file, encoding=self.encoding) as statement:
            rows = [line.rstrip().split(self.separator) for line in statement]

        transactions = []
        for row in rows:
            date = datetime.strptime(row[0], "%d/%m/%Y").date()
            description = row[2]
            try:
                value = Decimal(row[3])
            except InvalidOperation:
                # Values containing "," are not valid Decimal literals; retry
                # with the commas stripped.
                row[3] = row[3].replace(",", "")
                value = Decimal(row[3])

            transactions.append(
                Transaction(date.isoformat(), description, "Bank2", value)
            )

        return transactions
|
|
|
|
|
|
class Bank2CC(Parser):
    """Bank 2 credit card parser

    Bank 2 credit card transcripts are read with the following properties:
        encoding: utf-8
        separator: tab
        date format: %d/%m/%Y
        value: parsed as-is; when that fails every "," is removed and the
            value is parsed again and negated

    Fields used from each row (zero-based): 0 and 1 are dates, 2 is the
    description and 3 the value. The date comes from field 0, unless the
    resulting value is positive, in which case field 1 is used.

    NOTE(review): this docstring used to say "decimal separator: ,", but the
    code drops commas instead of turning them into a decimal point - confirm
    against a real statement.
    """

    encoding = "utf-8"
    separator = "\t"

    def parse(self, file):
        """Read ``file`` and return its rows as a list of ``Transaction``.

        Args:
            file: Path of the Bank 2 credit card statement.

        Returns:
            One ``Transaction`` per line, in file order, built from the ISO
            date, the description, the literal bank name "Bank2CC" and the
            value as a ``Decimal``.
        """
        # The context manager closes the file as soon as the rows are read;
        # a bare open() inside the comprehension left the handle to the
        # garbage collector.
        with open(file, encoding=self.encoding) as statement:
            rows = [line.rstrip().split(self.separator) for line in statement]

        transactions = []
        for row in rows:
            date = datetime.strptime(row[0], "%d/%m/%Y").date()
            description = row[2]
            try:
                value = Decimal(row[3])
            except InvalidOperation:
                row[3] = row[3].replace(",", "")
                # NOTE(review): the value is negated only on this fallback
                # path, not when it parses directly above. Kept exactly as it
                # was; confirm whether the sign flip is meant for every row.
                value = -Decimal(row[3])

            if value > 0:
                # Positive values are dated with the second date column.
                date = datetime.strptime(row[1], "%d/%m/%Y").date()

            transactions.append(
                Transaction(date.isoformat(), description, "Bank2CC", value)
            )

        return transactions
|
|
|
|
|
|
class Bank3(Parser):
    """Bank 3 parser

    Bank 3 transcripts are read with the following properties:
        encoding: windows-1252
        separator: ;
        header: the first 7 lines are skipped
        footer: the last line is skipped
        date format: %d-%m-%Y
        decimal separator: ,
        thousands separator: .

    Fields used from each row (zero-based): 1 is the date, 2 the description,
    3 and 4 hold the amount. Bank 3 keeps credits and debits in different
    columns: an amount found in field 3 is negated, otherwise the amount is
    taken from field 4 as-is. The reading order is reversed to go from
    earlier to latest.
    """

    encoding = "windows-1252"
    # Must be ";": amounts use "," as their decimal mark, so splitting rows on
    # "," (the previous value) would cut every amount in two.
    separator = ";"

    def parse(self, file):
        """Read ``file`` and return its rows as a list of ``Transaction``.

        Args:
            file: Path of the Bank 3 statement.

        Returns:
            One ``Transaction`` per row between the header and the footer, in
            reversed file order, built from the ISO date, the description,
            the literal bank name "Bank3" and the signed value as a
            ``Decimal``.
        """
        # The context manager closes the file as soon as the rows are read;
        # a bare open() inside the comprehension left the handle to the
        # garbage collector.
        with open(file, encoding=self.encoding) as statement:
            rows = [
                line.rstrip().split(self.separator) for line in statement
            ][7:-1]

        transactions = []
        for row in reversed(rows):
            fields = [field.rstrip() for field in row]
            date = datetime.strptime(fields[1], "%d-%m-%Y").date()
            description = fields[2]

            # "1.234,56" -> "1234.56": drop the thousands dots, then turn the
            # decimal comma into a point so Decimal accepts it. Field 4 is
            # only read when field 3 is empty.
            if fields[3]:
                amount = fields[3].replace(".", "").replace(",", ".")
                value = -Decimal(amount)
            else:
                amount = fields[4].replace(".", "").replace(",", ".")
                value = Decimal(amount)

            transactions.append(
                Transaction(date.isoformat(), description, "Bank3", value)
            )

        return transactions
|