budget/parsers.py
Luís Murta ca12d1846c
Automatic and manual categorization
reader.py is removed, with main.py becoming the main function where all
others are called from.
All categories added on categories.py, each with its own search function
and regex. The categorize function is also defined on the base class

The parsers have been cleaned to a more strimmed version. Each parser has
its own parse method and encoding and separator attributes.

The `Transaction` class has been incremented with all the comparator
methods. It also has the read and write from file methods.

The transactions.pickle is no longer used, since changes can be done
directly to the parsed data in the data_dir, making the file unused. A
manual categorization function has been created to help fill in the
gaps, interchangeable with directly editing the parsed .csv files. The
master record of data are the .csv present in the data_dir.
2020-08-04 20:42:29 +01:00

188 lines
5.0 KiB
Python

from datetime import datetime
from decimal import Decimal, InvalidOperation
from pathlib import Path
from transaction import Transaction
class Parser:
def parse(self, file):
pass
@staticmethod
def parse_csv(file: Path, append=False):
name = file.stem.split("_")
try:
bank, _ = name[0], int(name[1])
except ValueError:
_, bank = int(name[0]), name[1]
p = dict(
Bank1=Bank1,
Bank2=Bank2,
Bank2CC=Bank2CC,
BANK3=Bank3,
)
try:
parser = p[bank]()
except KeyError as e:
print(f"{e} {bank} parser doesnt exist. Cant parse {name}")
return
transactions = parser.parse(file)
return transactions
class Bank1(Parser):
"""Bank 1 parser
Bank 1 transcripts have the following properties:
encoding: utf-8
separator: ;
starting line: 5
date format: %d/%m/%Y
The reading order is reversed to go from earlier to latest.
"""
encoding = "utf-8"
separator = ";"
def parse(self, file):
transactions = []
reader = [
line.rstrip().split(self.separator)
for line in open(file, encoding=self.encoding)
][5:]
for transaction in reversed(reader):
transaction = [field.rstrip() for field in transaction]
date = datetime.strptime(transaction[1], "%d/%m/%Y").date()
description = " ".join(transaction[3].split())
value = Decimal(transaction[4])
transactions.append(
Transaction(date.isoformat(), description, "Bank1", value)
)
return transactions
class Bank2(Parser):
"""Bank 2 parser
Bank 2 transcripts have the following properties:
encoding: utf-8
separator: tab
date format: %d/%m/%Y
decimal separator: ,
"""
encoding = "utf-8"
separator = "\t"
def parse(self, file):
transactions = []
reader = [
line.rstrip().split(self.separator)
for line in open(file, encoding=self.encoding)
]
for transaction in reader:
date = datetime.strptime(transaction[0], "%d/%m/%Y").date()
description = transaction[2]
try:
value = Decimal(transaction[3])
except InvalidOperation:
transaction[3] = transaction[3].replace(",", "")
value = Decimal(transaction[3])
transactions.append(
Transaction(date.isoformat(), description, "Bank2", value)
)
return transactions
class Bank2CC(Parser):
"""Bank 2 credit card parser
Bank 2 credit card transcripts have the following properties:
encoding: utf-8
separator: tab
date format: %d/%m/%Y
decimal separator: ,
"""
encoding = "utf-8"
separator = "\t"
def parse(self, file):
transactions = []
reader = [
line.rstrip().split(self.separator)
for line in open(file, encoding=self.encoding)
]
for transaction in reader:
date = datetime.strptime(transaction[0], "%d/%m/%Y").date()
description = transaction[2]
try:
value = Decimal(transaction[3])
except InvalidOperation:
transaction[3] = transaction[3].replace(",", "")
value = -Decimal(transaction[3])
if value > 0:
date = datetime.strptime(transaction[1], "%d/%m/%Y").date()
transactions.append(
Transaction(date.isoformat(), description, "Bank2CC", value)
)
return transactions
class Bank3(Parser):
"""Bank 3 parser
Bank 3 transcripts have the following properties:
encoding: windows-1252 (passed as argument)
separator: ;
starting line: 7
finishing line: -1
date format: %d-%m-%Y
decimal separator: ,
thousands separator: .
Bank 3 has credits in a different column from debits. These also have to be
negated. The reading order is reversed to go from earlier to latest.
"""
encoding = "windows-1252"
separator = ","
def parse(self, file):
transactions = []
reader = [
line.rstrip().split(self.separator)
for line in open(file, encoding=self.encoding)
][7:-1]
for transaction in reversed(reader):
transaction = [field.rstrip() for field in transaction]
date = datetime.strptime(transaction[1], "%d-%m-%Y").date()
description = transaction[2]
if t := transaction[3]:
t = t.replace(".", "").replace(",", ".")
value = -Decimal(t)
else:
t = transaction[4].replace(".", "").replace(",", ".")
value = Decimal(t)
transactions.append(
Transaction(date.isoformat(), description, "Bank3", value)
)
return transactions