Parser configuration now lives in parsers.yaml
parsers.py was rewritten around a single default parser that takes its configuration parameters from a YAML file. Additional parsing behaviour can be enabled with the additional_parser attribute in the YAML file and by overriding the func and parse methods in child classes of Parser. func is called after each transaction is created, and an overriding parse method should either call the parent parse method or reimplement the entire parsing process. The parse_data function is now called from the runnable, which now drives the parsing process. The parse command can take an optional bank, used before it tries to extract one from the filename, and multiple paths, either files or directories. The Transaction __init__ was fixed to accept inputs from a previously initialized Transaction. Also adds utils.py with helper functions.
This commit is contained in:
parent
4d9e8edec8
commit
4d6c865bb5
@ -3,7 +3,7 @@
|
||||
parsing -> categorize -> analyze (predict)
|
||||
|
||||
## Parsing
|
||||
Parses bank extracts, based on parsers.py functions, to SQLite db.
|
||||
Parses bank extracts, based on parsers.yaml, to a SQLite database.
|
||||
|
||||
## Categorize
|
||||
Categorizes transactions based on categories.py filters.
|
||||
|
||||
60
parsers.yaml
Normal file
60
parsers.yaml
Normal file
@ -0,0 +1,60 @@
|
||||
Banks:
|
||||
- Bank1
|
||||
- Bank2
|
||||
- Bank3
|
||||
|
||||
CreditCards:
|
||||
- VISA
|
||||
- MasterCard
|
||||
- AmericanExpress
|
||||
|
||||
default: &default
|
||||
encoding: utf-8
|
||||
separator: "\t"
|
||||
date_fmt: "%Y-%m-%d"
|
||||
|
||||
Bank1:
|
||||
<<: *default
|
||||
separator: ";"
|
||||
date_fmt: "%d/%m/%Y"
|
||||
start: 6
|
||||
debit:
|
||||
date: 1
|
||||
text: 3
|
||||
value: 4
|
||||
additional_parser: true
|
||||
|
||||
|
||||
Bank2: &bank2
|
||||
<<: *default
|
||||
date_fmt: "%d/%m/%Y"
|
||||
debit:
|
||||
date: 0
|
||||
text: 2
|
||||
value: 3
|
||||
VISA:
|
||||
<<: *bank2
|
||||
debit:
|
||||
date: 0
|
||||
text: 2
|
||||
value: 3
|
||||
credit:
|
||||
date: 1
|
||||
text: 2
|
||||
value: 3
|
||||
|
||||
Bank3:
|
||||
encoding: windows-1252
|
||||
separator: ","
|
||||
date_fmt: "%d-%m-%Y"
|
||||
start: 8
|
||||
end: -1
|
||||
debit:
|
||||
date: 1
|
||||
text: 2
|
||||
value: 3
|
||||
negate: true
|
||||
credit:
|
||||
date: 0
|
||||
text: 2
|
||||
value: 4
|
||||
@ -1,198 +1,151 @@
|
||||
from datetime import datetime
|
||||
from collections import namedtuple
|
||||
from decimal import Decimal
|
||||
from pathlib import Path
|
||||
from importlib import import_module
|
||||
from typing import Final
|
||||
import datetime as dt
|
||||
import yaml
|
||||
|
||||
from .transactions import Transaction
|
||||
from . import utils
|
||||
|
||||
|
||||
def parse_data(file: Path, append=False):
|
||||
name = file.stem.split("_")
|
||||
try:
|
||||
bank, _ = name[0], int(name[1])
|
||||
except ValueError:
|
||||
_, bank = int(name[0]), name[1]
|
||||
cfg: Final = yaml.safe_load(open("parsers.yaml"))
|
||||
assert (
|
||||
"Banks" in cfg
|
||||
), "parsers.yaml is missing the Banks section with the list of available banks"
|
||||
|
||||
p = dict(
|
||||
Bank1=Bank1,
|
||||
Bank2=Bank2,
|
||||
Bank2CC=Bank2CC,
|
||||
BANK3=Bank3,
|
||||
)
|
||||
Index = namedtuple(
|
||||
"Index", ["date", "text", "value", "negate"], defaults=[-1, -1, -1, False]
|
||||
)
|
||||
Options = namedtuple(
|
||||
"Options",
|
||||
[
|
||||
"encoding",
|
||||
"separator",
|
||||
"date_fmt",
|
||||
"start",
|
||||
"end",
|
||||
"debit",
|
||||
"credit",
|
||||
"additional_parser",
|
||||
"VISA",
|
||||
"MasterCard",
|
||||
"AmericanExpress",
|
||||
],
|
||||
defaults=["", "", "", 1, None, Index(), Index(), False, None, None, None],
|
||||
)
|
||||
|
||||
try:
|
||||
parser = p[bank]()
|
||||
except KeyError as e:
|
||||
print(f"{e} {bank} parser doesnt exist. Cant parse {name}")
|
||||
return
|
||||
|
||||
transactions = parser.parse(file)
|
||||
def parse_data(filename: str, bank=None) -> list:
    """Parse one bank extract into a list of transactions.

    Args:
        filename: Path of the extract to parse.
        bank: Optional bank name, used as a key of the parsers.yaml
            configuration. A one-element list or tuple is also accepted,
            because the command line declares ``--bank`` with ``nargs=1``.
            When omitted, the bank (and an optional credit card) is derived
            from the filename by ``utils.find_credit_institution``.

    Returns:
        The list produced by the selected parser's ``parse()`` method.
    """
    # argparse delivers ``nargs=1`` options as a list; unwrap it so the value
    # can be used as a dictionary key below.
    if isinstance(bank, (list, tuple)):
        bank = bank[0] if bank else None

    # Must be bound even when the caller names the bank explicitly, otherwise
    # the check below raises UnboundLocalError.
    creditcard = None
    if not bank:
        bank, creditcard = utils.find_credit_institution(
            filename, cfg.get("Banks"), cfg.get("CreditCards")
        )

    if creditcard:
        # Credit card options are nested inside the issuing bank's section.
        options = cfg[bank][creditcard]
        bank += creditcard
    else:
        options = cfg[bank]

    if options.get("additional_parser", False):
        # The specialised parser is the class in pfbudget.parsers whose name
        # matches the (possibly credit-card suffixed) bank name.
        parser = getattr(import_module("pfbudget.parsers"), bank)
        transactions = parser(filename, bank, options).parse()
    else:
        transactions = Parser(filename, bank, options).parse()

    return transactions
|
||||
|
||||
|
||||
def transaction(line: str, bank: str, options: Options, func) -> Transaction:
    """Build a Transaction from one raw line of a bank extract.

    Args:
        line: Raw text line; it is right-stripped and split on
            ``options.separator``.
        bank: Bank name stored on the resulting transaction.
        options: Parsing options (separator, date format, column indexes).
        func: Callback invoked with the new transaction, but only when
            ``options.additional_parser`` is truthy.

    Returns:
        The transaction created from the line.
    """
    fields = line.rstrip().split(options.separator)
    columns = Parser.index(fields, options)

    parsed = dt.datetime.strptime(fields[columns.date].strip(), options.date_fmt)
    iso_date = parsed.date().isoformat()
    description = fields[columns.text]
    amount = utils.parse_decimal(fields[columns.value])
    if columns.negate:
        amount = -amount

    result = Transaction(iso_date, description, bank, amount)
    if options.additional_parser:
        func(result)
    return result
|
||||
|
||||
|
||||
class Parser:
    """Default, configuration-driven parser for a bank extract.

    Subclasses can override ``func`` (called for each created transaction when
    the ``additional_parser`` option is set) and/or ``parse``.
    """

    def __init__(self, filename: str, bank: str, options: dict):
        """Store the target file and turn the raw option mapping into Options.

        Args:
            filename: Path of the extract to parse.
            bank: Bank name attached to every transaction.
            options: Mapping of option names to values; its ``debit`` and
                ``credit`` entries, when present, are converted to Index.
        """
        self.filename = filename
        self.bank = bank

        # Work on a shallow copy: the caller passes a section of the shared
        # module-level configuration, and converting its entries in place
        # would make the next Parser built from the same section fail on
        # ``Index(**<Index>)``.
        options = dict(options)
        for side in ("debit", "credit"):
            columns = options.get(side)
            if columns and not isinstance(columns, Index):
                options[side] = Index(**columns)

        self.options = Options(**options)

    def func(self, transaction: Transaction):
        """Hook run on each new transaction; the default does nothing."""
        pass

    def parse(self) -> list:
        """Read the file and return one transaction per selected line.

        ``options.start`` is a 1-based line number and ``options.end`` is used
        directly as the end of the slice over the file's lines.
        """
        # Read everything up front so the file handle is closed before any
        # line is converted.
        with open(self.filename, encoding=self.options.encoding) as extract:
            lines = extract.readlines()

        selected = lines[self.options.start - 1 : self.options.end]
        return [
            transaction(line, self.bank, self.options, self.func)
            for line in selected
        ]

    @staticmethod
    def index(line: list, options: Options) -> Index:
        """Pick the debit or credit column indexes that apply to ``line``.

        A side counts as configured when its ``date`` index is not -1. When
        both sides are configured, the first field among value, date and text
        whose column differs between the two sides decides: the side whose
        column is non-empty in ``line`` is returned, debit being checked first.

        Raises:
            IndexError: if neither side is configured, if both sides use
                identical columns, or if both candidate columns are empty.
        """
        debit, credit = options.debit, options.credit
        has_debit = debit.date != -1
        has_credit = credit.date != -1

        if not has_debit and not has_credit:
            raise IndexError("No debit nor credit indexes available")
        if not has_credit:
            return debit
        if not has_debit:
            return credit

        for field in ("value", "date", "text"):
            debit_col = getattr(debit, field)
            credit_col = getattr(credit, field)
            if debit_col == credit_col:
                continue
            if line[debit_col]:
                return debit
            if line[credit_col]:
                return credit
            # Previously this fell through to an unbound local variable.
            raise IndexError(
                f"Neither the debit nor the credit {field} column has content"
            )

        raise IndexError("Debit and credit indexes are equal")
|
||||
|
||||
|
||||
class Bank1(Parser):
|
||||
"""Bank 1 parser
|
||||
def __init__(self, filename: str, bank: str, options: dict):
|
||||
super().__init__(filename, bank, options)
|
||||
self.transfers = []
|
||||
self.transaction_cost = -Decimal("1")
|
||||
|
||||
Bank 1 transcripts have the following properties:
|
||||
encoding: utf-8
|
||||
separator: ;
|
||||
starting line: 5
|
||||
date format: %d/%m/%Y
|
||||
|
||||
The reading order is reversed to go from earlier to latest.
|
||||
"""
|
||||
|
||||
encoding = "utf-8"
|
||||
separator = ";"
|
||||
|
||||
def parse(self, file):
|
||||
transactions = []
|
||||
reader = [
|
||||
line.rstrip().split(self.separator)
|
||||
for line in open(file, encoding=self.encoding)
|
||||
][5:]
|
||||
|
||||
for transaction in reversed(reader):
|
||||
transaction = [field.rstrip() for field in transaction]
|
||||
date = datetime.strptime(transaction[1], "%d/%m/%Y").date()
|
||||
description = " ".join(transaction[3].split())
|
||||
value = Decimal(transaction[4])
|
||||
def func(self, transaction: Transaction):
|
||||
if "transf" in transaction.description.lower() and transaction.value < 0:
|
||||
transaction.value -= self.transaction_cost
|
||||
self.transfers.append(transaction.date)
|
||||
|
||||
def parse(self) -> list:
|
||||
transactions = super().parse()
|
||||
for date in self.transfers:
|
||||
transactions.append(
|
||||
Transaction(date.isoformat(), description, "Bank1", value)
|
||||
Transaction(date, "Transaction cost", self.bank, self.transaction_cost)
|
||||
)
|
||||
|
||||
return transactions
|
||||
|
||||
|
||||
class Bank2(Parser):
|
||||
"""Bank 2 parser
|
||||
|
||||
Bank 2 transcripts have the following properties:
|
||||
encoding: utf-8
|
||||
separator: tab
|
||||
date format: %d/%m/%Y or %d-%m-%Y
|
||||
decimal separator: ,
|
||||
"""
|
||||
|
||||
encoding = "utf-8"
|
||||
separator = "\t"
|
||||
|
||||
def parse(self, file):
|
||||
transactions = []
|
||||
reader = [
|
||||
line.rstrip().split(self.separator)
|
||||
for line in open(file, encoding=self.encoding)
|
||||
]
|
||||
|
||||
for transaction in reader:
|
||||
try:
|
||||
date = datetime.strptime(transaction[0], "%d/%m/%Y").date()
|
||||
except ValueError: # date can differ due to locales
|
||||
date = datetime.strptime(transaction[0], "%d-%m-%Y").date()
|
||||
description = transaction[2]
|
||||
|
||||
# works for US and EU locales (5,000.00 and 5 000,00)
|
||||
value = list(transaction[3].replace("\xa0", "")) # non-breaking space
|
||||
value[-3] = "."
|
||||
value = "".join(value)
|
||||
value = value.replace(",", "")
|
||||
value = Decimal(value)
|
||||
|
||||
transactions.append(
|
||||
Transaction(date.isoformat(), description, "Bank2", value)
|
||||
)
|
||||
|
||||
return transactions
|
||||
|
||||
|
||||
class Bank2CC(Parser):
|
||||
"""Bank 2 credit card parser
|
||||
|
||||
Bank 2 credit card transcripts have the following properties:
|
||||
encoding: utf-8
|
||||
separator: tab
|
||||
date format: %d/%m/%Y or %d-%m-%Y
|
||||
decimal separator: ,
|
||||
"""
|
||||
|
||||
encoding = "utf-8"
|
||||
separator = "\t"
|
||||
|
||||
def parse(self, file):
|
||||
transactions = []
|
||||
reader = [
|
||||
line.rstrip().split(self.separator)
|
||||
for line in open(file, encoding=self.encoding)
|
||||
]
|
||||
|
||||
for transaction in reader:
|
||||
try:
|
||||
date = datetime.strptime(transaction[0], "%d/%m/%Y").date()
|
||||
except ValueError: # date can differ due to locales
|
||||
date = datetime.strptime(transaction[0], "%d-%m-%Y").date()
|
||||
description = transaction[2]
|
||||
|
||||
# works for US and EU locales (5,000.00 and 5 000,00)
|
||||
value = list(transaction[3].replace("\xa0", "")) # non-breaking space
|
||||
value[-3] = "."
|
||||
value = "".join(value)
|
||||
value = value.replace(",", "")
|
||||
value = Decimal(value)
|
||||
|
||||
if value > 0:
|
||||
date = datetime.strptime(transaction[1], "%d/%m/%Y").date()
|
||||
|
||||
transactions.append(
|
||||
Transaction(date.isoformat(), description, "Bank2CC", value)
|
||||
)
|
||||
|
||||
return transactions
|
||||
|
||||
|
||||
class Bank3(Parser):
|
||||
"""Bank 3 parser
|
||||
|
||||
Bank 3 transcripts have the following properties:
|
||||
encoding: windows-1252 (passed as argument)
|
||||
separator: ;
|
||||
starting line: 7
|
||||
finishing line: -1
|
||||
date format: %d-%m-%Y
|
||||
decimal separator: ,
|
||||
thousands separator: .
|
||||
|
||||
Bank 3 has credits in a different column from debits. These also have to be
|
||||
negated. The reading order is reversed to go from earlier to latest.
|
||||
"""
|
||||
|
||||
encoding = "windows-1252"
|
||||
separator = ","
|
||||
|
||||
def parse(self, file):
|
||||
transactions = []
|
||||
reader = [
|
||||
line.rstrip().split(self.separator)
|
||||
for line in open(file, encoding=self.encoding)
|
||||
][7:-1]
|
||||
|
||||
for transaction in reversed(reader):
|
||||
transaction = [field.rstrip() for field in transaction]
|
||||
date = datetime.strptime(transaction[1], "%d-%m-%Y").date()
|
||||
description = transaction[2]
|
||||
if t := transaction[3]:
|
||||
t = t.replace(".", "").replace(",", ".")
|
||||
value = -Decimal(t)
|
||||
else:
|
||||
t = transaction[4].replace(".", "").replace(",", ".")
|
||||
value = Decimal(t)
|
||||
|
||||
transactions.append(
|
||||
Transaction(date.isoformat(), description, "Bank3", value)
|
||||
)
|
||||
|
||||
return transactions
|
||||
|
||||
@ -4,6 +4,7 @@ import datetime as dt
|
||||
|
||||
from .database import DBManager
|
||||
from .graph import discrete, monthly
|
||||
from .parsers import parse_data
|
||||
from .transactions import load_transactions, save_transactions
|
||||
from . import report
|
||||
from . import tools
|
||||
@ -45,7 +46,13 @@ def argparser():
|
||||
p_export = subparsers.add_parser("export", help="export help")
|
||||
p_export.set_defaults(func=lambda args: DBManager(args.db).export())
|
||||
|
||||
"""
|
||||
Parsing
|
||||
"""
|
||||
p_parse = subparsers.add_parser("parse", help="parse help")
|
||||
p_parse.add_argument("path", nargs="+", type=str)
|
||||
p_parse.add_argument("--bank", nargs=1, type=str)
|
||||
p_parse.set_defaults(func=parse)
|
||||
|
||||
# p_restart = subparsers.add_parser("restart", help="restart help")
|
||||
p_vacation = subparsers.add_parser(
|
||||
@ -91,7 +98,6 @@ def argparser():
|
||||
p_graph_interval.add_argument("--end", type=str, nargs=1, help="graph end date")
|
||||
p_graph_interval.add_argument("--year", type=str, nargs=1, help="graph year")
|
||||
|
||||
p_parse.set_defaults(func=parse)
|
||||
# p_restart.set_defaults(func=restart)
|
||||
p_vacation.set_defaults(func=vacation)
|
||||
p_status.set_defaults(func=status)
|
||||
@ -134,7 +140,7 @@ def restart(state, args):
|
||||
raise PfBudgetNotInitialized(f"{Path(tools.STATE_FILE)} doesn't exist")
|
||||
|
||||
|
||||
def parse(state, args):
|
||||
def parse(args):
|
||||
"""Parser
|
||||
|
||||
Parses the contents of the raw directory into the data files, and
|
||||
@ -144,11 +150,17 @@ def parse(state, args):
|
||||
state (PFState): Internal state of the program
|
||||
args (dict): argparse variables
|
||||
"""
|
||||
raw_dir = args.raw if hasattr(args, "raw") else None
|
||||
data_dir = args.data if hasattr(args, "data") else None
|
||||
|
||||
tools.parser(state, raw_dir, data_dir)
|
||||
categorize(state, args)
|
||||
for path in args.path:
|
||||
if (dir := Path(path)).is_dir():
|
||||
for file in dir.iterdir():
|
||||
parse_data(file, args.bank)
|
||||
elif Path(path).is_file():
|
||||
trs = parse_data(path, args.bank)
|
||||
else:
|
||||
raise FileNotFoundError
|
||||
# tools.parser(state, raw_dir, data_dir)
|
||||
# categorize(state, args)
|
||||
print("\n".join([t.desc() for t in trs]))
|
||||
|
||||
|
||||
def categorize(state, args):
|
||||
|
||||
@ -23,10 +23,16 @@ class Transaction:
|
||||
|
||||
arg = args[0] if len(args) == 1 else list(args)
|
||||
try:
|
||||
self.date = date.fromisoformat(arg[0])
|
||||
if type(arg[0]) is date:
|
||||
self.date = arg[0]
|
||||
else:
|
||||
self.date = date.fromisoformat(arg[0])
|
||||
self.description = " ".join(arg[1].split())
|
||||
self.bank = arg[2]
|
||||
self.value = Decimal(arg[3])
|
||||
if type(arg[3]) is float:
|
||||
self.value = arg[3]
|
||||
else:
|
||||
self.value = Decimal(args[3])
|
||||
self.category = arg[4]
|
||||
except IndexError:
|
||||
pass
|
||||
|
||||
54
pfbudget/utils.py
Normal file
54
pfbudget/utils.py
Normal file
@ -0,0 +1,54 @@
|
||||
from decimal import Decimal
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
class WrongFilenameError(Exception):
    """Raised by find_credit_institution when no bank name is in the filename."""
|
||||
|
||||
|
||||
class BankNotAvailableError(Exception):
    """Raised by find_credit_institution when the bank is not a known bank."""
|
||||
|
||||
|
||||
class CreditCardNotAvailableError(Exception):
    """Raised by find_credit_institution when the card is not a known card."""
|
||||
|
||||
|
||||
def parse_decimal(s: str) -> Decimal:
    """Convert a monetary string in US or EU notation to a Decimal.

    Strings that ``float`` already accepts are converted as they are.
    Otherwise spaces, no-break spaces, the euro sign and a plus sign are
    removed and the separators are normalised: the right-most of "," and "."
    is taken as the decimal mark and the other one as the thousands separator
    ("1.234,56" and "1,234.56" both give 1234.56).

    A single kind of separator that repeats and only delimits three-digit
    groups ("1,234,567" or "1.234.567") is treated as a thousands separator.

    Args:
        s: Text representation of the amount.

    Returns:
        The parsed amount.

    Raises:
        decimal.InvalidOperation: if the cleaned-up text is still not a
            number (under the default decimal context).
    """
    try:
        # float() is only a cheap validity probe; the value itself is built
        # from the string so no binary rounding is introduced.
        float(s)
        return Decimal(s)
    except ValueError:
        pass

    cleaned = s.strip()
    for noise in ("\xa0", "\u202f", " ", "€", "+"):
        cleaned = cleaned.replace(noise, "")

    # A separator that occurs more than once cannot be the decimal mark.
    for mark, other in ((",", "."), (".", ",")):
        groups = cleaned.split(mark)[1:]
        if (
            len(groups) > 1
            and other not in cleaned
            and all(len(group) == 3 and group.isdigit() for group in groups)
        ):
            return Decimal(cleaned.replace(mark, ""))

    if cleaned.rfind(",") > cleaned.rfind("."):
        # EU notation: dots group thousands, the last comma is the decimal mark.
        cleaned = cleaned.replace(".", "")
        whole, _, fraction = cleaned.rpartition(",")
        cleaned = f"{whole}.{fraction}"

    # Any comma left at this point is a thousands separator.
    return Decimal(cleaned.replace(",", ""))
|
||||
|
||||
|
||||
def find_credit_institution(fn, banks, creditcards):
    """Extract the bank and optional credit card name from a filename.

    The filename stem is split on "_" and tokens that parse as integers are
    ignored. The first remaining token is the bank; the last of any further
    remaining tokens is the credit card.

    Args:
        fn: Filename or path of the extract.
        banks: Collection of known bank names (None is treated as empty).
        creditcards: Collection of known credit card names (None is treated
            as empty).

    Returns:
        A ``(bank, creditcard)`` tuple; ``creditcard`` is None when the
        filename does not name one.

    Raises:
        WrongFilenameError: if the filename contains no bank token.
        BankNotAvailableError: if the bank is not in ``banks``.
        CreditCardNotAvailableError: if the card is not in ``creditcards``.
    """
    bank, cc = None, None
    for token in Path(fn).stem.split("_"):
        try:
            int(token)
        except ValueError:
            # Non-numeric token: a bank name first, a credit card afterwards.
            if not bank:
                bank = token
            else:
                cc = token

    if not bank:
        raise WrongFilenameError(f"No bank name found in filename {fn!r}")

    # A missing configuration section arrives here as None; treat it as empty
    # so the membership tests raise the domain error instead of TypeError.
    if bank not in (banks or ()):
        raise BankNotAvailableError(f"Bank {bank!r} is not available")
    if cc and cc not in (creditcards or ()):
        raise CreditCardNotAvailableError(f"Credit card {cc!r} is not available")

    return bank, cc
|
||||
@ -1 +1,2 @@
|
||||
matplotlib==3.3.4
|
||||
PyYAML==5.4.1
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user