-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfinance.py
More file actions
66 lines (53 loc) · 2.63 KB
/
finance.py
File metadata and controls
66 lines (53 loc) · 2.63 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import re
import numpy as np
import gspread
from oauth2client.service_account import ServiceAccountCredentials
def similarity(query, reference):
    """Return the F1 score of the character-set overlap between two strings.

    Both strings are reduced to the set of their distinct characters, so
    character order and repetition are ignored.

    Args:
        query: The string being matched.
        reference: The string to compare against.

    Returns:
        The harmonic mean of precision (shared characters / distinct query
        characters) and recall (shared characters / distinct reference
        characters). Returns 0 when either string is empty or when the two
        strings share no characters.
    """
    query_chars = set(query)
    reference_chars = set(reference)
    # An empty side would make precision or recall divide by zero.
    if not query_chars or not reference_chars:
        return 0

    common_letters = query_chars & reference_chars
    # With no overlap precision + recall is 0, so F1 is defined as 0 here.
    if not common_letters:
        return 0

    precision = len(common_letters) / len(query_chars)
    recall = len(common_letters) / len(reference_chars)
    return 2 * precision * recall / (precision + recall)
def get_most_similar(query, categories):
    """Pick the entry of ``categories`` that scores highest against ``query``.

    Every entry is scored with ``similarity(query, entry)``.

    Args:
        query: The string to match.
        categories: An integer-indexable sequence of candidate strings;
            expected to be non-empty.

    Returns:
        A ``(best_category, best_score)`` tuple. On ties the entry with the
        lowest index is returned.
    """
    scores = [similarity(query, candidate) for candidate in categories]
    best_index = np.argmax(scores)
    best_score = max(scores)
    return categories[best_index], best_score
class SheetWriter:
    """Parse free-text expense messages and append them to a Google Sheet.

    The list of known categories is read from the spreadsheet once, when the
    instance is created, and kept in ``self.categories``.
    """

    # Title of the spreadsheet used for both reading and writing.
    SPREADSHEET_NAME = 'финансы'
    # Zero-based positions of the worksheets inside that spreadsheet.
    EXPENSES_WORKSHEET_INDEX = 0
    CATEGORIES_WORKSHEET_INDEX = 2
    # Lowest similarity score at which a fuzzy category match is accepted
    # without falling back to the position-independent parser.
    MIN_CATEGORY_SIMILARITY = 0.85
    # Keyword stems that may introduce an expense message, in priority order.
    _PREFIX_KEYWORDS = ('трат', 'расход')
    # A keyword only counts as a prefix if it lies within this many leading
    # characters of the message.
    _PREFIX_SCAN_LENGTH = 15

    def __init__(self, cred_path):
        """Load service-account credentials and fetch the category list.

        Args:
            cred_path: Path to the service-account JSON key file.
        """
        scopes = ['https://www.googleapis.com/auth/spreadsheets', 'https://www.googleapis.com/auth/drive']
        self.credentials = ServiceAccountCredentials.from_json_keyfile_name(cred_path, scopes)
        self.categories = self.get_categories()

    def _open_worksheet(self, index):
        """Authorize with the stored credentials and return worksheet ``index``."""
        client = gspread.authorize(self.credentials)
        return client.open(self.SPREADSHEET_NAME).worksheets()[index]

    def write_to_gsheet(self, amount, category, comment):
        """Write one expense into columns B..D of the expenses worksheet.

        The target row is one past the number of values returned for column 1.

        Args:
            amount: Expense amount.
            category: Expense category.
            comment: Free-text comment.
        """
        sheet = self._open_worksheet(self.EXPENSES_WORKSHEET_INDEX)
        write_row_ind = len(sheet.col_values(1)) + 1
        # Keyword arguments keep the call independent of the positional order
        # of ``range_name``/``values``, which differs between gspread versions.
        sheet.update(
            range_name=f"B{write_row_ind}:D{write_row_ind}",
            values=[[amount, category, comment]],
        )

    def parse_expense(self, text):
        """Extract ``(amount, category, comment)`` from a free-text message.

        An optional leading keyword ("трат…" / "расход…") is removed first.
        The remainder is parsed as ``<amount> <category> [comment…]``; if that
        fails, the amount and category are searched for among all words
        regardless of their position.

        Args:
            text: The raw message.

        Returns:
            A tuple ``(amount, category, comment)`` where ``amount`` is an
            ``int``, ``category`` is an entry of ``self.categories`` and
            ``comment`` is the remaining words joined by single spaces.

        Raises:
            ValueError: If no word of the message contains a digit.
        """
        text = self._strip_prefix(text)
        try:
            return self._parse_ordered(text)
        except ValueError:
            return self._parse_unordered(text)

    def _strip_prefix(self, text):
        """Drop a leading expense keyword, its word ending and the separator."""
        for keyword in self._PREFIX_KEYWORDS:
            # Case-insensitive search on the original text, so the reported
            # positions are valid for slicing it. After the stem, skip the
            # rest of the word (letters only) and any whitespace or colons.
            match = re.search(
                f"({keyword})" + r"[^\W\d_]*[\s:]*", text, re.IGNORECASE
            )
            if match and match.end(1) <= self._PREFIX_SCAN_LENGTH:
                return text[match.end():]
        return text

    def _parse_ordered(self, text):
        """Parse ``<amount> <category> [comment…]``; raise ValueError on mismatch."""
        # split() without arguments collapses repeated whitespace, so no empty
        # token can end up as the category.
        amount, category, *comment_words = text.split()
        # Dots are treated as thousands separators and removed.
        amount = int(amount.replace('.', ''))
        if category not in self.categories:
            category, score = get_most_similar(category, self.categories)
            if score < self.MIN_CATEGORY_SIMILARITY:
                raise ValueError(f"no sufficiently similar category in: {text!r}")
        return amount, category, ' '.join(comment_words)

    def _parse_unordered(self, text):
        """Find the amount and the category among all words, in any position."""
        words = text.split()
        digit_runs = [re.sub(r'[^0-9]', '', word) for word in words]
        if not any(digit_runs):
            raise ValueError(f"no amount found in expense text: {text!r}")
        # The word carrying the most digits is taken as the amount.
        amount_ind = int(np.argmax([len(digits) for digits in digit_runs]))
        amount = int(digit_runs[amount_ind])

        # The word whose best category match scores highest names the category.
        matches = [get_most_similar(word, self.categories) for word in words]
        category_ind = int(np.argmax([score for _, score in matches]))
        category = matches[category_ind][0]

        used = {amount_ind, category_ind}
        comment = ' '.join(word for i, word in enumerate(words) if i not in used)
        return amount, category, comment

    def get_categories(self):
        """Return the values of column 1 of the categories worksheet."""
        sheet = self._open_worksheet(self.CATEGORIES_WORKSHEET_INDEX)
        return sheet.col_values(1)