librebudget/scrapers/common.py
Ian Adam Naval a64b8b2c41 Add some basic Web scraping logic
Can now pull basic account balance and transaction information.
2015-05-11 19:16:56 -04:00

80 lines
2.5 KiB
Python

"""Module for bank website scraping interfaces and base classes."""
from selenium.webdriver.remote.webdriver import WebDriver
class BankWebAuthenticator(object):
    """Interface for objects that log a user in to a bank website.

    This base class provides no working behaviour: ``login`` always
    raises, so a subclass has to override it.
    """

    def login(self, driver, credentials):
        """Logs a user in using the given credentials.

        :param driver: The web driver to perform the login with
        :param credentials: The credentials to log in with
        :return: Whether the login was successful
        :raises NotImplementedError: always, in this base class
        """
        message = "Must extend BankWebAuthenticator"
        raise NotImplementedError(message)
class BankScraper(object):
    """Generic interface for a Web scraper that pulls information from
    bank websites.

    Subclasses must implement ``get_accounts`` and
    ``get_transactions_for_account``; ``get_data`` and
    ``get_transactions`` are built on top of those two hooks.
    """

    def __init__(self, driver, authenticator):
        """Initializes the BankScraper

        :param driver: A Selenium web driver
        :param authenticator: A BankWebAuthenticator
        :raises TypeError: if ``authenticator`` is not a
                           BankWebAuthenticator instance
        """
        # Explicit check instead of ``assert``: assertions are stripped
        # when Python runs with -O, which would silently accept an
        # object of the wrong type here.
        if not isinstance(authenticator, BankWebAuthenticator):
            raise TypeError(
                "authenticator must be a BankWebAuthenticator, got %s"
                % type(authenticator).__name__
            )
        self.driver = driver
        self.authenticator = authenticator

    def get_data(self, credentials, refresh=False):
        """Logs in and returns the information parsed from the site.

        :param credentials: Credentials for the authenticator
                            two-tuple of (username, password)
        :param refresh: Meant to force the scraper to ignore a local
                        cache; this base implementation does no caching
                        and does not read the flag
        :return: On successful login, a dict with the single key
                 'accounts' holding the result of ``get_transactions``.
                 On failed login, an empty list.
        """
        if not self.authenticator.login(self.driver, credentials):
            # should maybe raise an exception here instead of silently
            # failing; the empty list is kept for backward compatibility
            return []

        accounts = self.get_accounts()
        return {'accounts': self.get_transactions(accounts)}

    def get_accounts(self):
        """Retrieves account information such as bank balance

        :return: List of dicts that contain the keys 'name' and
                 'balance'.
        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError("Must extend BankScraper")

    def get_transactions(self, accounts):
        """Gets the transactions associated with each account.

        Calls ``get_transactions_for_account`` once per account, in
        order.

        :param accounts: List of dicts with the key 'name'
        :return: List of the same dicts but with a new key:
                 'transactions', which is itself a list of dicts
        """
        return [
            self.get_transactions_for_account(account)
            for account in accounts
        ]

    def get_transactions_for_account(self, account):
        """Gets the transactions for one account.

        :param account: dict with key 'name'
        :return: dict with keys 'name' and 'transactions', whose value
                 is a list of the transactions for this account
        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError("Must extend BankScraper")