Add some basic Web scraping logic
Can now pull basic account balance and transaction information.
This commit is contained in:
parent
b05785ccdb
commit
a64b8b2c41
3
.gitignore
vendored
Normal file
3
.gitignore
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
__pycache__
|
||||
ghostdriver.log
|
||||
*.pyc
|
||||
95
scrapers/bank_of_america.py
Normal file
95
scrapers/bank_of_america.py
Normal file
@ -0,0 +1,95 @@
|
||||
"""Scraper implementation for Bank of America."""
|
||||
|
||||
from getpass import getpass
|
||||
|
||||
from selenium import webdriver
|
||||
from selenium.common.exceptions import NoSuchElementException
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from common import BankWebAuthenticator, BankScraper
|
||||
|
||||
|
||||
class BankOfAmericaWebAuthenticator(BankWebAuthenticator):
    """Logs a user in using the two-step form currently provided by BoA.

    This will be replaced at some point with a single sign-in form
    according to the BoA website.

    Currently, we deal with the "Verify your Identity" page by parsing
    the question and prompting the user.
    """

    def login(self, driver, credentials):
        """Signs in starting from the Bank of America home page.

        :param driver: A Selenium web driver
        :param credentials: two-tuple of (username, password)

        :return: False when neither the passcode form nor a challenge
                 question can be found; otherwise whether the page
                 shown after submitting lacks the "Your request can't
                 be completed:" error text
        """
        username, password = credentials
        driver.get("https://bankofamerica.com")
        driver.find_element_by_id("id").send_keys(username)
        driver.find_element_by_id("hp-sign-in-btn").click()
        try:
            driver.find_element_by_id("tlpvt-passcode-input").send_keys(password)
            driver.find_element_by_id("passcode-confirm-sk-submit").click()
        except NoSuchElementException:
            # Prompt user for challenge page.
            # The parser is named explicitly so the parse tree does not
            # depend on which optional parsers happen to be installed.
            soup = BeautifulSoup(driver.page_source, "html.parser")
            labels = soup.select('label[for=tlpvt-challenge-answer]')
            if not labels:
                # Neither the passcode form nor the challenge question is
                # on the page, so there is nothing left to submit.
                return False
            answer = input(labels[0].text.strip())
            driver.find_element_by_id("tlpvt-challenge-answer").send_keys(answer)
            driver.find_element_by_id("verify-cq-submit").click()
            # NOTE(review): the password is never submitted on this path;
            # presumably the passcode form follows the challenge page --
            # confirm against the live site.
        return "Your request can't be completed:" not in driver.page_source
|
||||
|
||||
|
||||
class BankOfAmericaBankScraper(BankScraper):
    """Scraper that reads account and transaction data from the Bank of
    America accounts overview page."""

    ACCOUNTS_URL = ("https://secure.bankofamerica.com/myaccounts/brain/"
                    "redirect.go?target=accountsoverview&request_locale=en-us")

    def __init__(self, driver):
        """Initializes the scraper with a BankOfAmericaWebAuthenticator.

        :param driver: A Selenium web driver
        """
        authenticator = BankOfAmericaWebAuthenticator()
        super(BankOfAmericaBankScraper, self).__init__(driver, authenticator)

    def get_accounts(self):
        """Retrieves the accounts listed on the accounts overview page.

        :return: List of dicts that contain the keys 'name' and
                 'balance', both as stripped page text
        """
        self.driver.get(self.ACCOUNTS_URL)
        # The parser is named explicitly so results do not depend on
        # which optional parsers happen to be installed.
        soup = BeautifulSoup(self.driver.page_source, "html.parser")
        names = [e.text.strip() for e in soup.find_all(class_='image-account')]
        balances = [e.text.strip() for e in soup.find_all(class_='TL_NPI_L1')]
        # Names and balances are paired by their order on the page.
        accounts = [
            {
                'name': name,
                'balance': balance
            }
            for name, balance in zip(names, balances)
        ]
        return accounts

    def get_transactions_for_account(self, account):
        """Gets the transactions for one account.

        :param account: dict with key 'name'; the name is used as the id
                        of the element clicked to open the account

        :return: The same dict, with a new key 'transactions' holding a
                 list of dicts with keys 'date', 'description' and
                 'amount'
        """
        name = account['name']
        self.driver.get(self.ACCOUNTS_URL)
        self.driver.find_element_by_id(name).click()
        soup = BeautifulSoup(self.driver.page_source, "html.parser")
        rows = soup.select('.transaction-records tr')
        transactions = [self._tr_to_transaction(row) for row in rows]
        account['transactions'] = [e for e in transactions if e]  # filter None
        return account

    def _tr_to_transaction(self, tr):
        """Converts one table row into a transaction dict.

        :param tr: A parsed row from the transaction table

        :return: dict with keys 'date', 'description' and 'amount', or
                 None when the row lacks any of the expected cells
        """
        try:
            date = tr.select('.date-action span')[0].text.strip()
            description = tr.select('.transTitleForEditDesc')[0].text.strip()
            amount = tr.select('.amount')[0].text.strip()
        except IndexError:
            # A selector matched nothing, so this row is not a
            # transaction row; anything else is a real error and
            # should propagate.
            return None
        return {
            'date': date,
            'description': description,
            'amount': amount
        }
|
||||
|
||||
|
||||
|
||||
def main():
    """Prompts for credentials, scrapes the account data and prints it."""
    driver = webdriver.PhantomJS()
    try:
        credentials = (input("username: "), getpass("password: "))
        scraper = BankOfAmericaBankScraper(driver)
        print(scraper.get_data(credentials))
    finally:
        # Always shut the browser down, even when scraping fails, so no
        # headless PhantomJS process is left running.
        driver.quit()
|
||||
|
||||
|
||||
# Run the scraper only when this file is executed as a script.
if '__main__' == __name__:
    main()
|
||||
79
scrapers/common.py
Normal file
79
scrapers/common.py
Normal file
@ -0,0 +1,79 @@
|
||||
"""Module for bank website scraping interfaces and base classes."""
|
||||
|
||||
from selenium.webdriver.remote.webdriver import WebDriver
|
||||
|
||||
|
||||
class BankWebAuthenticator(object):
    """Base class for objects that sign a user in to a bank website.

    Subclasses must override login().
    """

    def login(self, driver, credentials):
        """Signs the user in with the supplied credentials.

        :param driver: A Selenium web driver
        :param credentials: Credentials for the bank website

        :return: Whether the login was successful
        """
        message = "Must extend BankWebAuthenticator"
        raise NotImplementedError(message)
|
||||
|
||||
|
||||
class BankScraper(object):
    """Generic interface for a Web scraper that pulls information from
    bank websites."""

    def __init__(self, driver, authenticator):
        """Initializes the BankScraper

        :param driver: A Selenium web driver
        :param authenticator: A BankWebAuthenticator

        :raises TypeError: If authenticator is not a BankWebAuthenticator
        """
        # Raise explicitly rather than assert: assert statements are
        # stripped when Python runs with -O, which would silently
        # disable this check.
        if not isinstance(authenticator, BankWebAuthenticator):
            raise TypeError("authenticator must be a BankWebAuthenticator")
        self.driver = driver
        self.authenticator = authenticator

    def get_data(self, credentials, refresh=False):
        """Returns some data structure with the information parsed.
        Locally caches if possible.

        :param credentials: Credentials for the authenticator
                            two-tuple of (username, password)
        :param refresh: Forces the scraper to ignore local cache
                        (currently unused; nothing is cached yet)

        :return: dict with the key 'accounts' when login succeeds,
                 or an empty list when login fails
        """
        if not self.authenticator.login(self.driver, credentials):
            # should maybe raise an exception here instead of silently
            # failing
            return []
        accounts = self.get_accounts()
        return {
            'accounts': self.get_transactions(accounts)
        }

    def get_accounts(self):
        """Retrieves account information such as bank balance

        :return: List of dicts that contain the keys 'name' and
                 'balance'.
        """
        raise NotImplementedError("Must extend BankScraper")

    def get_transactions(self, accounts):
        """Gets the transactions associated with each account.

        :param accounts: List of dicts with the key 'name'

        :return: List of the same dicts but with a new key:
                 'transactions', which is itself a list of dicts
        """
        return [
            self.get_transactions_for_account(account)
            for account in accounts
        ]

    def get_transactions_for_account(self, account):
        """Gets the transactions for one account.

        :param account: dict with key 'name'

        :return: dict with keys 'name' and 'transactions', whose value
                 is a list of the transactions for this account
        """
        raise NotImplementedError("Must extend BankScraper")
|
||||
Loading…
x
Reference in New Issue
Block a user