librebudget/scrapers/bank_of_america.py
Ian Adam Naval a64b8b2c41 Add some basic Web scraping logic
Can now pull basic account balance and transaction information.
2015-05-11 19:16:56 -04:00

96 lines
3.4 KiB
Python

"""Scraper implementation for Bank of America."""
from getpass import getpass
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from bs4 import BeautifulSoup
from common import BankWebAuthenticator, BankScraper
class BankOfAmericaWebAuthenticator(BankWebAuthenticator):
    """Logs a user in using the two-step form currently provided by BoA.

    This will be replaced at some point with a single sign-in form
    according to the BoA website.

    Currently, we deal with the "Verify your Identity" page by parsing
    the question and prompting the user.
    """

    def login(self, driver, credentials):
        """Sign in to bankofamerica.com with the given driver.

        :param driver: a selenium WebDriver pointed at a live browser.
        :param credentials: a ``(username, password)`` tuple.
        :returns: ``True`` when the post-login page does not contain the
            BoA failure banner, ``False`` otherwise.
        """
        username, password = credentials
        driver.get("https://bankofamerica.com")
        driver.find_element_by_id("id").send_keys(username)
        driver.find_element_by_id("hp-sign-in-btn").click()
        try:
            # Normal path: the passcode form is present on the second page.
            driver.find_element_by_id("tlpvt-passcode-input").send_keys(password)
            driver.find_element_by_id("passcode-confirm-sk-submit").click()
        except NoSuchElementException:
            # Challenge path: BoA showed the "Verify your Identity" page
            # instead.  Parse the security question and prompt the user.
            # Name the parser explicitly -- BeautifulSoup(markup) with no
            # parser argument warns and picks whichever parser happens to
            # be installed, which can change how the page is parsed.
            soup = BeautifulSoup(driver.page_source, "html.parser")
            prompt = soup.select('label[for=tlpvt-challenge-answer]')[0].text
            answer = input(prompt.strip())
            driver.find_element_by_id("tlpvt-challenge-answer").send_keys(answer)
            driver.find_element_by_id("verify-cq-submit").click()
        return "Your request can't be completed:" not in driver.page_source
class BankOfAmericaBankScraper(BankScraper):
    """Scrapes account balances and transactions from BoA online banking."""

    # Jumps straight to the accounts-overview page once authenticated.
    ACCOUNTS_URL = ("https://secure.bankofamerica.com/myaccounts/brain/"
                    "redirect.go?target=accountsoverview&request_locale=en-us")

    def __init__(self, driver):
        """Wire up a BoA authenticator around the given selenium driver."""
        authenticator = BankOfAmericaWebAuthenticator()
        super(BankOfAmericaBankScraper, self).__init__(driver, authenticator)

    def get_accounts(self):
        """Return a list of ``{'name': ..., 'balance': ...}`` dicts.

        Balances are returned as the raw display strings shown on the
        overview page, not parsed numbers.
        """
        self.driver.get(self.ACCOUNTS_URL)
        # Explicit parser: bare BeautifulSoup(markup) warns and its output
        # depends on which parser libraries happen to be installed.
        soup = BeautifulSoup(self.driver.page_source, "html.parser")
        names = [e.text.strip() for e in soup.find_all(class_='image-account')]
        balances = [e.text.strip() for e in soup.find_all(class_='TL_NPI_L1')]
        # zip() truncates to the shorter list, so an account missing its
        # balance cell is silently dropped rather than raising.
        accounts = [
            {
                'name': name,
                'balance': balance
            }
            for name, balance in zip(names, balances)
        ]
        return accounts

    def get_transactions_for_account(self, account):
        """Populate ``account['transactions']`` by visiting its detail page.

        :param account: a dict as returned by :meth:`get_accounts`; its
            ``'name'`` doubles as the DOM id of the account link.
        :returns: the same dict, mutated to include ``'transactions'``.
        """
        name = account['name']
        self.driver.get(self.ACCOUNTS_URL)
        self.driver.find_element_by_id(name).click()
        soup = BeautifulSoup(self.driver.page_source, "html.parser")
        rows = soup.select('.transaction-records tr')
        transactions = [self._tr_to_transaction(row) for row in rows]
        account['transactions'] = [e for e in transactions if e]  # filter None
        return account

    def _tr_to_transaction(self, tr):
        """Convert a ``<tr>`` soup element into a transaction dict.

        Returns ``None`` for header/filler rows that lack the expected
        cells, so callers can filter them out.
        """
        try:
            date = tr.select('.date-action span')[0].text.strip()
            description = tr.select('.transTitleForEditDesc')[0].text.strip()
            amount = tr.select('.amount')[0].text.strip()
        except IndexError:
            # A bare `except:` here also swallowed KeyboardInterrupt and
            # genuine bugs; an empty select() result (IndexError on [0])
            # is the only condition that means "not a transaction row".
            return None
        return {
            'date': date,
            'description': description,
            'amount': amount
        }
def main():
    """Entry point: prompt for credentials on stdin and print scraped data."""
    browser = webdriver.PhantomJS()
    creds = (input("username: "), getpass("password: "))
    print(BankOfAmericaBankScraper(browser).get_data(creds))


if __name__ == '__main__':
    main()