autopilot/autopilot/scrape.py
Ian Adam Naval 82b734d1a8 Add basic event summary and description
Summary is the title of the event in your calendar, so we want it to be
a little less verbose.
2019-01-21 22:13:59 -05:00

107 lines
3.3 KiB
Python

import logging
import logging.config
from typing import List
import pendulum
import requests
from bs4 import BeautifulSoup
from bs4.element import Tag
from autopilot import config
from autopilot.reservation import Reservation
# Module-level logger, named explicitly so it can be targeted by the logging
# configuration applied in get_reservations().
LOGGER = logging.getLogger('autopilot.scrape')
def init_session() -> requests.Session:
    """Create a requests session that identifies itself as a desktop browser.

    Returns:
        A new ``requests.Session`` whose default headers include a Chrome
        ``user-agent`` string.
    """
    # required for some requests or else it errors
    browser_user_agent = (
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHT'
        'ML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'
    )
    browser_session = requests.Session()
    browser_session.headers.update({'user-agent': browser_user_agent})
    return browser_session
def authenticate(session: requests.Session):
    """Log ``session`` in by posting the configured credentials to login.asp.

    Args:
        session: Session used to send the login request.

    Raises:
        ValueError: If the response body does not contain "Welcome".
    """
    login_form = dict(
        txtUserID=config.USERNAME,
        txtPassword=config.PASSWORD,
        mode='process',  # set by client-side JS; required to auth
    )
    login_url = f'{config.BASE_URL}/login.asp'
    reply = session.post(login_url, data=login_form)
    if "Welcome" in reply.text:
        return
    raise ValueError('failed to authenticate')
def format_day(day: pendulum.DateTime) -> str:
    """Render ``day`` using the ``M/D/YYYY`` pattern (e.g. ``1/26/2019``)."""
    # standard format throughout aerocalendar
    date_pattern = 'M/D/YYYY'
    return day.format(date_pattern)
def raw_schedule_html(session: requests.Session) -> str:
    """POST the MySchedule.asp form and return the response body as text.

    The request is built relative to today's date in ``config.TIME_ZONE``.

    Args:
        session: Session used to send the request.

    Returns:
        The response body (``response.text``) as a plain string.  It is NOT
        parsed here; get_reservations() feeds it to BeautifulSoup.
    """
    today = pendulum.today(config.TIME_ZONE)
    # The same formatted date is sent in several fields; format it once.
    today_str = format_day(today)

    params = (
        ('date', today_str),
        ('location', '1'),  # Bedford
    )
    url = f'{config.BASE_URL}/MySchedule.asp'
    data = {
        'rdoDays': 56,  # Look ahead 56 days
        'txtStartDate': today_str,
        'txtEndDate': format_day(today.end_of('week')),
        'txtSaveLocation': 1,
        'txtSaveDate': today_str,
        'submittype': 'notes',
        'reservation': '',
        'cancelcode': '',
        'submitted': 'true',
        'user': config.USER_ID,
    }
    response = session.post(url, params=params, data=data)
    return response.text
def make_reservation_from_tag(tag: Tag) -> Reservation:
    """Parse attributes from the title attribute of the reservation.

    The title is split on CRLF (tabs are dropped first).  ``Key : value``
    lines become lower-cased keyword arguments, and the second-to-last line
    is taken as the free-text comment.

    Example raw string parsed:

        Pilot : Ian Naval
        Instructor : Agnelo Lopes CFI, CFII
        Aircraft : N222ND
        Location : Bedford
        Start : 1/26/2019 12:00 PM
        End : 1/26/2019 3:00 PM
        Comments -----------------------------
        <free-text comment>

    Args:
        tag: Element whose ``title`` attribute holds the reservation text.

    Returns:
        A ``Reservation`` built from the parsed attributes, with ``start`` and
        ``end`` parsed into datetimes in ``config.TIME_ZONE``, plus
        ``comments`` and the cleaned-up ``raw`` text.

    Raises:
        KeyError: If the title has no ``Start`` or ``End`` line.
    """
    raw = tag.get('title')
    lines = raw.replace('\t', '').split('\r\n')

    attributes = {}
    for line in lines:
        # Everything from the "Comments" separator onwards is free text.  A
        # colon in there (e.g. "meet at 12:30") is not a key/value pair and
        # must not leak into the Reservation keyword arguments.
        if line.strip().lower().startswith('comments'):
            break
        if ':' in line:
            key, value = line.split(':', maxsplit=1)
            attributes[key.lower().strip()] = value.strip()

    # The comment text sits on the second-to-last line of the title.
    attributes['comments'] = lines[-2]

    # Tuple rather than a set literal: deterministic iteration order.
    for datetime_attr in ('start', 'end'):
        attributes[datetime_attr] = pendulum.parse(
            attributes[datetime_attr],
            strict=False,  # the site's "M/D/YYYY h:mm A" text is not ISO 8601
            tz=config.TIME_ZONE,
        )

    attributes['raw'] = raw.replace('\r', '').replace('\t', '')
    return Reservation(**attributes)
def get_reservations() -> List[Reservation]:
    """Log in, fetch the schedule page and parse every reservation on it.

    Applies ``config.LOGGING_CONFIG`` to the logging system, authenticates a
    fresh session, downloads the schedule HTML and builds one ``Reservation``
    per ``td.cR`` cell.

    Returns:
        The parsed reservations, in document order.

    Raises:
        ValueError: If authentication fails (raised by authenticate()).
    """
    logging.config.dictConfig(config.LOGGING_CONFIG)

    # Use the session as a context manager so its pooled connections are
    # released even if authentication or the download raises.
    with init_session() as session:
        authenticate(session)
        LOGGER.info("Authentication successful")
        html = raw_schedule_html(session)

    soup = BeautifulSoup(html, 'html.parser')
    reservations = [
        make_reservation_from_tag(tag) for tag in soup.select('td.cR')
    ]
    LOGGER.info("Got %d reservations", len(reservations))
    return reservations