Add basic scraping
This commit is contained in:
parent
44738acbea
commit
a4b65df21a
3
.env.example
Normal file
3
.env.example
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
export AUTOPILOT_USERNAME=
|
||||||
|
export AUTOPILOT_PASSWORD=
|
||||||
|
export AUTOPILOT_USER_ID=
|
||||||
4
.gitignore
vendored
Normal file
4
.gitignore
vendored
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
__pycache__
|
||||||
|
*.pyc
|
||||||
|
*.pyo
|
||||||
|
.env
|
||||||
15
Pipfile
Normal file
15
Pipfile
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
[[source]]
|
||||||
|
name = "pypi"
|
||||||
|
url = "https://pypi.org/simple"
|
||||||
|
verify_ssl = true
|
||||||
|
|
||||||
|
[dev-packages]
|
||||||
|
pylint = "*"
|
||||||
|
|
||||||
|
[packages]
|
||||||
|
requests = "*"
|
||||||
|
beautifulsoup4 = "*"
|
||||||
|
pendulum = "*"
|
||||||
|
|
||||||
|
[requires]
|
||||||
|
python_version = "3.7"
|
||||||
185
Pipfile.lock
generated
Normal file
185
Pipfile.lock
generated
Normal file
@ -0,0 +1,185 @@
|
|||||||
|
{
|
||||||
|
"_meta": {
|
||||||
|
"hash": {
|
||||||
|
"sha256": "c4123f8147ddb51b8a343ecb90f3c00907f83ef681abfb8f678f7d1edb58dd9b"
|
||||||
|
},
|
||||||
|
"pipfile-spec": 6,
|
||||||
|
"requires": {
|
||||||
|
"python_version": "3.7"
|
||||||
|
},
|
||||||
|
"sources": [
|
||||||
|
{
|
||||||
|
"name": "pypi",
|
||||||
|
"url": "https://pypi.org/simple",
|
||||||
|
"verify_ssl": true
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"default": {
|
||||||
|
"beautifulsoup4": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:034740f6cb549b4e932ae1ab975581e6103ac8f942200a0e9759065984391858",
|
||||||
|
"sha256:945065979fb8529dd2f37dbb58f00b661bdbcbebf954f93b32fdf5263ef35348",
|
||||||
|
"sha256:ba6d5c59906a85ac23dadfe5c88deaf3e179ef565f4898671253e50a78680718"
|
||||||
|
],
|
||||||
|
"index": "pypi",
|
||||||
|
"version": "==4.7.1"
|
||||||
|
},
|
||||||
|
"certifi": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:47f9c83ef4c0c621eaef743f133f09fa8a74a9b75f037e8624f83bd1b6626cb7",
|
||||||
|
"sha256:993f830721089fef441cdfeb4b2c8c9df86f0c63239f06bd025a76a7daddb033"
|
||||||
|
],
|
||||||
|
"version": "==2018.11.29"
|
||||||
|
},
|
||||||
|
"chardet": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae",
|
||||||
|
"sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691"
|
||||||
|
],
|
||||||
|
"version": "==3.0.4"
|
||||||
|
},
|
||||||
|
"idna": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:c357b3f628cf53ae2c4c05627ecc484553142ca23264e593d327bcde5e9c3407",
|
||||||
|
"sha256:ea8b7f6188e6fa117537c3df7da9fc686d485087abf6ac197f9c46432f7e4a3c"
|
||||||
|
],
|
||||||
|
"version": "==2.8"
|
||||||
|
},
|
||||||
|
"pendulum": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:0f43d963b27e92b04047ce8352e4c277db99f20d0b513df7d0ceafe674a2f727",
|
||||||
|
"sha256:14e60d26d7400980123dbb6e3f2a90b70d7c18c63742ffe5bd6d6a643f8c6ef1",
|
||||||
|
"sha256:5035a4e17504814a679f138374269cc7cc514aeac7ba6d9dc020abc224f25dbc",
|
||||||
|
"sha256:8c0b3d655c1e9205d4dacf42fffc929cde3b19b5fb544a7f7561e6896eb8a000",
|
||||||
|
"sha256:bfc7b33ae193a204ec0bec12ad0d2d3300cd7e51d91d992da525ba3b28f0d265",
|
||||||
|
"sha256:cd70b75800439794e1ad8dbfa24838845e171918df81fa98b68d0d5a6f9b8bf2",
|
||||||
|
"sha256:cf535d36c063575d4752af36df928882b2e0e31541b4482c97d63752785f9fcb"
|
||||||
|
],
|
||||||
|
"index": "pypi",
|
||||||
|
"version": "==2.0.4"
|
||||||
|
},
|
||||||
|
"python-dateutil": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:063df5763652e21de43de7d9e00ccf239f953a832941e37be541614732cdfc93",
|
||||||
|
"sha256:88f9287c0174266bb0d8cedd395cfba9c58e87e5ad86b2ce58859bc11be3cf02"
|
||||||
|
],
|
||||||
|
"version": "==2.7.5"
|
||||||
|
},
|
||||||
|
"pytzdata": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:10c74b0cfc51a9269031f86ecd11096c9c6a141f5bb15a3b8a88f9979f6361e2",
|
||||||
|
"sha256:279cbd9900d5da9a8f9053e60db0db7f42d9a799673744b76aaeb6b4f14abe77"
|
||||||
|
],
|
||||||
|
"version": "==2018.7"
|
||||||
|
},
|
||||||
|
"requests": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:502a824f31acdacb3a35b6690b5fbf0bc41d63a24a45c4004352b0242707598e",
|
||||||
|
"sha256:7bf2a778576d825600030a110f3c0e3e8edc51dfaafe1c146e39a2027784957b"
|
||||||
|
],
|
||||||
|
"index": "pypi",
|
||||||
|
"version": "==2.21.0"
|
||||||
|
},
|
||||||
|
"six": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:3350809f0555b11f552448330d0b52d5f24c91a322ea4a15ef22629740f3761c",
|
||||||
|
"sha256:d16a0141ec1a18405cd4ce8b4613101da75da0e9a7aec5bdd4fa804d0e0eba73"
|
||||||
|
],
|
||||||
|
"version": "==1.12.0"
|
||||||
|
},
|
||||||
|
"soupsieve": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:10687fc53eeb3518e01a0ac84d3d711da623d3298a3039459d3f649927c4a270",
|
||||||
|
"sha256:b23a0d7da0247200fe83c67c34de9d7599ad404106367313d8e65e04174d0b4b"
|
||||||
|
],
|
||||||
|
"version": "==1.7.2"
|
||||||
|
},
|
||||||
|
"urllib3": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:61bf29cada3fc2fbefad4fdf059ea4bd1b4a86d2b6d15e1c7c0b582b9752fe39",
|
||||||
|
"sha256:de9529817c93f27c8ccbfead6985011db27bd0ddfcdb2d86f3f663385c6a9c22"
|
||||||
|
],
|
||||||
|
"version": "==1.24.1"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"develop": {
|
||||||
|
"astroid": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:35b032003d6a863f5dcd7ec11abd5cd5893428beaa31ab164982403bcb311f22",
|
||||||
|
"sha256:6a5d668d7dc69110de01cdf7aeec69a679ef486862a0850cc0fd5571505b6b7e"
|
||||||
|
],
|
||||||
|
"version": "==2.1.0"
|
||||||
|
},
|
||||||
|
"isort": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:1153601da39a25b14ddc54955dbbacbb6b2d19135386699e2ad58517953b34af",
|
||||||
|
"sha256:b9c40e9750f3d77e6e4d441d8b0266cf555e7cdabdcff33c4fd06366ca761ef8",
|
||||||
|
"sha256:ec9ef8f4a9bc6f71eec99e1806bfa2de401650d996c59330782b89a5555c1497"
|
||||||
|
],
|
||||||
|
"version": "==4.3.4"
|
||||||
|
},
|
||||||
|
"lazy-object-proxy": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:0ce34342b419bd8f018e6666bfef729aec3edf62345a53b537a4dcc115746a33",
|
||||||
|
"sha256:1b668120716eb7ee21d8a38815e5eb3bb8211117d9a90b0f8e21722c0758cc39",
|
||||||
|
"sha256:209615b0fe4624d79e50220ce3310ca1a9445fd8e6d3572a896e7f9146bbf019",
|
||||||
|
"sha256:27bf62cb2b1a2068d443ff7097ee33393f8483b570b475db8ebf7e1cba64f088",
|
||||||
|
"sha256:27ea6fd1c02dcc78172a82fc37fcc0992a94e4cecf53cb6d73f11749825bd98b",
|
||||||
|
"sha256:2c1b21b44ac9beb0fc848d3993924147ba45c4ebc24be19825e57aabbe74a99e",
|
||||||
|
"sha256:2df72ab12046a3496a92476020a1a0abf78b2a7db9ff4dc2036b8dd980203ae6",
|
||||||
|
"sha256:320ffd3de9699d3892048baee45ebfbbf9388a7d65d832d7e580243ade426d2b",
|
||||||
|
"sha256:50e3b9a464d5d08cc5227413db0d1c4707b6172e4d4d915c1c70e4de0bbff1f5",
|
||||||
|
"sha256:5276db7ff62bb7b52f77f1f51ed58850e315154249aceb42e7f4c611f0f847ff",
|
||||||
|
"sha256:61a6cf00dcb1a7f0c773ed4acc509cb636af2d6337a08f362413c76b2b47a8dd",
|
||||||
|
"sha256:6ae6c4cb59f199d8827c5a07546b2ab7e85d262acaccaacd49b62f53f7c456f7",
|
||||||
|
"sha256:7661d401d60d8bf15bb5da39e4dd72f5d764c5aff5a86ef52a042506e3e970ff",
|
||||||
|
"sha256:7bd527f36a605c914efca5d3d014170b2cb184723e423d26b1fb2fd9108e264d",
|
||||||
|
"sha256:7cb54db3535c8686ea12e9535eb087d32421184eacc6939ef15ef50f83a5e7e2",
|
||||||
|
"sha256:7f3a2d740291f7f2c111d86a1c4851b70fb000a6c8883a59660d95ad57b9df35",
|
||||||
|
"sha256:81304b7d8e9c824d058087dcb89144842c8e0dea6d281c031f59f0acf66963d4",
|
||||||
|
"sha256:933947e8b4fbe617a51528b09851685138b49d511af0b6c0da2539115d6d4514",
|
||||||
|
"sha256:94223d7f060301b3a8c09c9b3bc3294b56b2188e7d8179c762a1cda72c979252",
|
||||||
|
"sha256:ab3ca49afcb47058393b0122428358d2fbe0408cf99f1b58b295cfeb4ed39109",
|
||||||
|
"sha256:bd6292f565ca46dee4e737ebcc20742e3b5be2b01556dafe169f6c65d088875f",
|
||||||
|
"sha256:cb924aa3e4a3fb644d0c463cad5bc2572649a6a3f68a7f8e4fbe44aaa6d77e4c",
|
||||||
|
"sha256:d0fc7a286feac9077ec52a927fc9fe8fe2fabab95426722be4c953c9a8bede92",
|
||||||
|
"sha256:ddc34786490a6e4ec0a855d401034cbd1242ef186c20d79d2166d6a4bd449577",
|
||||||
|
"sha256:e34b155e36fa9da7e1b7c738ed7767fc9491a62ec6af70fe9da4a057759edc2d",
|
||||||
|
"sha256:e5b9e8f6bda48460b7b143c3821b21b452cb3a835e6bbd5dd33aa0c8d3f5137d",
|
||||||
|
"sha256:e81ebf6c5ee9684be8f2c87563880f93eedd56dd2b6146d8a725b50b7e5adb0f",
|
||||||
|
"sha256:eb91be369f945f10d3a49f5f9be8b3d0b93a4c2be8f8a5b83b0571b8123e0a7a",
|
||||||
|
"sha256:f460d1ceb0e4a5dcb2a652db0904224f367c9b3c1470d5a7683c0480e582468b"
|
||||||
|
],
|
||||||
|
"version": "==1.3.1"
|
||||||
|
},
|
||||||
|
"mccabe": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42",
|
||||||
|
"sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f"
|
||||||
|
],
|
||||||
|
"version": "==0.6.1"
|
||||||
|
},
|
||||||
|
"pylint": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:689de29ae747642ab230c6d37be2b969bf75663176658851f456619aacf27492",
|
||||||
|
"sha256:771467c434d0d9f081741fec1d64dfb011ed26e65e12a28fe06ca2f61c4d556c"
|
||||||
|
],
|
||||||
|
"index": "pypi",
|
||||||
|
"version": "==2.2.2"
|
||||||
|
},
|
||||||
|
"six": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:3350809f0555b11f552448330d0b52d5f24c91a322ea4a15ef22629740f3761c",
|
||||||
|
"sha256:d16a0141ec1a18405cd4ce8b4613101da75da0e9a7aec5bdd4fa804d0e0eba73"
|
||||||
|
],
|
||||||
|
"version": "==1.12.0"
|
||||||
|
},
|
||||||
|
"wrapt": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:4aea003270831cceb8a90ff27c4031da6ead7ec1886023b80ce0dfe0adf61533"
|
||||||
|
],
|
||||||
|
"version": "==1.11.1"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
15
README.md
15
README.md
@ -7,3 +7,18 @@ Scheduling automation for East Coast Aero Club's flight scheduling software.
|
|||||||
* Sync with Google calendar
|
* Sync with Google calendar
|
||||||
* Automatically schedule desired flight
|
* Automatically schedule desired flight
|
||||||
|
|
||||||
|
## Setup
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pipenv install # install python deps
|
||||||
|
cp .env.example .env
|
||||||
|
$EDITOR .env # add your config
|
||||||
|
```
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
```bash
|
||||||
|
source .env
|
||||||
|
python -m autopilot.scrape
|
||||||
|
```
|
||||||
|
|
||||||
|
|||||||
0
autopilot/__init__.py
Normal file
0
autopilot/__init__.py
Normal file
32
autopilot/config.py
Normal file
32
autopilot/config.py
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
import os
|
||||||
|
|
||||||
|
|
||||||
|
# Logging setup in the ``logging.config.dictConfig`` schema: one stream
# handler at INFO attached to the root logger ('').
LOGGING_CONFIG = {
    "version": 1,
    "disable_existing_loggers": False,
    "formatters": {
        "standard": {
            "format": "%(asctime)s [%(levelname)s] %(name)s: %(message)s",
        },
    },
    "handlers": {
        "default": {
            "level": "INFO",
            "formatter": "standard",
            "class": "logging.StreamHandler",
        },
    },
    "loggers": {
        "": {
            "handlers": ["default"],
            "level": "INFO",
            "propagate": True,
        },
    },
}

# Root of the aerocalendar site that every request URL is built from.
BASE_URL = "https://ecac.aerocalendar.com"
# of the club
TIME_ZONE = "America/New_York"

# Account settings come from the environment (see .env.example); each is
# None when the corresponding variable is not set.
USERNAME = os.getenv("AUTOPILOT_USERNAME")
PASSWORD = os.getenv("AUTOPILOT_PASSWORD")
USER_ID = os.getenv("AUTOPILOT_USER_ID")
|
||||||
16
autopilot/reservation.py
Normal file
16
autopilot/reservation.py
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
import pendulum
|
||||||
|
|
||||||
|
from autopilot import config
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class Reservation:
    """One scheduled reservation.

    The generated ``__init__`` takes the fields positionally in the order
    they are declared below, so do not reorder them.
    """

    pilot: str
    instructor: str
    aircraft: str  # e.g. a tail number such as "N222ND"
    location: str
    start: pendulum.DateTime
    end: pendulum.DateTime
    comments: str  # free-text note attached to the reservation
|
||||||
113
autopilot/scrape.py
Normal file
113
autopilot/scrape.py
Normal file
@ -0,0 +1,113 @@
|
|||||||
|
import logging
|
||||||
|
import logging.config
|
||||||
|
from typing import List
|
||||||
|
|
||||||
|
import pendulum
|
||||||
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
from bs4.element import Tag
|
||||||
|
|
||||||
|
from autopilot import config
|
||||||
|
from autopilot.reservation import Reservation
|
||||||
|
|
||||||
|
|
||||||
|
# Module-level logger. The name is spelled out rather than taken from
# __name__, so it stays 'autopilot.scrape' even when the module is executed
# as a script (where __name__ would be '__main__').
LOGGER = logging.getLogger('autopilot.scrape')
|
||||||
|
|
||||||
|
|
||||||
|
def init_session() -> requests.Session:
    """Create a ``requests.Session`` that sends a desktop-browser user agent.

    Returns:
        A new session whose default headers include the ``user-agent`` below.
    """
    browser_agent = ('Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHT'
                     'ML, like Gecko) Chrome/71.0.3578.98 Safari/537.36')
    session = requests.Session()
    # required for some requests or else it errors
    session.headers.update({'user-agent': browser_agent})
    return session
|
||||||
|
|
||||||
|
|
||||||
|
def authenticate(session: requests.Session):
    """Log *session* in by POSTing the configured credentials to login.asp.

    Args:
        session: Session used to send the login request.

    Raises:
        ValueError: If the response body does not contain "Welcome".
    """
    login_url = f'{config.BASE_URL}/login.asp'
    form = {
        'txtUserID': config.USERNAME,
        'txtPassword': config.PASSWORD,
        # set by client-side JS; required to auth
        'mode': 'process',
    }
    reply = session.post(login_url, data=form)
    if "Welcome" in reply.text:
        return
    raise ValueError('failed to authenticate')
|
||||||
|
|
||||||
|
|
||||||
|
def format_day(day: pendulum.DateTime) -> str:
    """Return *day* formatted with the ``M/D/YYYY`` pattern."""
    # standard format throughout aerocalendar
    aerocalendar_pattern = 'M/D/YYYY'
    return day.format(aerocalendar_pattern)
|
||||||
|
|
||||||
|
|
||||||
|
def raw_schedule_html(session: requests.Session) -> str:
    """POST the schedule form to MySchedule.asp and return the response body.

    Args:
        session: Session used to send the request.

    Returns:
        The response text as a ``str``; parsing it is left to the caller.
    """
    today = pendulum.today(config.TIME_ZONE)
    today_str = format_day(today)  # same value is sent in several fields
    params = (
        ('date', today_str),
        ('location', '1'),  # Bedford
    )
    url = f'{config.BASE_URL}/MySchedule.asp'
    data = {
        'rdoDays': 56,  # Look ahead 56 days
        'txtStartDate': today_str,
        # NOTE(review): the end date is the end of the current week while
        # rdoDays asks for 56 days -- confirm which one the server honors.
        'txtEndDate': format_day(today.end_of('week')),
        'txtSaveLocation': 1,
        'txtSaveDate': today_str,
        'submittype': 'notes',
        'reservation': '',
        'cancelcode': '',
        'submitted': 'true',
        'user': config.USER_ID,
    }
    response = session.post(url, params=params, data=data)
    return response.text
|
||||||
|
|
||||||
|
|
||||||
|
def make_reservation_from_tag(tag: Tag) -> Reservation:
    """Parse attributes from the title attribute of the reservation

    Example raw string parsed:

        Pilot : Ian Naval
        Instructor : Agnelo Lopes CFI, CFII
        Aircraft : N222ND
        Location : Bedford
        Start : 1/26/2019 12:00 PM
        End : 1/26/2019 3:00 PM
        Comments -----------------------------
        "

    Only the known header keys are collected from ``key : value`` lines,
    and the first occurrence of each key wins. Any other line containing a
    colon (for instance inside the free-text comment) is ignored, so it can
    neither become an unexpected keyword argument for ``Reservation`` nor
    overwrite a real header value.

    Args:
        tag: Element whose ``title`` attribute holds the reservation text.

    Returns:
        A ``Reservation`` whose ``start``/``end`` are parsed in
        ``config.TIME_ZONE`` and whose ``comments`` is the second-to-last
        line of the title text.

    Raises:
        KeyError: If the title text has no ``Start`` or ``End`` line.
    """
    header_fields = ('pilot', 'instructor', 'aircraft', 'location',
                     'start', 'end')
    raw = tag.get('title')
    lines = raw.replace('\t', '').split('\r\n')
    attributes = {}
    for line in lines:
        if ':' not in line:
            continue
        key, value = line.split(':', maxsplit=1)
        key = key.lower().strip()
        if key in header_fields:
            # first occurrence wins: headers come before the comment text
            attributes.setdefault(key, value.strip())
    attributes['comments'] = lines[-2]
    for datetime_attr in ('start', 'end'):
        attributes[datetime_attr] = pendulum.parse(attributes[datetime_attr],
                                                   strict=False,
                                                   tz=config.TIME_ZONE)
    return Reservation(**attributes)
|
||||||
|
|
||||||
|
|
||||||
|
def get_reservations(session: requests.Session) -> List[Reservation]:
    """Fetch the schedule page and build a Reservation per ``td.cR`` element.

    Args:
        session: Session passed through to ``raw_schedule_html``.

    Returns:
        One ``Reservation`` for each element matching ``td.cR``, in
        document order.
    """
    soup = BeautifulSoup(raw_schedule_html(session), 'html.parser')
    reservations = []
    for cell in soup.select('td.cR'):
        reservations.append(make_reservation_from_tag(cell))
    return reservations
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Configure logging, log in, scrape reservations and log how many."""
    logging.config.dictConfig(config.LOGGING_CONFIG)
    session = init_session()
    # raises ValueError on failure, so the next log line is only reached
    # after a successful login
    authenticate(session)
    LOGGER.info("Authentication successful")
    reservations = get_reservations(session)
    LOGGER.info("Got %d reservations", len(reservations))


if __name__ == "__main__":
    main()
|
||||||
Loading…
x
Reference in New Issue
Block a user