diff options
Diffstat (limited to 'usth/ICT3.2/prac/5/data/scrape.py')
-rwxr-xr-x | usth/ICT3.2/prac/5/data/scrape.py | 46 |
1 files changed, 46 insertions, 0 deletions
#!/usr/bin/env python3
"""Scrape users, posts and comments from dummyapi.io into JSON files.

Each fetched URI is saved as ``<uri>.json`` (a relative path), so the
directory layout on disk mirrors the API paths that were requested.
"""
from contextlib import asynccontextmanager

from asks.sessions import Session
from trio import Path, open_nursery, run


@asynccontextmanager
async def get(session, uri):
    """Fetch ``/{uri}`` via *session*, yield the response, then save it.

    The response body is written to ``{uri}.json`` when the ``async with``
    block exits, whether or not the block raised.  Missing parent
    directories are created first.
    """
    # Issue the request *before* entering the try block: if the request
    # itself fails there is nothing to save, and the finally clause must
    # not reference an unbound ``response`` (which would raise
    # UnboundLocalError and mask the real error).
    response = await session.get(path=f'/{uri}')
    try:
        print(response.url)
        yield response
    finally:
        path = Path(f'{uri}.json')
        await path.parent.mkdir(parents=True, exist_ok=True)
        await path.write_bytes(response.content)


async def get_comments(session, post):
    """Fetch the comments of *post*; the response is only saved to disk."""
    async with get(session, f'post/{post}/comment'):
        pass


async def get_posts(nursery, session, user=None):
    """Fetch the post list and spawn a comment fetch for each post.

    With ``user=None`` the global ``post`` listing is requested; otherwise
    the posts of the given user id (``user/{user}/post``).
    """
    uri = 'post' if user is None else f'user/{user}/post'
    async with get(session, uri) as response:
        for post in response.json()['data']:
            nursery.start_soon(get_comments, session, post['id'])


async def get_users(nursery, session):
    """Fetch the user list and spawn a post fetch for each user."""
    async with get(session, 'user') as response:
        for user in response.json()['data']:
            nursery.start_soon(get_posts, nursery, session, user['id'])


async def main():
    """Start the scrape from both the user list and the global post list."""
    async with open_nursery() as nursery:
        session = Session(
            base_location='https://dummyapi.io', endpoint='/data/api',
            headers={'app-id': '5f76987a523f015e7c5ae49b'}, connections=20)
        nursery.start_soon(get_users, nursery, session)
        nursery.start_soon(get_posts, nursery, session)


if __name__ == '__main__':
    run(main)