about summary refs log tree commit diff
path: root/usth/ICT3.2/prac/5/data/scrape.py
diff options
context:
space:
mode:
Diffstat (limited to 'usth/ICT3.2/prac/5/data/scrape.py')
-rwxr-xr-x usth/ICT3.2/prac/5/data/scrape.py 46
1 files changed, 46 insertions, 0 deletions
diff --git a/usth/ICT3.2/prac/5/data/scrape.py b/usth/ICT3.2/prac/5/data/scrape.py
new file mode 100755
index 0000000..27af3e0
--- /dev/null
+++ b/usth/ICT3.2/prac/5/data/scrape.py
@@ -0,0 +1,46 @@
+#!/usr/bin/env python3
+from contextlib import asynccontextmanager
+
+from asks.sessions import Session
+from trio import Path, open_nursery, run
+
+
+@asynccontextmanager
+async def get(session, uri):
+    try:
+        response = await session.get(path=f'/{uri}')
+        print(response.url)
+        yield response
+    finally:
+        path = Path(f'{uri}.json')
+        await path.parent.mkdir(parents=True, exist_ok=True)
+        await path.write_bytes(response.content)
+
+
+async def get_comments(session, post):
+    async with get(session, f'post/{post}/comment'): pass
+
+
+async def get_posts(nursery, session, user=None):
+    uri = 'post' if user is None else f'user/{user}/post'
+    async with get(session, uri) as response:
+        for post in response.json()['data']:
+            nursery.start_soon(get_comments, session, post['id'])
+
+
+async def get_users(nursery, session):
+    async with get(session, 'user') as response:
+        for user in response.json()['data']:
+            nursery.start_soon(get_posts, nursery, session, user['id'])
+
+
+async def main():
+    async with open_nursery() as nursery:
+        session = Session(
+            base_location='https://dummyapi.io', endpoint='/data/api',
+            headers={'app-id': '5f76987a523f015e7c5ae49b'}, connections=20)
+        nursery.start_soon(get_users, nursery, session)
+        nursery.start_soon(get_posts, nursery, session)
+
+
+if __name__ == '__main__': run(main)