about summary refs log tree commit diff
path: root/usth/ICT3.2/prac/5/data/scrape.py
blob: 27af3e03d959a843e3c42f22214a471f1a2b3b81 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
#!/usr/bin/env python3
from contextlib import asynccontextmanager

from asks.sessions import Session
from trio import Path, open_nursery, run


@asynccontextmanager
async def get(session, uri):
    """Fetch ``/{uri}`` through *session* and save the body to ``{uri}.json``.

    The response is yielded to the ``async with`` body.  Once that body
    exits, whether normally or with an exception, the raw response content
    is written to the path ``{uri}.json``, creating missing parent
    directories first.

    Args:
        session: Object whose awaitable ``get(path=...)`` returns a response
            exposing ``url`` and ``content``.
        uri: Resource path without the leading slash; it is also used as
            the stem of the output file.

    Yields:
        The response returned by ``session.get``.

    Raises:
        Any exception raised by ``session.get`` propagates unchanged, and
        nothing is written in that case.
    """
    # The request is made before entering the try block: if it fails there
    # is no response to save, and running the cleanup would only replace
    # the real error with an UnboundLocalError on ``response``.
    response = await session.get(path=f'/{uri}')
    try:
        print(response.url)
        yield response
    finally:
        # Save the payload even if the consumer failed while handling it.
        path = Path(f'{uri}.json')
        await path.parent.mkdir(parents=True, exist_ok=True)
        await path.write_bytes(response.content)


async def get_comments(session, post):
    """Request the comment listing of *post* via `get`.

    The response itself is not inspected here; `get` is entered only for
    what it does on its own (printing the URL and saving the body).
    """
    resource = f'post/{post}/comment'
    async with get(session, resource):
        pass


async def get_posts(nursery, session, user=None):
    """Fetch a post listing and spawn a comment fetch for every post.

    With *user* left as ``None`` the resource ``post`` is requested,
    otherwise ``user/{user}/post``.  For each item under the ``'data'``
    key of the JSON response, `get_comments` is scheduled on *nursery*
    with that item's ``'id'``.
    """
    if user is None:
        uri = 'post'
    else:
        uri = f'user/{user}/post'
    async with get(session, uri) as response:
        entries = response.json()['data']
        for entry in entries:
            nursery.start_soon(get_comments, session, entry['id'])


async def get_users(nursery, session):
    """Fetch the user listing and spawn a post fetch for every user.

    For each item under the ``'data'`` key of the JSON response,
    `get_posts` is scheduled on *nursery* with that item's ``'id'`` as
    the user.
    """
    async with get(session, 'user') as response:
        members = response.json()['data']
        for member in members:
            nursery.start_soon(get_posts, nursery, session, member['id'])


async def main():
    """Open a nursery, build the HTTP session and start the two root tasks.

    `get_users` and `get_posts` are both scheduled on the same nursery and
    receive it as an argument so they can schedule further tasks on it.
    """
    async with open_nursery() as nursery:
        session = Session(
            base_location='https://dummyapi.io',
            endpoint='/data/api',
            # NOTE(review): the app-id is hard-coded in the source.
            headers={'app-id': '5f76987a523f015e7c5ae49b'},
            connections=20,
        )
        # Same scheduling order as before: users first, then all posts.
        for root_task in (get_users, get_posts):
            nursery.start_soon(root_task, nursery, session)


if __name__ == '__main__':
    # Script entry point: pass the async main function to trio's runner.
    run(main)