from re import findall
from typing import List

from bs4 import BeautifulSoup
from requests import get

from constants import FLICKR_URL, URL

# Upper bound (seconds) for each HTTP request. Without a timeout, `requests`
# waits indefinitely on a stalled server and the caller never regains control.
_REQUEST_TIMEOUT_SECONDS = 30

# Pattern for Flickr static image links found in the scraped markup.
# - raw string: `\S` is not a valid escape in a normal string literal
# - escaped dots: a bare `.` would match any character, not just a literal dot
# - non-greedy `\S+?`: stops at the first ".jpg", so two links that are not
#   separated by whitespace are reported as two matches instead of one blob
_FLICKR_IMAGE_PATTERN = r"(live\.staticflickr\.com/\S+?\.jpg)"


def format_url(dataset) -> str:
    """
    Constructs the API's URL for the requested dataset

    The URL is produced by substituting ``dataset`` into the ``URL``
    template imported from ``constants``.
    """
    return URL.format(dataset)


def request_dataset(dataset):
    """
    Fetches the requested dataset from opendata's API

    Returns the decoded JSON body of the response.

    Raises an exception if there's an HTTP error, or if the server does
    not answer within the request timeout.
    """
    response = get(format_url(dataset), timeout=_REQUEST_TIMEOUT_SECONDS)
    response.raise_for_status()
    return response.json()


def request_flickr(keywords) -> str:
    """
    Returns the HTML of a Flickr search

    ``keywords`` is substituted into the ``FLICKR_URL`` template imported
    from ``constants``. Raises an exception if the server does not answer
    within the request timeout.
    """
    search_url = FLICKR_URL.format(keywords)
    # NOTE(review): unlike request_dataset, the HTTP status is not checked
    # here, so the body of an error page is returned as-is. Confirm whether
    # callers rely on that before adding raise_for_status().
    response = get(search_url, timeout=_REQUEST_TIMEOUT_SECONDS)
    return response.text


def scrap_flickr(keywords) -> List[str]:
    """
    Creates a list of image links from a Flickr search

    Each link has the form ``live.staticflickr.com/<path>.jpg`` (no URL
    scheme). The list is empty when no matching element is found.
    """
    html = request_flickr(keywords)
    soup = BeautifulSoup(html, features="html.parser")
    # A class_ string containing spaces is compared against the full value of
    # the class attribute, so this depends on Flickr emitting exactly this
    # class list in this order -- TODO confirm against the current markup.
    photo_divs = soup.find_all(
        "div",
        class_="view photo-list-photo-view requiredToShowOnServer awake",
    )
    # The links are extracted from the serialized markup of the matched
    # elements rather than from a specific attribute.
    return findall(_FLICKR_IMAGE_PATTERN, str(photo_divs))