diff --git a/app/data_request.py b/app/data_request.py
index 0479a50..9a44167 100644
--- a/app/data_request.py
+++ b/app/data_request.py
@@ -1,6 +1,10 @@
+from re import findall
+from typing import List
+
+from bs4 import BeautifulSoup
 from requests import get
 
-from constants import URL
+from constants import FLICKR_URL, URL
 
 
 def format_url(dataset) -> str:
@@ -21,3 +25,34 @@ def request_dataset(dataset):
     response.raise_for_status()
     data = response.json()
     return data
+
+
+def request_flickr(keywords) -> str:
+    """
+    Returns the HTML of a Flickr search
+
+    Raises requests.HTTPError when Flickr answers with an error status,
+    matching the behaviour of request_dataset.
+    """
+    search_url = FLICKR_URL.format(keywords)
+    result = get(search_url)
+    result.raise_for_status()
+    return result.text
+
+
+def scrap_flickr(keywords) -> List[str]:
+    """
+    Creates a list of image links from a Flickr search
+
+    The links are returned without scheme, e.g.
+    "live.staticflickr.com/<path>.jpg".
+    """
+    html = request_flickr(keywords)
+    soup = BeautifulSoup(html, features="html.parser")
+    images = soup.find_all(
+        "div", class_="view photo-list-photo-view requiredToShowOnServer awake",
+    )
+    # Raw string with escaped dots so "." only matches a literal dot; the
+    # lazy quantifier stops each link at its first ".jpg".
+    image_links = findall(r"(live\.staticflickr\.com/\S+?\.jpg)", str(images))
+    return image_links
diff --git a/app/pruebascraping.py b/app/pruebascraping.py
deleted file mode 100644
index 4d396ee..0000000
--- a/app/pruebascraping.py
+++ /dev/null
@@ -1,25 +0,0 @@
-from bs4 import BeautifulSoup
-from requests import get
-from constants import FLICKR_URL
-from re import findall
-from typing import List
-
-
-def request_flickr(keywords) -> str:
-    search_url = FLICKR_URL.format(keywords)
-    result = get(search_url)
-    html = result.text
-    return html
-
-
-def scrap_flickr(keywords) -> List[str]:
-    html = request_flickr(keywords)
-    soup = BeautifulSoup(html, features="html.parser")
-    images = soup.find_all(
-        "div", class_="view photo-list-photo-view requiredToShowOnServer awake",
-    )
-    image_links = findall("(live.staticflickr.com/\S+.jpg)", str(images))
-    return image_links
-
-
-scrap_flickr("paris")