"""Scrape image links from a Flickr search results page.

The search URL template is ``constants.FLICKR_URL``
(``"https://www.flickr.com/search/?text={}"``), which has a single positional
placeholder for the search text.
"""
from re import findall
from typing import List
from urllib.parse import quote_plus

from bs4 import BeautifulSoup
from requests import get

from constants import FLICKR_URL

# requests applies no timeout unless one is passed explicitly, so without this
# a stalled connection would block the caller indefinitely.
_REQUEST_TIMEOUT_SECONDS = 10

# Raw string so ``\S`` reaches the regex engine untouched. The dots are escaped
# so they only match a literal ".", and the lazy ``+?`` stops at the first
# ".jpg" instead of swallowing several links that share one whitespace-free run.
_IMAGE_LINK_PATTERN = r"(live\.staticflickr\.com/\S+?\.jpg)"


def request_flickr(keywords: str) -> str:
    """Fetch the Flickr search results page for *keywords*.

    Args:
        keywords: Free-text search terms. They are URL-encoded before being
            substituted into ``FLICKR_URL``, so spaces and characters such as
            ``&`` or ``#`` cannot corrupt the query string.

    Returns:
        The body of the HTTP response decoded as text.

    Raises:
        requests.exceptions.RequestException: If the request fails or exceeds
            the timeout.

    Note:
        The HTTP status code is not checked; for an error response the error
        page's body is returned as-is.
    """
    # str() mirrors what "{}".format() would have done with a non-string value.
    search_url = FLICKR_URL.format(quote_plus(str(keywords)))
    response = get(search_url, timeout=_REQUEST_TIMEOUT_SECONDS)
    return response.text


def scrap_flickr(keywords: str) -> List[str]:
    """Return the Flickr image links found on the search page for *keywords*.

    Args:
        keywords: Free-text search terms, forwarded to ``request_flickr``.

    Returns:
        Scheme-less links of the form ``live.staticflickr.com/....jpg``, in the
        order they appear in the matched markup. Duplicates are not removed.
        The list is empty when nothing matches.
    """
    html = request_flickr(keywords)
    soup = BeautifulSoup(html, features="html.parser")
    # NOTE(review): a multi-class string is presumably compared against the
    # exact value of the class attribute, so a change in Flickr's class list
    # or ordering would silently yield no matches -- confirm against bs4 docs.
    photo_divs = soup.find_all(
        "div",
        class_="view photo-list-photo-view requiredToShowOnServer awake",
    )
    # The links are pulled out of the serialized markup of the matched <div>s.
    return findall(_IMAGE_LINK_PATTERN, str(photo_divs))


if __name__ == "__main__":
    # Manual smoke test. Guarded so that importing this module never triggers
    # a network request as a side effect.
    for image_link in scrap_flickr("paris"):
        print(image_link)