2020-06-14 18:33:05 +02:00
|
|
|
from bs4 import BeautifulSoup
|
2020-06-14 21:24:27 +02:00
|
|
|
from requests import get
|
|
|
|
from constants import FLICKR_URL
|
|
|
|
from re import findall
|
|
|
|
from typing import List
|
2020-06-14 18:33:05 +02:00
|
|
|
|
|
|
|
|
2020-06-14 21:24:27 +02:00
|
|
|
def request_flickr(keywords, timeout=10) -> str:
    """Fetch the Flickr search page for ``keywords`` and return its HTML.

    The request URL is built by substituting ``keywords`` into the
    ``FLICKR_URL`` template with ``str.format``.

    Args:
        keywords: Search terms inserted into ``FLICKR_URL``.
        timeout: Seconds to wait for the server before giving up. Passed
            unchanged to ``requests.get``; ``None`` waits indefinitely.

    Returns:
        The response body decoded as text.

    Raises:
        requests.exceptions.Timeout: The server did not answer in time.
        requests.exceptions.HTTPError: The server answered with a 4xx/5xx
            status code.
    """
    # NOTE(review): keywords are inserted verbatim; if FLICKR_URL places them
    # in a query string, characters such as "&" or "#" may need quoting --
    # confirm against the template in constants.py.
    search_url = FLICKR_URL.format(keywords)

    # requests applies no timeout by default, so a stalled connection would
    # otherwise block the caller forever.
    response = get(search_url, timeout=timeout)

    # Fail loudly on an error status instead of handing an error page to the
    # caller as if it were search results.
    response.raise_for_status()

    return response.text
|
2020-06-14 18:33:05 +02:00
|
|
|
|
|
|
|
|
2020-06-14 21:24:27 +02:00
|
|
|
def scrap_flickr(keywords) -> List[str]:
    """Return the Flickr image links found on the search page for ``keywords``.

    The search page is downloaded with ``request_flickr``, parsed with
    BeautifulSoup, and the photo ``div`` elements are scanned for
    ``live.staticflickr.com`` JPEG addresses.

    Args:
        keywords: Search terms forwarded to ``request_flickr``.

    Returns:
        The matched links in document order, each starting at
        ``live.staticflickr.com/`` (no scheme prefix) and ending in ``.jpg``.
        The list is empty when nothing matches.
    """
    html = request_flickr(keywords)
    soup = BeautifulSoup(html, features="html.parser")

    photo_divs = soup.find_all(
        "div", class_="view photo-list-photo-view requiredToShowOnServer awake",
    )

    # Raw string so "\S" reaches the regex engine without an invalid-escape
    # warning; dots are escaped so they only match a literal "."; the
    # non-greedy "+?" stops at the first ".jpg" so that two links sharing a
    # whitespace-free run are not merged into one bogus match.
    return findall(r"(live\.staticflickr\.com/\S+?\.jpg)", str(photo_divs))
|
2020-06-14 18:33:05 +02:00
|
|
|
|
|
|
|
|
2020-06-14 21:24:27 +02:00
|
|
|
# Run a sample search for "paris"; the returned list of links is discarded.
# NOTE(review): this appears to be a module-level statement, so it would fire
# a network request whenever the file is imported -- consider moving it under
# an `if __name__ == "__main__":` guard once callers are confirmed.
scrap_flickr("paris")
|