From 23dea062e575674fd850aade99a18a1d30aaaa9f Mon Sep 17 00:00:00 2001
From: coolneng <akasroua@gmail.com>
Date: Sun, 14 Jun 2020 21:24:27 +0200
Subject: [PATCH] Scrape Flickr images

---
 app/pruebascraping.py | 31 ++++++++++++++++++-------------
 constants.py          |  1 +
 2 files changed, 19 insertions(+), 13 deletions(-)

diff --git a/app/pruebascraping.py b/app/pruebascraping.py
index 0c2b75d..4d396ee 100644
--- a/app/pruebascraping.py
+++ b/app/pruebascraping.py
@@ -1,20 +1,25 @@
 from bs4 import BeautifulSoup
-import urllib.request
-import re
+from requests import get
+from constants import FLICKR_URL
+from re import findall
+from typing import List
 
 
-def request_Flickr(keywords):
-    datos=urllib.request.urlopen("https://commons.wikimedia.org/w/index.php?search={keywords}&title=Special%3ASearch&go=Go&ns0=1&ns6=1&ns12=1&ns14=1&ns100=1&ns106=1").read().decode()
-
-    return datos;
+def request_flickr(keywords) -> str:
+    """Return the HTML of the Flickr search page for `keywords`."""
+    from urllib.parse import quote_plus  # local import: escape '&', '#', spaces
+    search_url = FLICKR_URL.format(quote_plus(str(keywords)))
+    return get(search_url, timeout=30).text  # requests never times out by default
 
 
-def scrap_Flickr(datos):
-    soup=BeautifulSoup(datos, features="lxml")
-    tag=soup.find("table", class_="searchResultImage")
-    images=tag.find_all("a", class_="image")
-    for image in images:
-        print(image["href"])
+def scrap_flickr(keywords) -> List[str]:
+    """Return scheme-less live.staticflickr.com .jpg links scraped for `keywords`."""
+    soup = BeautifulSoup(request_flickr(keywords), features="html.parser")
+    images = soup.find_all(
+        "div", class_="view photo-list-photo-view requiredToShowOnServer awake",
+    )
+    # Raw pattern with escaped dots: "\S" in a plain literal is an invalid escape.
+    return findall(r"(live\.staticflickr\.com/\S+\.jpg)", str(images))
 
 
-scrap_Flickr(request_Flickr("paris"))
+scrap_flickr("paris")  # NOTE(review): runs on import; the returned links are discarded
diff --git a/constants.py b/constants.py
index 8ae10eb..31ee60a 100644
--- a/constants.py
+++ b/constants.py
@@ -5,6 +5,7 @@ DATASETS = [
     "deconfinement-rues-amenagees-pour-pietons",
 ]
 URL = "https://opendata.paris.fr/api/records/1.0/search/?dataset={}&q=&rows=-1"
+FLICKR_URL = "https://www.flickr.com/search/?text={}"  # {} is filled with the search keywords
 COLUMNS = {
     "deconfinement-pistes-cyclables-temporaires": [
         "fields.geo_shape.coordinates",