Move web scraping logic into data_request

2020-06-14 21:29:42 +02:00 · 2020-06-14 21:29:42 +02:00 · dd7f1bab8d
commit dd7f1bab8d
parent b74ceb05c8
2 changed files with 28 additions and 26 deletions
--- a/app/data_request.py
+++ b/app/data_request.py
@ -1,6 +1,10 @@
+from re import findall
+from typing import List
+
+from bs4 import BeautifulSoup
 from requests import get

-from constants import URL
+from constants import FLICKR_URL, URL


 def format_url(dataset) -> str:
@ -21,3 +25,26 @@ def request_dataset(dataset):
    response.raise_for_status()
    data = response.json()
    return data
+
+
+def request_flickr(keywords) -> str:
+    """
+    Returns the HTML of a Flickr search
+
+    The search URL is built by substituting ``keywords`` into the
+    ``FLICKR_URL`` template with ``str.format``.
+
+    Raises ``requests.HTTPError`` when the response has a 4xx/5xx status,
+    so an error page is never returned as if it were search results.
+    """
+    search_url = FLICKR_URL.format(keywords)
+    result = get(search_url)
+    # Fail loudly on HTTP errors, the same way request_dataset does.
+    result.raise_for_status()
+    return result.text
+
+
+def scrap_flickr(keywords) -> List[str]:
+    """
+    Creates a list of image links from a Flickr search
+
+    Fetches the search page with ``request_flickr``, selects the ``<div>``
+    elements matching the photo-tile class string, and extracts every
+    ``live.staticflickr.com/....jpg`` address found in their markup.
+
+    The links are returned exactly as matched (no URL scheme is added).
+    The list is empty when nothing matches.
+    """
+    html = request_flickr(keywords)
+    soup = BeautifulSoup(html, features="html.parser")
+    images = soup.find_all(
+        "div", class_="view photo-list-photo-view requiredToShowOnServer awake",
+    )
+    # Raw string: "\S" must reach the regex engine untouched (a plain string
+    # triggers an invalid-escape warning). Dots are escaped so they match a
+    # literal "." only, not any character.
+    image_links = findall(r"(live\.staticflickr\.com/\S+\.jpg)", str(images))
+    return image_links
--- a/app/pruebascraping.py
+++ b/app/pruebascraping.py
@ -1,25 +0,0 @@
-from bs4 import BeautifulSoup
-from requests import get
-from constants import FLICKR_URL
-from re import findall
-from typing import List
-
-
-def request_flickr(keywords) -> str:
-    search_url = FLICKR_URL.format(keywords)
-    result = get(search_url)
-    html = result.text
-    return html
-
-
-def scrap_flickr(keywords) -> List[str]:
-    html = request_flickr(keywords)
-    soup = BeautifulSoup(html, features="html.parser")
-    images = soup.find_all(
-        "div", class_="view photo-list-photo-view requiredToShowOnServer awake",
-    )
-    image_links = findall("(live.staticflickr.com/\S+.jpg)", str(images))
-    return image_links
-
-
-scrap_flickr("paris")