Scrape Flickr images

This commit is contained in:
coolneng 2020-06-14 21:24:27 +02:00
parent 58c0f4897d
commit 23dea062e5
Signed by: coolneng
GPG Key ID: 9893DA236405AF57
2 changed files with 19 additions and 13 deletions

View File

@@ -1,20 +1,25 @@
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
import urllib.request from requests import get
import re from constants import FLICKR_URL
from re import findall
from typing import List
def request_Flickr(keywords): def request_flickr(keywords) -> str:
datos=urllib.request.urlopen("https://commons.wikimedia.org/w/index.php?search={keywords}&title=Special%3ASearch&go=Go&ns0=1&ns6=1&ns12=1&ns14=1&ns100=1&ns106=1").read().decode() search_url = FLICKR_URL.format(keywords)
result = get(search_url)
return datos; html = result.text
return html
def scrap_Flickr(datos): def scrap_flickr(keywords) -> List[str]:
soup=BeautifulSoup(datos, features="lxml") html = request_flickr(keywords)
tag=soup.find("table", class_="searchResultImage") soup = BeautifulSoup(html, features="html.parser")
images=tag.find_all("a", class_="image") images = soup.find_all(
for image in images: "div", class_="view photo-list-photo-view requiredToShowOnServer awake",
print(image["href"]) )
image_links = findall("(live.staticflickr.com/\S+.jpg)", str(images))
return image_links
scrap_Flickr(request_Flickr("paris")) scrap_flickr("paris")

View File

@@ -5,6 +5,7 @@ DATASETS = [
"deconfinement-rues-amenagees-pour-pietons", "deconfinement-rues-amenagees-pour-pietons",
] ]
URL = "https://opendata.paris.fr/api/records/1.0/search/?dataset={}&q=&rows=-1" URL = "https://opendata.paris.fr/api/records/1.0/search/?dataset={}&q=&rows=-1"
FLICKR_URL = "https://www.flickr.com/search/?text={}"
COLUMNS = { COLUMNS = {
"deconfinement-pistes-cyclables-temporaires": [ "deconfinement-pistes-cyclables-temporaires": [
"fields.geo_shape.coordinates", "fields.geo_shape.coordinates",