Scrape Flickr images
This commit is contained in:
parent
58c0f4897d
commit
23dea062e5
|
@ -1,20 +1,25 @@
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
import urllib.request
|
from requests import get
|
||||||
import re
|
from constants import FLICKR_URL
|
||||||
|
from re import findall
|
||||||
|
from typing import List
|
||||||
|
|
||||||
|
|
||||||
def request_Flickr(keywords):
|
def request_flickr(keywords) -> str:
|
||||||
datos=urllib.request.urlopen("https://commons.wikimedia.org/w/index.php?search={keywords}&title=Special%3ASearch&go=Go&ns0=1&ns6=1&ns12=1&ns14=1&ns100=1&ns106=1").read().decode()
|
search_url = FLICKR_URL.format(keywords)
|
||||||
|
result = get(search_url)
|
||||||
return datos;
|
html = result.text
|
||||||
|
return html
|
||||||
|
|
||||||
|
|
||||||
def scrap_Flickr(datos):
|
def scrap_flickr(keywords) -> List[str]:
|
||||||
soup=BeautifulSoup(datos, features="lxml")
|
html = request_flickr(keywords)
|
||||||
tag=soup.find("table", class_="searchResultImage")
|
soup = BeautifulSoup(html, features="html.parser")
|
||||||
images=tag.find_all("a", class_="image")
|
images = soup.find_all(
|
||||||
for image in images:
|
"div", class_="view photo-list-photo-view requiredToShowOnServer awake",
|
||||||
print(image["href"])
|
)
|
||||||
|
image_links = findall("(live.staticflickr.com/\S+.jpg)", str(images))
|
||||||
|
return image_links
|
||||||
|
|
||||||
|
|
||||||
scrap_Flickr(request_Flickr("paris"))
|
scrap_flickr("paris")
|
||||||
|
|
|
@ -5,6 +5,7 @@ DATASETS = [
|
||||||
"deconfinement-rues-amenagees-pour-pietons",
|
"deconfinement-rues-amenagees-pour-pietons",
|
||||||
]
|
]
|
||||||
URL = "https://opendata.paris.fr/api/records/1.0/search/?dataset={}&q=&rows=-1"
|
URL = "https://opendata.paris.fr/api/records/1.0/search/?dataset={}&q=&rows=-1"
|
||||||
|
FLICKR_URL = "https://www.flickr.com/search/?text={}"
|
||||||
COLUMNS = {
|
COLUMNS = {
|
||||||
"deconfinement-pistes-cyclables-temporaires": [
|
"deconfinement-pistes-cyclables-temporaires": [
|
||||||
"fields.geo_shape.coordinates",
|
"fields.geo_shape.coordinates",
|
||||||
|
|
Loading…
Reference in New Issue