"""Scraping test for the image source (Wikimedia Commons search results).

Contents of ``app/pruebascraping.py`` as created by commit
58c0f4897dfcdcdd8f1301f41af3a4e6e57c35e7 (author: basset,
Sun, 14 Jun 2020 18:33:05 +0200; subject, translated:
"Testing the scraping of the image source.").

Note: the functions keep their historical ``*_Flickr`` names for backward
compatibility, but the URL they query is on commons.wikimedia.org.
"""

import re
import urllib.parse
import urllib.request

# Base address of the search page; the query string is appended to it.
_SEARCH_ENDPOINT = "https://commons.wikimedia.org/w/index.php"


def build_search_url(keywords):
    """Return the search URL for *keywords*.

    The search terms are percent-encoded, so spaces, ``&``, ``=`` and
    non-ASCII characters cannot corrupt the query string.

    Args:
        keywords: Free-text search terms.

    Returns:
        The full URL as a string.
    """
    # Parameter names, values and order mirror the URL of the original
    # script; the ``ns*`` flags presumably select the namespaces to search
    # (TODO confirm against the MediaWiki search documentation).
    query = urllib.parse.urlencode(
        [
            ("search", keywords),
            ("title", "Special:Search"),
            ("go", "Go"),
            ("ns0", 1),
            ("ns6", 1),
            ("ns12", 1),
            ("ns14", 1),
            ("ns100", 1),
            ("ns106", 1),
        ]
    )
    return _SEARCH_ENDPOINT + "?" + query


def request_Flickr(keywords):
    """Download the search results page for *keywords*.

    Previously the ``{keywords}`` placeholder sat in a plain (non-formatted)
    string, so the argument was ignored and the literal text ``{keywords}``
    was sent as the search term. The URL is now built from the argument.

    Args:
        keywords: Free-text search terms.

    Returns:
        The response body decoded as UTF-8 text.

    Raises:
        urllib.error.URLError: If the request fails (propagated from
            ``urllib.request.urlopen``).
    """
    # The context manager closes the HTTP response even if read/decode fails.
    with urllib.request.urlopen(build_search_url(keywords)) as response:
        return response.read().decode("utf-8")


def scrap_Flickr(datos):
    """Print the ``href`` of every image link found in a results page.

    Looks for the first ``<table class="searchResultImage">`` in *datos* and
    prints the ``href`` attribute of each ``<a class="image">`` inside it.
    Prints nothing when the table is missing (for example, a search without
    results) instead of raising ``AttributeError``.

    Args:
        datos: HTML text of the search results page.
    """
    # Imported here so that the URL/fetch helpers above remain importable
    # on systems where bs4/lxml are not installed.
    from bs4 import BeautifulSoup

    soup = BeautifulSoup(datos, features="lxml")
    tag = soup.find("table", class_="searchResultImage")
    if tag is None:
        # ``find`` returns None when nothing matches; there is nothing to list.
        return

    for image in tag.find_all("a", class_="image"):
        href = image.get("href")
        # Skip anchors without an href rather than failing with KeyError.
        if href is not None:
            print(href)


if __name__ == "__main__":
    # Manual smoke test; guarded so that importing this module does not
    # trigger a network request.
    scrap_Flickr(request_Flickr("paris"))