Add source page URL to image results; open it with the "e" key
This commit is contained in:
11
app.py
11
app.py
@@ -38,16 +38,19 @@ def scrape_images(query, page=0):
|
||||
except requests.RequestException as e:
|
||||
return [], str(e)
|
||||
|
||||
# Bing encodes original image URLs as murl":"https://..."
|
||||
raw_urls = re.findall(r'murl":"(https?://[^&]+)"', resp.text)
|
||||
# Bing HTML-encodes image data: purl = source page, murl = image URL
|
||||
entries = re.findall(
|
||||
r'purl":"(https?://[^&]+)".*?murl":"(https?://[^&]+)"',
|
||||
resp.text,
|
||||
)
|
||||
|
||||
seen = set()
|
||||
images = []
|
||||
for img_url in raw_urls:
|
||||
for page_url, img_url in entries:
|
||||
if img_url in seen:
|
||||
continue
|
||||
seen.add(img_url)
|
||||
images.append({"url": img_url})
|
||||
images.append({"url": img_url, "source": page_url})
|
||||
if len(images) >= 30:
|
||||
break
|
||||
|
||||
|
||||
Reference in New Issue
Block a user