# 84 lines, 2.4 KiB, Python
import re
|
|
import random
|
|
import requests
|
|
from flask import Flask, request, jsonify, send_from_directory
|
|
|
|
# Flask application object; static_folder is set to the "static" directory.
app = Flask(__name__, static_folder="static")
|
|
|
|
# Pool of desktop Chrome 124 User-Agent strings (Windows, macOS, Linux);
# get_headers() picks one at random for each outgoing request.
USER_AGENTS = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
]
|
|
|
|
def get_headers():
    """Return request headers with a User-Agent drawn at random from USER_AGENTS.

    The Accept and Accept-Language values are fixed; only the User-Agent
    varies between calls.
    """
    chosen_agent = random.choice(USER_AGENTS)
    return {
        "User-Agent": chosen_agent,
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.9",
    }
|
|
|
|
def scrape_images(query, page=0):
    """Fetch one page of Bing image-search results for *query*.

    Args:
        query: Search terms, sent to Bing as the ``q`` parameter.
        page: Zero-based page index; translated to Bing's ``first`` offset
            in steps of 35.

    Returns:
        A ``(images, error)`` tuple. On success ``images`` is a list of at
        most 30 ``{"url": ..., "source": ...}`` dicts, de-duplicated by image
        URL, and ``error`` is ``None``. When the HTTP request fails
        (connection error, timeout, non-2xx status), ``images`` is empty and
        ``error`` is the exception message.
    """
    # Function-scope import so this block does not rely on a file-level
    # import that the module does not currently have.
    import html

    # Bing image search async endpoint — returns HTML with embedded image URLs
    try:
        resp = requests.get(
            "https://www.bing.com/images/async",
            params={
                "q": query,
                "first": str(page * 35),
                "count": "35",
                "mmasync": "1",
                # NOTE(review): "adlt"/ADLT=OFF presumably disable Bing's
                # adult-content filter — confirm this is intended.
                "adlt": "off",
            },
            cookies={"SRCHHPGUSR": "ADLT=OFF"},
            headers=get_headers(),
            timeout=10,
        )
        resp.raise_for_status()
    except requests.RequestException as e:
        return [], str(e)

    # Bing HTML-encodes image data: purl = source page, murl = image URL.
    # Decode the entities first so the quote delimiters are literal `"` and
    # `&amp;` inside URLs becomes a plain `&`; then capture up to the closing
    # quote. Excluding `&` from the URL (as the previous pattern did) dropped
    # every URL that carried a multi-parameter query string.
    markup = html.unescape(resp.text)
    entries = re.findall(
        r'purl":"(https?://[^"]+)".*?murl":"(https?://[^"]+)"',
        markup,
    )

    seen = set()
    images = []
    for page_url, img_url in entries:
        if img_url in seen:
            continue
        seen.add(img_url)
        images.append({"url": img_url, "source": page_url})
        # 35 results are requested per page, but at most 30 are returned.
        if len(images) >= 30:
            break

    return images, None
|
|
|
|
|
|
@app.route("/api/search")
def search():
    """Handle ``GET /api/search?q=<terms>&page=<n>``.

    Query parameters:
        q: Required search terms; surrounding whitespace is stripped.
        page: Optional zero-based page index (default 0). Negative values
            are clamped to 0.

    Returns:
        * 200 with ``{"images": [...], "page": n, "query": q}`` on success.
        * 400 with ``{"error": "missing query"}`` when ``q`` is empty.
        * 400 with ``{"error": "invalid page"}`` when ``page`` is not an
          integer.
        * 502 with ``{"error": <message>}`` when ``scrape_images`` reports an
          error.
    """
    q = request.args.get("q", "").strip()
    if not q:
        return jsonify({"error": "missing query"}), 400

    # A non-numeric or empty ``page`` used to raise ValueError out of the
    # view (HTTP 500); report it as a client error instead.
    try:
        page = max(0, int(request.args.get("page", 0)))
    except ValueError:
        return jsonify({"error": "invalid page"}), 400

    images, err = scrape_images(q, page)
    if err:
        return jsonify({"error": err}), 502
    return jsonify({"images": images, "page": page, "query": q})
|
|
|
|
|
|
@app.route("/")
def index():
    """Serve ``index.html`` from the ``static`` directory for the root URL."""
    landing_page = send_from_directory("static", "index.html")
    return landing_page
|
|
|
|
|
|
@app.route("/<path:path>")
def static_files(path):
    """Serve the file at *path* from the ``static`` directory."""
    asset = send_from_directory("static", path)
    return asset
|
|
|
|
|
|
if __name__ == "__main__":
    # Run Flask's built-in server on port 5000, bound to all interfaces.
    app.run(port=5000, host="0.0.0.0")
|