Files
diashow/app.py

79 lines
2.3 KiB
Python

import html
import random
import re

import requests
from flask import Flask, request, jsonify, send_from_directory
# Flask application object; its static folder is the "static" directory.
app = Flask(__name__, static_folder="static")
# Desktop Chrome 124 User-Agent strings; they differ only in the platform token.
_UA_PREFIX = "Mozilla/5.0 ("
_UA_SUFFIX = ") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"
_UA_PLATFORMS = (
    "Windows NT 10.0; Win64; x64",
    "Macintosh; Intel Mac OS X 10_15_7",
    "X11; Linux x86_64",
)

# Pool of User-Agent header values: Windows, macOS and Linux, in that order.
USER_AGENTS = [_UA_PREFIX + platform + _UA_SUFFIX for platform in _UA_PLATFORMS]
def get_headers():
    """Build the HTTP request headers used for outgoing search requests.

    Returns:
        dict: ``User-Agent`` (picked at random from ``USER_AGENTS`` on each
        call), ``Accept`` and ``Accept-Language`` header values.
    """
    headers = {"User-Agent": random.choice(USER_AGENTS)}
    headers["Accept"] = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8"
    headers["Accept-Language"] = "en-US,en;q=0.9"
    return headers
# Captures the URL of every ``murl":"<url>"`` pair in entity-decoded markup.
# The capture stops at the closing quote so it cannot run into later fields.
_MURL_RE = re.compile(r'murl":"(https?://[^"]+)"')

# Upper bound on the number of images returned by one scrape_images() call.
_MAX_IMAGES = 30


def _extract_image_urls(markup, limit=_MAX_IMAGES):
    """Return up to *limit* distinct ``murl`` URLs from *markup*, in order of appearance."""
    # The metadata may arrive HTML-entity-encoded (``&quot;`` for quotes,
    # ``&amp;`` inside URLs); decoding first lets one pattern handle both the
    # encoded and the plain form, and keeps URLs that contain ``&``.
    decoded = html.unescape(markup)
    # dict.fromkeys() drops duplicates while preserving first-seen order.
    distinct = dict.fromkeys(_MURL_RE.findall(decoded))
    return list(distinct)[:limit]


def scrape_images(query, page=0):
    """Fetch one page of Bing image-search results for *query*.

    Args:
        query: Search terms, sent as the ``q`` request parameter.
        page: Zero-based page index; the result offset sent to Bing is
            ``page * 35``.

    Returns:
        tuple: ``(images, error)``. On success ``images`` is a list of at
        most 30 ``{"url": <image url>}`` dicts (duplicates removed, original
        order kept) and ``error`` is ``None``. If the HTTP request fails or
        returns an error status, ``images`` is ``[]`` and ``error`` is the
        exception message.
    """
    # Bing image search async endpoint — returns HTML with embedded image URLs
    try:
        resp = requests.get(
            "https://www.bing.com/images/async",
            params={
                "q": query,
                "first": str(page * 35),
                "count": "35",
                "mmasync": "1",
            },
            headers=get_headers(),
            timeout=10,
        )
        resp.raise_for_status()
    except requests.RequestException as e:
        return [], str(e)

    images = [{"url": img_url} for img_url in _extract_image_urls(resp.text)]
    return images, None
@app.route("/api/search")
def search():
    """Handle ``GET /api/search?q=<terms>&page=<n>``.

    Query parameters:
        q: Search terms (required; surrounding whitespace is stripped).
        page: Zero-based page index (optional, default 0; negative values
            are clamped to 0).

    Returns:
        200 with ``{"images": [...], "page": n, "query": q}`` on success,
        400 with ``{"error": ...}`` when ``q`` is missing/empty or ``page``
        is not an integer, and 502 with ``{"error": ...}`` when
        ``scrape_images`` reports a failure.
    """
    q = request.args.get("q", "").strip()
    if not q:
        return jsonify({"error": "missing query"}), 400

    # A non-numeric ``page`` is a client error; without this guard int()
    # would raise ValueError and surface as a 500 response.
    try:
        page = max(0, int(request.args.get("page", 0)))
    except ValueError:
        return jsonify({"error": "invalid page"}), 400

    images, err = scrape_images(q, page)
    if err:
        return jsonify({"error": err}), 502
    return jsonify({"images": images, "page": page, "query": q})
@app.route("/")
def index():
    """Serve ``index.html`` from the ``static`` directory for the site root."""
    landing_page = "index.html"
    return send_from_directory("static", landing_page)
@app.route("/<path:path>")
def static_files(path):
    """Serve the file named by the URL *path* from the ``static`` directory."""
    static_dir = "static"
    return send_from_directory(static_dir, path)
if __name__ == "__main__":
    # Script entry point: run the app's built-in server on every network
    # interface (0.0.0.0), port 5000.
    app.run(port=5000, host="0.0.0.0")