initial diashow webapp
This commit is contained in:
91
app.py
Normal file
91
app.py
Normal file
@@ -0,0 +1,91 @@
|
||||
import re
|
||||
import random
|
||||
import requests
|
||||
from flask import Flask, request, jsonify, send_from_directory
|
||||
|
||||
# Flask app serving the JSON search API plus the static frontend from ./static.
app = Flask(__name__, static_folder="static")

# Pool of realistic desktop browser User-Agent strings; one is picked at
# random per request (see get_headers) to look less like a bot.
USER_AGENTS = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
]
|
||||
|
||||
def get_headers():
    """Build a browser-like header set with a randomly chosen User-Agent.

    Returns:
        dict: HTTP request headers suitable for ``requests.get``.
    """
    user_agent = random.choice(USER_AGENTS)
    return {
        "User-Agent": user_agent,
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.5",
        "Accept-Encoding": "gzip, deflate, br",
        "DNT": "1",
        "Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1",
    }
|
||||
|
||||
def scrape_images(query, page=0, max_results=20):
    """Scrape direct image URLs from Google Images for *query*.

    Args:
        query: Search term.
        page: Zero-based results page; each page advances 20 results.
        max_results: Upper bound on returned URLs (default 20, matching
            the previous hard-coded cap, so existing callers are unchanged).

    Returns:
        tuple: ``(images, error)`` where ``images`` is a list of
        ``{"url": <str>}`` dicts and ``error`` is ``None`` on success or
        a human-readable message (with an empty list) on request failure.
    """
    url = "https://www.google.com/search"
    params = {
        "q": query,
        "tbm": "isch",  # image-search vertical
        "ijn": str(page),
        "start": str(page * 20),
        # The "ichunk" async endpoint returns a lighter HTML fragment.
        "asearch": "ichunk",
        "async": "_id:rg_s,_pms:s,_fmt:pc",
    }
    try:
        resp = requests.get(url, params=params, headers=get_headers(), timeout=10)
        resp.raise_for_status()
    except requests.RequestException as e:
        return [], str(e)

    # Extract original image URLs: Google embeds them as "ou":"https://..."
    ou_matches = re.findall(r'"ou"\s*:\s*"(https?://[^"]+)"', resp.text)

    # Fallback: look for bare URLs ending in a known image extension
    # (skipping Google's encrypted-tbn thumbnail host).
    if not ou_matches:
        ou_matches = re.findall(
            r'(?:^|[^"])(https?://(?!encrypted-tbn)[^"\'\\]+\.(?:jpg|jpeg|png|gif|webp)(?:\?[^"\'\\]*)?)',
            resp.text,
            re.IGNORECASE,
        )

    seen = set()
    images = []
    for img_url in ou_matches:
        if img_url in seen:
            continue
        seen.add(img_url)
        # Skip Google's own thumbnails / tracking pixels.
        if "google.com" in img_url or "gstatic.com" in img_url:
            continue
        images.append({"url": img_url})
        if len(images) >= max_results:
            break

    return images, None
|
||||
|
||||
|
||||
@app.route("/api/search")
def search():
    """JSON API: ``/api/search?q=<term>&page=<n>``.

    Returns 400 on a missing query or a non-integer ``page``, 502 when the
    upstream scrape fails, otherwise ``{"images": [...], "page": n, "query": q}``.
    """
    q = request.args.get("q", "").strip()
    # Bug fix: a non-numeric ?page= used to raise ValueError -> HTTP 500.
    # Reject it explicitly as a client error instead.
    try:
        page = max(0, int(request.args.get("page", 0)))
    except ValueError:
        return jsonify({"error": "page must be an integer"}), 400
    if not q:
        return jsonify({"error": "missing query"}), 400
    images, err = scrape_images(q, page)
    if err:
        # Upstream (Google) request failed; surface as a bad-gateway error.
        return jsonify({"error": err}), 502
    return jsonify({"images": images, "page": page, "query": q})
|
||||
|
||||
|
||||
@app.route("/")
def index():
    """Serve the single-page frontend shell."""
    frontend = "index.html"
    return send_from_directory("static", frontend)
|
||||
|
||||
|
||||
@app.route("/<path:path>")
def static_files(path):
    """Serve any other asset (JS/CSS/images) out of the static folder."""
    directory = "static"
    return send_from_directory(directory, path)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Dev entry point: bind on all interfaces so the app is reachable
    # from other machines on the network (port 5000).
    app.run(host="0.0.0.0", port=5000)
|
||||
Reference in New Issue
Block a user