"""Minimal Flask app that scrapes Google Images and serves a static front end.

Routes:
    GET /api/search?q=<query>&page=<n>  -> JSON list of image URLs
    GET /                               -> static/index.html
    GET /<path>                         -> any other file under static/
"""
import random
import re
from urllib.parse import urlsplit

import requests
from flask import Flask, jsonify, request, send_from_directory

app = Flask(__name__, static_folder="static")

# Number of results requested per page and the cap on results returned.
RESULTS_PER_PAGE = 20

# Seconds to wait for the upstream search request before giving up.
REQUEST_TIMEOUT_SECONDS = 10

# Hosts whose images are skipped (Google's own thumbnails / tracking pixels).
GOOGLE_DOMAINS = ("google.com", "gstatic.com")

USER_AGENTS = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
]

# Original image URLs: Google embeds them as "ou":"https://..."
_OU_PATTERN = re.compile(r'"ou"\s*:\s*"(https?://[^"]+)"')

# Fallback: look for image URLs in JSON arrays ["https://...", width, height]
# NOTE(review): the leading `[^"]` requires the character before the URL to be
# something other than a double quote, which seems at odds with the quoted
# form described above -- confirm against a real response before changing it.
_FALLBACK_PATTERN = re.compile(
    r'(?:^|[^"])(https?://(?!encrypted-tbn)[^"\'\\]+\.(?:jpg|jpeg|png|gif|webp)(?:\?[^"\'\\]*)?)',
    re.IGNORECASE,
)


def get_headers():
    """Return browser-like request headers with a randomly chosen User-Agent."""
    return {
        "User-Agent": random.choice(USER_AGENTS),
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.5",
        "Accept-Encoding": "gzip, deflate, br",
        "DNT": "1",
        "Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1",
    }


def _is_google_url(url):
    """Return True when *url* is hosted on one of GOOGLE_DOMAINS.

    The hostname is compared rather than the whole URL so that third-party
    images whose path or query string merely mentions a Google domain are
    kept.
    """
    try:
        host = urlsplit(url).hostname or ""
    except ValueError:
        # Unparsable URL: fall back to the plain substring test.
        return any(domain in url for domain in GOOGLE_DOMAINS)
    return any(
        host == domain or host.endswith("." + domain)
        for domain in GOOGLE_DOMAINS
    )


def scrape_images(query, page=0):
    """Fetch one page of Google Images results for *query*.

    Args:
        query: Search text sent as the ``q`` parameter.
        page: Zero-based page index; also used to derive the ``start`` offset.

    Returns:
        A ``(images, error)`` tuple. On success ``images`` is a list of up to
        RESULTS_PER_PAGE ``{"url": ...}`` dicts (possibly empty) and ``error``
        is None. If the HTTP request fails, ``images`` is empty and ``error``
        is the exception message.
    """
    url = "https://www.google.com/search"
    params = {
        "q": query,
        "tbm": "isch",
        "ijn": str(page),
        "start": str(page * RESULTS_PER_PAGE),
        "asearch": "ichunk",
        "async": "_id:rg_s,_pms:s,_fmt:pc",
    }
    try:
        resp = requests.get(
            url,
            params=params,
            headers=get_headers(),
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
        resp.raise_for_status()
    except requests.RequestException as e:
        return [], str(e)

    candidates = _OU_PATTERN.findall(resp.text)
    if not candidates:
        candidates = _FALLBACK_PATTERN.findall(resp.text)

    seen = set()
    images = []
    for img_url in candidates:
        if img_url in seen:
            continue
        seen.add(img_url)
        # skip google's own thumbnails / tracking pixels
        if _is_google_url(img_url):
            continue
        images.append({"url": img_url})
        if len(images) >= RESULTS_PER_PAGE:
            break
    return images, None


@app.route("/api/search")
def search():
    """Handle ``GET /api/search``.

    Query parameters:
        q: Required search text.
        page: Optional zero-based page index (default 0; negatives clamp to 0).

    Responds with 400 when ``q`` is missing or ``page`` is not an integer,
    502 when the upstream request fails, and otherwise a JSON object with
    ``images``, ``page`` and ``query``.
    """
    q = request.args.get("q", "").strip()
    if not q:
        return jsonify({"error": "missing query"}), 400

    try:
        page = max(0, int(request.args.get("page", 0)))
    except ValueError:
        # A non-numeric page is a client error, not a server crash.
        return jsonify({"error": "invalid page"}), 400

    images, err = scrape_images(q, page)
    if err:
        return jsonify({"error": err}), 502
    return jsonify({"images": images, "page": page, "query": q})


@app.route("/")
def index():
    """Serve the front-end entry page."""
    return send_from_directory("static", "index.html")


# The rule must capture the requested path; registering this view on "/"
# would collide with index() and never supply the `path` argument.
@app.route("/<path:path>")
def static_files(path):
    """Serve any other file from the ``static`` directory."""
    return send_from_directory("static", path)


if __name__ == "__main__":
    app.run(host="0.0.0.0", port=5000)