# slideshow/get_pics_kona.py

import os
import requests
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor, as_completed

# === CONFIGURATION ===

# Base URL of the site to query; "post.json" is appended to it when fetching.
# Alternative endpoint noted by the original author: https://e621.net/
URL = "https://konachan.com/"

# Tag search string sent as the "tags" request parameter. Replace with your
# own search query.
QUERY = "cleavage barefoot rating:questionableless"

# Directory (relative to the working directory) that receives the downloads.
SAVE_DIR = "semi_safe_cleave"

# Maximum number of posts to collect before downloading starts.
LIMIT = 10000

# Posts requested per page.
# NOTE(review): the original comment cited an API maximum of 320, which looks
# like it was carried over from e621 -- confirm the cap for the site in URL.
PER_PAGE = 100

# Number of parallel download threads.
THREADS = 100

# Value of the User-Agent header sent with every request.
# NOTE(review): the string still identifies an e621 downloader (replace
# 'username' with your e621 username) even though URL points elsewhere --
# confirm whether it should be updated.
USER_AGENT = "e621-downloader/1.0 (by username on e621)"

# HTTP headers shared by the listing requests and the file downloads.
headers = {"User-Agent": USER_AGENT}

# Create the target directory up front; an already existing one is fine.
os.makedirs(SAVE_DIR, exist_ok=True)

# Collect post metadata page by page until LIMIT posts have been gathered or
# the API stops returning results.
all_posts = []
current_page = 1
total_fetched = 0

# (connect, read) timeouts in seconds. Without a timeout, requests waits
# indefinitely on a stalled connection and the script would hang.
FETCH_TIMEOUT = (10, 60)

print("Fetching posts in batches...")

while total_fetched < LIMIT:
    # The per-page size is kept constant on purpose: with page-based
    # pagination, changing "limit" on the last request would shift the
    # offset. Any overshoot is trimmed after the loop instead.
    params = {
        "tags": QUERY,
        "limit": PER_PAGE,
        "page": current_page,
    }

    response = requests.get(
        URL + "post.json",
        headers=headers,
        params=params,
        timeout=FETCH_TIMEOUT,
    )
    response.raise_for_status()
    posts = response.json()

    # The loop expects the response body to be a JSON list of posts. Anything
    # else (e.g. an error object) must not be merged into all_posts, because
    # extending a list with a dict would silently add its keys.
    if not isinstance(posts, list):
        print(f"Unexpected response at page {current_page}: {posts!r}")
        break

    if not posts:
        print(f"No more posts found at page {current_page}.")
        break

    all_posts.extend(posts)
    total_fetched += len(posts)

    print(f"Fetched {len(posts)} posts from page {current_page}. Total so far: {total_fetched}.")

    current_page += 1

    # Stop if we fetched fewer than requested in this batch (meaning it's probably the end)
    if len(posts) < PER_PAGE:
        break

# Trim to desired limit, just in case we overfetched
all_posts = all_posts[:LIMIT]
print(f"Total posts collected for download: {len(all_posts)}")


def download_file(post, timeout=(10, 120)):
    """Download the file belonging to one post into SAVE_DIR.

    The file is saved as "<post id><extension of file_url>".

    Args:
        post: Post metadata dict as returned by the listing request; its
            "file_url" and "id" entries are read.
        timeout: Timeout passed to ``requests.get`` -- either a single number
            of seconds or a ``(connect, read)`` tuple. Prevents a stalled
            transfer from blocking a worker thread forever.

    Returns:
        A status string starting with "Downloaded", "Skipped" (no file URL)
        or "Failed" (any error while building the name, fetching or writing).
        The function never raises for a dict input, so one bad post cannot
        abort the whole batch.
    """
    # .get() rather than indexing: a post without the key must be reported as
    # skipped, not blow up with KeyError before the guard below can run.
    file_url = post.get("file_url")
    if not file_url:
        return f"Skipped: No file URL for post {post.get('id')}"

    # Everything that can fail lives inside the try block: this is the
    # worker-thread boundary, so errors are turned into a status string.
    try:
        file_name = str(post["id"]) + os.path.splitext(file_url)[1]
        file_path = os.path.join(SAVE_DIR, file_name)

        file_response = requests.get(file_url, headers=headers, timeout=timeout)
        file_response.raise_for_status()
        with open(file_path, "wb") as f:
            f.write(file_response.content)
        return f"Downloaded: {file_name}"
    except Exception as e:
        return f"Failed: {file_url} with error {e}"


# Fan the downloads out over a thread pool and report only the problems.
print("Starting download...")

with ThreadPoolExecutor(max_workers=THREADS) as executor:
    # Map each future back to its post so a crash can be attributed.
    futures = {executor.submit(download_file, post): post for post in all_posts}
    for future in tqdm(as_completed(futures), total=len(futures), desc="Downloading"):
        try:
            result = future.result()
        except Exception as e:
            # future.result() re-raises whatever the worker raised; without
            # this guard a single bad post would abort the reporting loop for
            # every remaining download.
            post = futures[future]
            post_id = post.get("id") if isinstance(post, dict) else post
            result = f"Failed: post {post_id} raised {e!r}"
        if result.startswith(("Failed", "Skipped")):
            print(result)

print("Download complete!")