Files
slideshow/get_pics_kona.py
2025-07-11 20:42:51 +02:00

89 lines
2.7 KiB
Python

import os
import requests
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor, as_completed
# Base URL of the image board; "post.json" is appended to it when querying posts.
URL = "https://konachan.com/"  # alternative noted by the author: https://e621.net/
# === CONFIGURATION ===
QUERY = "cleavage barefoot rating:questionableless"  # Tag search string sent as the "tags" parameter; replace with your search query
SAVE_DIR = "semi_safe_cleave"  # Directory that downloaded files are written into
LIMIT = 10000  # Maximum number of posts to collect before downloading starts
# Number of posts requested per page.
# NOTE(review): the original remark "API max is 320" appears to describe e621, not the
# Konachan URL configured above -- confirm the per-request cap of the site in use.
PER_PAGE = 100
# NOTE(review): this User-Agent still identifies the client as an e621 downloader even
# though URL points at Konachan; replace 'username' (and probably the client name).
USER_AGENT = "e621-downloader/1.0 (by username on e621)"
THREADS = 100  # Number of parallel download threads
# Ensure save directory exists
os.makedirs(SAVE_DIR, exist_ok=True)
# Headers sent with the HTTP requests made by this script.
headers = {"User-Agent": USER_AGENT}
# Pagination state for the fetch loop: accumulated posts, next page number, running count.
all_posts = []
current_page = 1
total_fetched = 0
print("Fetching posts in batches...")
# Page through the post listing until LIMIT posts have been collected or the
# API stops returning results.
while total_fetched < LIMIT:
    params = {
        "tags": QUERY,
        "limit": PER_PAGE,
        "page": current_page,
    }
    # An explicit timeout keeps a stalled connection from hanging the script;
    # requests waits indefinitely when no timeout is given.
    response = requests.get(
        URL + "post.json", headers=headers, params=params, timeout=30
    )
    response.raise_for_status()
    data = response.json()
    # The decoded response body is used directly as the list of posts.
    posts = data
    if not posts:
        print(f"No more posts found at page {current_page}.")
        break
    all_posts.extend(posts)
    total_fetched += len(posts)
    print(f"Fetched {len(posts)} posts from page {current_page}. Total so far: {total_fetched}.")
    current_page += 1
    # Stop if we fetched fewer than requested in this batch (meaning it's probably the end)
    if len(posts) < PER_PAGE:
        break

# Trim to desired limit, just in case we overfetched
all_posts = all_posts[:LIMIT]
print(f"Total posts collected for download: {len(all_posts)}")
def download_file(post, timeout=60):
    """Download the file belonging to one post into SAVE_DIR.

    Args:
        post: Post dictionary from the API; its "id" and "file_url" entries
            are read.
        timeout: Seconds to wait on the HTTP request before giving up.

    Returns:
        A status string starting with "Downloaded", "Skipped" or "Failed".
        Errors raised by the HTTP request or the file write are reported
        through the "Failed" string instead of being raised.
    """
    # Use .get() so a post without a "file_url" key is reported as skipped
    # instead of raising KeyError out of the worker thread.
    file_url = post.get("file_url")
    if not file_url:
        return f"Skipped: No file URL for post {post.get('id')}"

    # Name the file after the post id, keeping the extension found in the URL.
    file_name = str(post["id"]) + os.path.splitext(file_url)[1]
    file_path = os.path.join(SAVE_DIR, file_name)
    try:
        # Without a timeout a single stalled transfer would block its worker
        # thread indefinitely.
        file_response = requests.get(file_url, headers=headers, timeout=timeout)
        file_response.raise_for_status()
        with open(file_path, "wb") as f:
            f.write(file_response.content)
        return f"Downloaded: {file_name}"
    except Exception as e:
        # Deliberately broad: one bad download must not abort the whole batch,
        # so every failure is turned into a status string for the caller.
        return f"Failed: {file_url} with error {e}"
print("Starting download...")
# Fan the downloads out over a thread pool and report problems as they finish.
with ThreadPoolExecutor(max_workers=THREADS) as pool:
    pending = [pool.submit(download_file, item) for item in all_posts]
    progress = tqdm(as_completed(pending), total=len(pending), desc="Downloading")
    for finished in progress:
        outcome = finished.result()
        # Only unsuccessful outcomes are printed; successes just advance the bar.
        if outcome.startswith(("Failed", "Skipped")):
            print(outcome)
print("Download complete!")