90 lines
2.6 KiB
Python
90 lines
2.6 KiB
Python
import os
|
|
import requests
|
|
from tqdm import tqdm
|
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
|
|
URL = "https://yande.re/" #https://e621.net/
|
|
# === CONFIGURATION ===
|
|
QUERY = "loli bondage" # Replace with your search query
|
|
SAVE_DIR = "lori_bondage"
|
|
LIMIT = 10000
|
|
PER_PAGE = 100 # API max is 320, but let's be safe
|
|
USER_AGENT = "e621-downloader/1.0 (by username on e621)" # Replace 'username' with your e621 username
|
|
THREADS = 100 # Number of parallel download threads
|
|
|
|
# Ensure save directory exists
|
|
os.makedirs(SAVE_DIR, exist_ok=True)
|
|
|
|
headers = {"User-Agent": USER_AGENT}
|
|
|
|
all_posts = []
|
|
current_page = 1
|
|
total_fetched = 0
|
|
|
|
print("Fetching posts in batches...")
|
|
|
|
while total_fetched < LIMIT:
|
|
params = {
|
|
"tags": QUERY,
|
|
"limit": PER_PAGE,
|
|
"page": current_page,
|
|
}
|
|
|
|
response = requests.get(URL + "post.json", headers=headers, params=params)
|
|
response.raise_for_status()
|
|
data = response.json()
|
|
#print(data[0]["tags"])
|
|
posts = data
|
|
#print (posts)
|
|
#posts = data.get("posts", [])
|
|
if not posts:
|
|
print(f"No more posts found at page {current_page}.")
|
|
break
|
|
|
|
all_posts.extend(posts)
|
|
total_fetched += len(posts)
|
|
|
|
print(f"Fetched {len(posts)} posts from page {current_page}. Total so far: {total_fetched}.")
|
|
|
|
current_page += 1
|
|
|
|
# Stop if we fetched fewer than requested in this batch (meaning it's probably the end)
|
|
if len(posts) < PER_PAGE:
|
|
break
|
|
|
|
# Trim to desired limit, just in case we overfetched
|
|
all_posts = all_posts[:LIMIT]
|
|
print(f"Total posts collected for download: {len(all_posts)}")
|
|
|
|
|
|
def download_file(post):
|
|
#file_url = post.get("file", {}).get("url")
|
|
file_url = post["file_url"]
|
|
if not file_url:
|
|
return f"Skipped: No file URL for post {post.get('id')}"
|
|
|
|
#file_name = os.path.basename(file_url)
|
|
file_name = str(post["id"]) + os.path.splitext(post["file_url"])[1]
|
|
file_path = os.path.join(SAVE_DIR, file_name)
|
|
|
|
try:
|
|
file_response = requests.get(file_url, headers=headers)
|
|
file_response.raise_for_status()
|
|
with open(file_path, "wb") as f:
|
|
f.write(file_response.content)
|
|
return f"Downloaded: {file_name}"
|
|
except Exception as e:
|
|
return f"Failed: {file_url} with error {e}"
|
|
|
|
|
|
print("Starting download...")
|
|
|
|
with ThreadPoolExecutor(max_workers=THREADS) as executor:
|
|
futures = [executor.submit(download_file, post) for post in all_posts]
|
|
for future in tqdm(as_completed(futures), total=len(futures), desc="Downloading"):
|
|
result = future.result()
|
|
if result.startswith("Failed") or result.startswith("Skipped"):
|
|
print(result)
|
|
|
|
print("Download complete!")
|