big refactor go brrr *wags tail* uwu
patchie notesies >w< :

new stuff arf arf!!
- unified downloader!! one pawsome script for e621, konachan + yandere
- sqlite db saves all imgs + tags so no more icky duplicates woof
- api keys live in .env now so they stop being naked in the code omg
- requirements.txt so setup is ez pz for good girls

web ui glow up *zoomies*!!
- tag search replaces boring folder picker yay!!
- tag sidebar shows all tags on current pic w/ counts, click to search or + to add uwu
- get images button right in the ui!! pick ur site n go!!
- live download progress bar via SSE *bouncy bouncy*
- press E to open the og post on the booru site hehe
- fullscreen zoom + pan w/ scroll wheel + drag drag
- reload remembers which pic u were on !! no more jumping to 1 >:(

deleted stuff rip!!
- Catgirl.py (rip girlie u served well)
- get_pics_621/kona/yan.py (all grown up into downloader.py now)
This commit is contained in:
2
.env.example
Normal file
2
.env.example
Normal file
@@ -0,0 +1,2 @@
|
||||
E621_LOGIN=your_e621_username
|
||||
E621_API_KEY=your_e621_api_key
|
||||
4
.gitignore
vendored
4
.gitignore
vendored
@@ -1 +1,3 @@
|
||||
Pictures/
|
||||
Pictures/
|
||||
.env
|
||||
booru.db
|
||||
158
Catgirl.py
158
Catgirl.py
@@ -1,158 +0,0 @@
|
||||
import urllib.request
|
||||
|
||||
tags = "wedding_dress" #separate with +
|
||||
rating = "rating%3A" + "all" #all, safe, questionable, questionableplus, explicit
|
||||
order = "order%3A" + "date" #date, fav, score, random, wide, nonwide
|
||||
site = "konachan.com" #konachan.com, yande.re, danbooru.donmai.us
|
||||
downloadnummer = 1
|
||||
|
||||
def konaBild(nummer):
    """Download the single post at result position *nummer*.

    Requests ``post.xml`` with ``limit=1`` for the module-level ``tags``,
    ``order`` and ``rating`` on ``site``, then saves the post's ``file_url``
    as ``konachan/<id>.jpg``.

    Returns the saved path, or ``403`` when the response is shorter than
    100 characters (treated as "no more posts").
    """
    postnummer = nummer
    url = 'https://'+site+'/post.xml?page='+str(postnummer)+'&limit=1&tags=' + tags + '+' + order + '+' + rating
    req = urllib.request.Request(url, headers={'User-Agent': 'XYZ/3.0'})
    # Close the HTTP response deterministically instead of leaking the socket.
    with urllib.request.urlopen(req, timeout=100) as response:
        # str(bytes) yields the "b'...'" repr; formatText() relies on the
        # escaped "\\n" sequences that this repr contains.
        text = str(response.read())

    if len(text) < 100:
        print("end of list")
        return 403

    text = formatText(text)
    # Parse the target path once and reuse it for download and return value.
    ziel = "konachan/" + getId(text) + ".jpg"
    urllib.request.urlretrieve(getBild(text), ziel)
    return ziel
|
||||
|
||||
|
||||
def getAll():
    """Download every post matching the module-level ``tags`` and ``rating``.

    Fetches the first result page to read the total post count, prints it,
    then calls ``konaMasse`` for each page of 100 posts.
    """
    postnummer = 1
    url = 'https://'+site+'/post.xml?page='+str(postnummer)+'&limit=100&tags=' + tags + '+' + rating
    req = urllib.request.Request(url, headers={'User-Agent': 'XYZ/3.0'})
    # Close the HTTP response deterministically instead of leaking the socket.
    with urllib.request.urlopen(req, timeout=100) as response:
        text = str(response.read())

    postanzahl = int(getPostanzahl(text))
    print("Postanzahl: "+ str(postanzahl))

    # One extra page covers the remainder when the count is not a multiple of 100.
    seiten = int(postanzahl / 100) + 1
    for seite in range(1, seiten + 1):
        konaMasse(seite)
|
||||
|
||||
def getPostanzahl(input):
    """Return the value of the ``posts count="..."`` attribute as a string.

    Raises:
        ValueError: if the attribute is missing or its value is not
            terminated by a double quote.
    """
    marker = input.find('posts count')
    if marker == -1:
        # Previously find()'s -1 was used as an offset and garbage was returned.
        raise ValueError("no 'posts count' attribute found")
    start = marker + 13  # len('posts count="')
    end = input.find('"', start)
    if end == -1:
        raise ValueError("unterminated 'posts count' attribute value")
    return input[start:end]
|
||||
|
||||
|
||||
def konaMasse(nummer):
    """Download all posts on result page *nummer* (100 posts per page).

    Each post's ``file_url`` is saved as ``konachan/<id>.jpg`` and progress
    is printed. The module-level ``downloadnummer`` counter is incremented
    once per file.

    Returns ``403`` once fewer than 1000 characters of response remain
    (treated as "no more posts"); otherwise returns ``None`` after at most
    110 iterations.
    """
    global downloadnummer

    postnummer = nummer
    url = 'https://'+site+'/post.xml?page='+str(postnummer)+'&limit=100&tags=' + tags + '+' + rating
    req = urllib.request.Request(url, headers={'User-Agent': 'XYZ/3.0'})
    # Close the HTTP response deterministically instead of leaking the socket.
    with urllib.request.urlopen(req, timeout=100) as response:
        text = str(response.read())

    text = formatText(text)

    for _ in range(110):
        if len(text) < 1000:
            print("end of list")
            return 403

        # Parse each post once; the values are reused for download and logging.
        bild_url = getBild(text)
        post_id = getId(text)

        urllib.request.urlretrieve(bild_url, "konachan/" + post_id + ".jpg")
        print("Downloadnummer:" + str(downloadnummer))
        downloadnummer = downloadnummer + 1
        print(bild_url)
        print("Id:"+post_id)
        print("len:"+str(len(text)))

        # Advance past the current post: skip beyond its sample_file_size attribute.
        naechster = text.find("sample_file_size=")
        print(naechster)
        text = text[naechster+30:]
|
||||
|
||||
|
||||
|
||||
|
||||
def Debugtext(text):
    """Print *text*, the image URL and post id parsed from it, then a blank gap."""
    print(text)
    bild_url = getBild(text)
    print(bild_url)
    post_id = getId(text)
    print(post_id)
    print("\n\n\n\n")
|
||||
|
||||
def formatText(input):
    """Reformat the ``str(bytes)`` dump of an XML response, one attribute per line.

    A small state machine over the characters of *input*:

    * a double quote followed by a space becomes ``"`` plus a newline, so
      each ``name="value"`` pair ends its own line;
    * a backslash followed by ``n`` (the escaped newline in a bytes repr)
      becomes a real newline;
    * any other pending quote or backslash is emitted unchanged before the
      next character.

    A quote or backslash still pending at the end of input is dropped.
    """
    parts = []  # collected pieces; joined once to avoid quadratic concatenation
    pending_backslash = False
    pending_quote = False

    for x in input:
        if x == "\"":
            if pending_quote:
                # Two quotes in a row: flush the earlier one.
                parts.append(x)
            pending_quote = True
            continue

        if x == "\\":
            if pending_backslash:
                # Two backslashes in a row: flush the earlier one.
                parts.append(x)
            pending_backslash = True
            continue

        if pending_quote and x == " ":
            parts.append("\"\n")
            pending_quote = False
            continue

        if pending_backslash and x == "n":
            parts.append("\n")
            pending_backslash = False
            continue

        if pending_quote:
            parts.append("\"")
            pending_quote = False

        if pending_backslash:
            parts.append("\\")
            pending_backslash = False

        parts.append(x)

    return "".join(parts)
|
||||
|
||||
def getBild(input):
    """Return the value of the first ``file_url="..."`` attribute in *input*.

    Raises:
        ValueError: if the attribute is missing or its value is not
            terminated by a double quote.
    """
    marker = input.find('file_url')
    if marker == -1:
        # Previously find()'s -1 was used as an offset and garbage was returned.
        raise ValueError("no 'file_url' attribute found")
    start = marker + 10  # len('file_url="')
    end = input.find('"', start)
    if end == -1:
        raise ValueError("unterminated 'file_url' attribute value")
    return input[start:end]
|
||||
|
||||
def getId(input):
    """Return the value of the first ``id="..."`` attribute that starts a line.

    The leading newline in the search marker keeps attributes whose names
    merely end in ``id`` from matching.

    Raises:
        ValueError: if the attribute is missing or its value is not
            terminated by a double quote.
    """
    marker = input.find('\nid=')
    if marker == -1:
        # Previously find()'s -1 was used as an offset and garbage was returned.
        raise ValueError("no 'id' attribute found")
    start = marker + 5  # len('\nid="')
    end = input.find('"', start)
    if end == -1:
        raise ValueError("unterminated 'id' attribute value")
    return input[start:end]
|
||||
|
||||
#for i in range(60):
|
||||
#print(i)
|
||||
#konaBild(i)
|
||||
#konaBild(9)
|
||||
#konaMasse(1)
|
||||
getAll()
|
||||
|
||||
|
||||
#urllib.request.urlretrieve("https://i.pinimg.com/236x/27/5b/57/275b57e1d12078cb48891894a78400e0.jpg", "local-filename.jpg")
|
||||
94
Slideshow.py
94
Slideshow.py
@@ -1,19 +1,93 @@
|
||||
from flask import Flask, render_template, request
|
||||
import os
|
||||
import json
|
||||
import subprocess
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
import uuid
|
||||
from flask import Flask, render_template, request, redirect, url_for, Response
|
||||
|
||||
from db import init_db, search_images
|
||||
|
||||
# Serve the downloaded files in Pictures/ under /pictures/<filename>.
# `app` must be created exactly once: a second, later `Flask(...)` call
# without static_url_path would rebind the name and break the /pictures URLs
# that the slideshow view generates.
app = Flask(__name__, static_folder='Pictures', static_url_path='/pictures')
init_db()

# job_id -> {'done': int, 'total': int, 'finished': bool, 'site': str, 'tags': str}
downloads = {}
|
||||
|
||||
@app.route('/')
def slideshow():
    """Render the slideshow for all stored images matching the ``tags`` query.

    Query parameters:
        tags:   space-separated search terms; empty means "all images".
        job_id: optional id of a running download whose progress the page
                should display.

    The former folder-picker logic (listing sub-folders of ``Pictures``) is
    removed: it was unreachable after the ``return`` and its directory
    listing raised when ``Pictures`` did not exist yet.
    """
    raw_query = request.args.get('tags', '').strip()
    results = search_images(raw_query)
    return render_template(
        'slideshow.html',
        images=[f'/pictures/{r["filename"]}' for r in results],
        post_urls=[r['post_url'] for r in results],
        tags_list=[r['tags'].split() for r in results],
        active_tags=raw_query.split(),
        tag_query=raw_query,
        job_id=request.args.get('job_id'),
    )
|
||||
@app.route('/download', methods=['POST'])
def download():
    """Start ``downloader.py`` for the submitted tags/site and track its progress.

    Form fields:
        tags: space-separated tag query (required; an empty value just
              redirects back to the slideshow).
        site: downloader site key, defaults to ``'e621'``.

    A background thread parses the child's stdout protocol (``total:N``,
    ``progress:D/T``, ``done``) into ``downloads[job_id]``. The response
    redirects to the slideshow with the new ``job_id``.
    """
    tags = request.form.get('tags', '').strip()
    site = request.form.get('site', 'e621')
    if not tags:
        return redirect(url_for('slideshow', tags=tags))

    job_id = uuid.uuid4().hex[:8]
    job = {'done': 0, 'total': 0, 'finished': False, 'site': site, 'tags': tags}
    downloads[job_id] = job

    proc = subprocess.Popen(
        # The "--opt=value" form keeps argparse from mistaking a value that
        # starts with "-" (e.g. the negated tag "-solo") for another option.
        [sys.executable, 'downloader.py', f'--site={site}', f'--query={tags}'],
        cwd=app.root_path,
        stdout=subprocess.PIPE,
        text=True,
    )

    def read_stdout():
        """Mirror the child's progress lines into the shared job record."""
        try:
            for line in proc.stdout:
                line = line.strip()
                try:
                    if line.startswith('total:'):
                        job['total'] = int(line.split(':')[1])
                    elif line.startswith('progress:'):
                        done, total = line.split(':')[1].split('/')
                        done, total = int(done), int(total)
                        job['done'] = done
                        job['total'] = total
                    elif line == 'done':
                        job['finished'] = True
                except ValueError:
                    # Ignore a malformed protocol line instead of killing the reader.
                    continue
        finally:
            # Always reap the child and mark the job finished so the SSE
            # stream terminates even if the downloader crashed.
            proc.stdout.close()
            proc.wait()
            job['finished'] = True

    threading.Thread(target=read_stdout, daemon=True).start()
    return redirect(url_for('slideshow', tags=tags, job_id=job_id))
|
||||
|
||||
|
||||
@app.route('/download/progress/<job_id>')
def download_progress(job_id):
    """Stream the progress of download *job_id* as server-sent events.

    Every 0.3 s one event ``{"done", "total", "finished"}`` is emitted. The
    stream ends after the event with ``finished: true``, or after a single
    ``{"error": "not found"}`` event for an unknown job.
    """
    def generate():
        while True:
            info = downloads.get(job_id)
            if not info:
                yield f'data: {json.dumps({"error": "not found"})}\n\n'
                break
            # Snapshot once: the reader thread may flip `finished` at any
            # time, and the stream must only end after an event that says so.
            snapshot = {
                "done": info["done"],
                "total": info["total"],
                "finished": info["finished"],
            }
            yield f'data: {json.dumps(snapshot)}\n\n'
            if snapshot["finished"]:
                break
            time.sleep(0.3)

    return Response(
        generate(),
        mimetype='text/event-stream',
        # Disable caching and proxy buffering so events arrive immediately.
        headers={'Cache-Control': 'no-cache', 'X-Accel-Buffering': 'no'},
    )
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
app.run(debug=True)
|
||||
|
||||
65
db.py
Normal file
65
db.py
Normal file
@@ -0,0 +1,65 @@
|
||||
import os
|
||||
import sqlite3
|
||||
|
||||
DB_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'booru.db')
|
||||
|
||||
|
||||
def get_conn():
    """Open and return a new SQLite connection to the file at ``DB_PATH``."""
    connection = sqlite3.connect(DB_PATH)
    return connection
|
||||
|
||||
|
||||
def init_db():
    """Create the ``images`` table and its indexes if needed; migrate old DBs.

    Safe to call repeatedly. Databases created before the ``post_url``
    column existed get that column added.
    """
    conn = get_conn()
    try:
        # `with conn` only commits or rolls back; it does not close the
        # connection, hence the explicit close() below.
        with conn:
            conn.execute("""CREATE TABLE IF NOT EXISTS images (
                id            INTEGER PRIMARY KEY AUTOINCREMENT,
                post_id       TEXT NOT NULL,
                site          TEXT NOT NULL,
                filename      TEXT NOT NULL UNIQUE,
                tags          TEXT NOT NULL DEFAULT '',
                file_url      TEXT NOT NULL,
                post_url      TEXT NOT NULL DEFAULT '',
                downloaded_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
            )""")
            conn.execute(
                "CREATE UNIQUE INDEX IF NOT EXISTS idx_site_post ON images (site, post_id)"
            )
            # Migrate existing DBs that predate the post_url column.
            # Column 1 of each PRAGMA table_info row is the column name.
            columns = {row[1] for row in conn.execute("PRAGMA table_info(images)")}
            if 'post_url' not in columns:
                try:
                    conn.execute(
                        "ALTER TABLE images ADD COLUMN post_url TEXT NOT NULL DEFAULT ''"
                    )
                except sqlite3.OperationalError as exc:
                    # Another process may have migrated concurrently; anything
                    # else (locked DB, disk error) must not be hidden.
                    if 'duplicate column' not in str(exc).lower():
                        raise
    finally:
        conn.close()
|
||||
|
||||
|
||||
def image_exists(site, post_id):
    """Return True if a row for (*site*, *post_id*) is already stored."""
    conn = get_conn()
    try:
        row = conn.execute(
            "SELECT 1 FROM images WHERE site=? AND post_id=?", (site, post_id)
        ).fetchone()
    finally:
        # The connection context manager would not close it; do so explicitly.
        conn.close()
    return row is not None
|
||||
|
||||
|
||||
def insert_image(post_id, site, filename, tags, file_url, post_url):
    """Store one downloaded image; a duplicate filename or (site, post_id) is ignored."""
    conn = get_conn()
    try:
        with conn:  # commit on success, roll back on error
            conn.execute(
                "INSERT OR IGNORE INTO images (post_id, site, filename, tags, file_url, post_url) VALUES (?,?,?,?,?,?)",
                (post_id, site, filename, tags, file_url, post_url),
            )
    finally:
        # `with conn` does not close the connection; do so explicitly.
        conn.close()
|
||||
|
||||
|
||||
def search_images(tag_query):
    """Return stored images whose tag string contains every term of *tag_query*.

    Args:
        tag_query: whitespace-separated search terms; empty or blank returns
            all images.

    Returns:
        A list of ``{'filename', 'post_url', 'tags'}`` dicts ordered by
        insertion id. Each term is matched as a substring of the stored tag
        string, using SQLite ``LIKE``.
    """
    def escape_like(term):
        # "_" and "%" are LIKE wildcards; booru tags such as "blonde_hair"
        # must match literally, so escape them (and the escape char itself).
        return term.replace('\\', '\\\\').replace('%', '\\%').replace('_', '\\_')

    terms = tag_query.split()
    sql = "SELECT filename, post_url, tags FROM images"
    params = []
    if terms:
        sql += " WHERE " + " AND ".join(["tags LIKE ? ESCAPE '\\'"] * len(terms))
        params = [f'%{escape_like(t)}%' for t in terms]
    sql += " ORDER BY id"

    conn = get_conn()
    try:
        rows = conn.execute(sql, params).fetchall()
    finally:
        # The connection context manager would not close it; do so explicitly.
        conn.close()
    return [{'filename': r[0], 'post_url': r[1], 'tags': r[2]} for r in rows]
|
||||
154
downloader.py
Normal file
154
downloader.py
Normal file
@@ -0,0 +1,154 @@
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
import requests
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from tqdm import tqdm
|
||||
from dotenv import load_dotenv
|
||||
from db import init_db, image_exists, insert_image
|
||||
|
||||
|
||||
def parse_e621(post):
    """Extract ``(post_id, file_url, tags)`` from an e621 post dict.

    Returns ``None`` when the post has no downloadable file URL. *tags* is
    the space-joined concatenation of all tag categories. A ``file``,
    ``tags`` or category entry that is explicitly ``null`` in the JSON is
    treated like a missing one instead of raising.
    """
    file_url = (post.get('file') or {}).get('url')
    if not file_url:
        return None
    categories = post.get('tags') or {}
    tags = ' '.join(t for cat in categories.values() for t in (cat or ()))
    return str(post['id']), file_url, tags
|
||||
|
||||
|
||||
def parse_moebooru(post):
    """Extract ``(post_id, file_url, tags)`` from a Moebooru post dict.

    Returns ``None`` when the post carries no ``file_url``.
    """
    url = post.get('file_url')
    if url:
        return str(post['id']), url, post.get('tags', '')
    return None
|
||||
|
||||
|
||||
# Settings shared by the Moebooru-based sites (konachan, yande.re): the API
# answers with a bare JSON list, so there is no envelope key.
_MOEBOORU_COMMON = dict(
    endpoint='post.json',
    per_page=100,
    threads=20,
    envelope=None,
    parse=parse_moebooru,
)

# Site key -> adapter settings used by the fetch and download functions.
SITES = {
    'e621': dict(
        base_url='https://e621.net/',
        endpoint='posts.json',
        per_page=100,
        threads=8,
        envelope='posts',
        parse=parse_e621,
        post_url_fmt='https://e621.net/posts/{post_id}',
    ),
    'konachan': dict(
        base_url='https://konachan.com/',
        post_url_fmt='https://konachan.com/post/show/{post_id}',
        **_MOEBOORU_COMMON,
    ),
    'yandere': dict(
        base_url='https://yande.re/',
        post_url_fmt='https://yande.re/post/show/{post_id}',
        **_MOEBOORU_COMMON,
    ),
}
|
||||
|
||||
|
||||
def fetch_all_posts(adapter, query, auth, limit):
    """Page through a site's post listing and return at most *limit* posts.

    Args:
        adapter: site settings (``base_url``, ``endpoint``, ``per_page``,
            ``envelope``).
        query:   tag query sent as the ``tags`` parameter.
        auth:    optional dict of extra query parameters (credentials).
        limit:   maximum number of posts to return.

    Returns:
        A list of raw post dicts as delivered by the API.

    Raises:
        requests.HTTPError: if any page request returns an error status.
    """
    if limit <= 0:
        # Nothing requested: skip opening a session at all.
        return []

    posts, page = [], 1
    listing_url = adapter['base_url'] + adapter['endpoint']
    # The context manager closes the session's pooled connections.
    with requests.Session() as session:
        session.headers['User-Agent'] = 'booru-downloader/2.0 (personal archiver)'
        while len(posts) < limit:
            params = {'tags': query, 'limit': adapter['per_page'], 'page': page}
            if auth:
                params.update(auth)
            r = session.get(listing_url, params=params, timeout=30)
            r.raise_for_status()
            data = r.json()
            batch = data[adapter['envelope']] if adapter['envelope'] else data
            if not batch:
                break
            posts.extend(batch)
            if len(batch) < adapter['per_page']:
                # A short page means the listing is exhausted.
                break
            page += 1
    return posts[:limit]
|
||||
|
||||
|
||||
def download_one(post, site_name, adapter, pictures_dir, session):
    """Download one post's file into *pictures_dir* and record it in the DB.

    Returns:
        ``'skip:no_url'`` if the post has no file URL,
        ``'skip:dup:<post_id>'`` if it is already stored, otherwise
        ``'ok:<filename>'``.

    Raises:
        requests.RequestException: on network or HTTP errors.
        OSError: if the file cannot be written.
    """
    parsed = adapter['parse'](post)
    if not parsed:
        return 'skip:no_url'
    post_id, file_url, tags = parsed
    if image_exists(site_name, post_id):
        return f'skip:dup:{post_id}'

    # Derive the extension from the URL path, ignoring any query string.
    ext = os.path.splitext(file_url.split('?')[0])[1]
    filename = f'{site_name}_{post_id}{ext}'
    dest = os.path.join(pictures_dir, filename)
    part = dest + '.part'

    try:
        # Stream to a temp file so large files are not held in memory by
        # every worker thread, and a failed transfer never leaves a
        # truncated file under the final name.
        with session.get(file_url, timeout=60, stream=True) as r:
            r.raise_for_status()
            with open(part, 'wb') as f:
                for chunk in r.iter_content(chunk_size=65536):
                    f.write(chunk)
        os.replace(part, dest)
    finally:
        # Only present if the download failed before the rename.
        if os.path.exists(part):
            os.remove(part)

    post_url = adapter['post_url_fmt'].format(post_id=post_id)
    insert_image(post_id, site_name, filename, tags, file_url, post_url)
    return f'ok:{filename}'
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: fetch a post list and download its files.

    Options: ``--site`` (one of ``SITES``), ``--query`` (tag query) and
    ``--limit`` (maximum number of posts, default 10000).

    When stdout is not a terminal, machine-readable progress lines
    (``total:N``, ``progress:D/T``, ``done``) are printed for the web UI;
    the tqdm bar and failure messages always go to stderr.

    Raises:
        KeyError: if the site is e621 and the credentials are not configured.
    """
    load_dotenv()
    parser = argparse.ArgumentParser(
        description='Download booru images to unified store'
    )
    parser.add_argument('--site', required=True, choices=list(SITES))
    parser.add_argument('--query', required=True, help='Space-separated tags to search')
    parser.add_argument('--limit', type=int, default=10_000)
    args = parser.parse_args()

    init_db()
    adapter = SITES[args.site]

    auth = None
    if args.site == 'e621':
        login = os.environ.get('E621_LOGIN')
        api_key = os.environ.get('E621_API_KEY')
        if not login or not api_key:
            raise KeyError(
                'E621_LOGIN and E621_API_KEY must be set in .env — see .env.example'
            )
        auth = {'login': login, 'api_key': api_key}

    pictures_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'Pictures')
    os.makedirs(pictures_dir, exist_ok=True)

    print(f'Fetching post list from {args.site} for query: {args.query!r}')
    posts = fetch_all_posts(adapter, args.query, auth, args.limit)
    total = len(posts)
    print(f'Got {total} posts, downloading...')

    # structured progress lines on stdout when piped to Flask; tqdm bar on stderr for console
    piped = not sys.stdout.isatty()
    if piped:
        print(f'total:{total}', flush=True)

    done = 0
    failed = 0
    with requests.Session() as session:
        session.headers['User-Agent'] = 'booru-downloader/2.0 (personal archiver)'
        with ThreadPoolExecutor(max_workers=adapter['threads']) as pool:
            futures = {
                pool.submit(download_one, p, args.site, adapter, pictures_dir, session): p
                for p in posts
            }
            for future in tqdm(as_completed(futures), total=total, file=sys.stderr):
                done += 1
                try:
                    future.result()
                except Exception as exc:
                    # Report the failure instead of dropping it silently; one
                    # bad post must not abort the remaining downloads.
                    failed += 1
                    post_id = futures[future].get('id')
                    print(f'failed: post {post_id}: {exc}', file=sys.stderr)
                if piped:
                    print(f'progress:{done}/{total}', flush=True)

    if failed:
        print(f'{failed} of {total} downloads failed', file=sys.stderr)
    if piped:
        print('done', flush=True)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
@@ -1,91 +0,0 @@
|
||||
import os
|
||||
import requests
|
||||
from tqdm import tqdm
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
|
||||
URL = "https://e621.net/"
# === CONFIGURATION ===
QUERY = "skidoo"  # Replace with your search query
SAVE_DIR = "Pictures/skidoo"
LIMIT = 10000
PER_PAGE = 100  # API max is 320, but let's be safe
USER_AGENT = "e621-downloader/1.0 (by Burrson on e621)"
# SECURITY: credentials must never be hard-coded in source. Read them from
# the environment (same variable names as .env.example). The key that was
# previously committed here remains in the git history and has to be revoked
# and regenerated on the e621 account.
LOGIN = os.environ.get("E621_LOGIN", "")
API_KEY = os.environ.get("E621_API_KEY", "")
THREADS = 100  # Number of parallel download threads
|
||||
|
||||
# Ensure save directory exists
|
||||
os.makedirs(SAVE_DIR, exist_ok=True)
|
||||
|
||||
headers = {"User-Agent": USER_AGENT}
|
||||
|
||||
all_posts = []
|
||||
current_page = 1
|
||||
total_fetched = 0
|
||||
|
||||
print("Fetching posts in batches...")
|
||||
|
||||
while total_fetched < LIMIT:
|
||||
params = {
|
||||
"tags": QUERY,
|
||||
"limit": PER_PAGE,
|
||||
"page": current_page,
|
||||
"login": LOGIN,
|
||||
"api_key": API_KEY,
|
||||
}
|
||||
|
||||
response = requests.get(URL + "posts.json", headers=headers, params=params)
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
#print(data[0]["tags"])
|
||||
posts = data.get("posts", [])
|
||||
#print (posts)
|
||||
if not posts:
|
||||
print(f"No more posts found at page {current_page}.")
|
||||
break
|
||||
|
||||
all_posts.extend(posts)
|
||||
total_fetched += len(posts)
|
||||
|
||||
print(f"Fetched {len(posts)} posts from page {current_page}. Total so far: {total_fetched}.")
|
||||
|
||||
current_page += 1
|
||||
|
||||
# Stop if we fetched fewer than requested in this batch (meaning it's probably the end)
|
||||
if len(posts) < PER_PAGE:
|
||||
break
|
||||
|
||||
# Trim to desired limit, just in case we overfetched
|
||||
all_posts = all_posts[:LIMIT]
|
||||
print(f"Total posts collected for download: {len(all_posts)}")
|
||||
|
||||
|
||||
def download_file(post):
    """Download one e621 post into ``SAVE_DIR`` as ``<id><ext>``.

    Returns a status string starting with ``"Downloaded:"``, ``"Skipped:"``
    or ``"Failed:"``; download errors are reported in the string rather
    than raised.
    """
    # `file` may be missing or explicitly null in the API response.
    file_url = (post.get("file") or {}).get("url")
    if not file_url:
        return f"Skipped: No file URL for post {post.get('id')}"

    file_name = str(post["id"]) + os.path.splitext(file_url)[1]
    file_path = os.path.join(SAVE_DIR, file_name)

    try:
        # Without a timeout a stalled connection blocks a worker forever.
        file_response = requests.get(file_url, headers=headers, timeout=60)
        file_response.raise_for_status()
        with open(file_path, "wb") as f:
            f.write(file_response.content)
        return f"Downloaded: {file_name}"
    except Exception as e:
        return f"Failed: {file_url} with error {e}"
|
||||
|
||||
|
||||
print("Starting download...")
|
||||
|
||||
with ThreadPoolExecutor(max_workers=THREADS) as executor:
|
||||
futures = [executor.submit(download_file, post) for post in all_posts]
|
||||
for future in tqdm(as_completed(futures), total=len(futures), desc="Downloading"):
|
||||
result = future.result()
|
||||
if result.startswith("Failed") or result.startswith("Skipped"):
|
||||
print(result)
|
||||
|
||||
print("Download complete!")
|
||||
@@ -1,88 +0,0 @@
|
||||
import os
|
||||
import requests
|
||||
from tqdm import tqdm
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
|
||||
URL = "https://konachan.com/" #https://e621.net/
|
||||
# === CONFIGURATION ===
|
||||
QUERY = "pajamas panties" # Replace with your search query
|
||||
SAVE_DIR = "Pictures/pajamas_panties_kona"
|
||||
LIMIT = 10000
|
||||
PER_PAGE = 100 # API max is 320, but let's be safe
|
||||
USER_AGENT = "e621-downloader/1.0 (by username on e621)" # Replace 'username' with your e621 username
|
||||
THREADS = 100 # Number of parallel download threads
|
||||
|
||||
# Ensure save directory exists
|
||||
os.makedirs(SAVE_DIR, exist_ok=True)
|
||||
|
||||
headers = {"User-Agent": USER_AGENT}
|
||||
|
||||
all_posts = []
|
||||
current_page = 1
|
||||
total_fetched = 0
|
||||
|
||||
print("Fetching posts in batches...")
|
||||
|
||||
while total_fetched < LIMIT:
|
||||
params = {
|
||||
"tags": QUERY,
|
||||
"limit": PER_PAGE,
|
||||
"page": current_page,
|
||||
}
|
||||
|
||||
response = requests.get(URL + "post.json", headers=headers, params=params)
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
#print(data[0]["tags"])
|
||||
posts = data
|
||||
#posts = data.get("posts", [])
|
||||
if not posts:
|
||||
print(f"No more posts found at page {current_page}.")
|
||||
break
|
||||
|
||||
all_posts.extend(posts)
|
||||
total_fetched += len(posts)
|
||||
|
||||
print(f"Fetched {len(posts)} posts from page {current_page}. Total so far: {total_fetched}.")
|
||||
|
||||
current_page += 1
|
||||
|
||||
# Stop if we fetched fewer than requested in this batch (meaning it's probably the end)
|
||||
if len(posts) < PER_PAGE:
|
||||
break
|
||||
|
||||
# Trim to desired limit, just in case we overfetched
|
||||
all_posts = all_posts[:LIMIT]
|
||||
print(f"Total posts collected for download: {len(all_posts)}")
|
||||
|
||||
|
||||
def download_file(post):
    """Download one konachan post into ``SAVE_DIR`` as ``<id><ext>``.

    Returns a status string starting with ``"Downloaded:"``, ``"Skipped:"``
    or ``"Failed:"``; download errors are reported in the string rather
    than raised.
    """
    # .get(): a post without file_url is skipped instead of raising KeyError.
    file_url = post.get("file_url")
    if not file_url:
        return f"Skipped: No file URL for post {post.get('id')}"

    file_name = str(post["id"]) + os.path.splitext(file_url)[1]
    file_path = os.path.join(SAVE_DIR, file_name)

    try:
        # Without a timeout a stalled connection blocks a worker forever.
        file_response = requests.get(file_url, headers=headers, timeout=60)
        file_response.raise_for_status()
        with open(file_path, "wb") as f:
            f.write(file_response.content)
        return f"Downloaded: {file_name}"
    except Exception as e:
        return f"Failed: {file_url} with error {e}"
|
||||
|
||||
|
||||
print("Starting download...")
|
||||
|
||||
with ThreadPoolExecutor(max_workers=THREADS) as executor:
|
||||
futures = [executor.submit(download_file, post) for post in all_posts]
|
||||
for future in tqdm(as_completed(futures), total=len(futures), desc="Downloading"):
|
||||
result = future.result()
|
||||
if result.startswith("Failed") or result.startswith("Skipped"):
|
||||
print(result)
|
||||
|
||||
print("Download complete!")
|
||||
@@ -1,89 +0,0 @@
|
||||
import os
|
||||
import requests
|
||||
from tqdm import tqdm
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
|
||||
URL = "https://yande.re/" #https://e621.net/
|
||||
# === CONFIGURATION ===
|
||||
QUERY = "loli swimsuits" # Replace with your search query
|
||||
SAVE_DIR = "Pictures/lori_swim"
|
||||
LIMIT = 10000
|
||||
PER_PAGE = 100 # API max is 320, but let's be safe
|
||||
USER_AGENT = "e621-downloader/1.0 (by username on e621)" # Replace 'username' with your e621 username
|
||||
THREADS = 100 # Number of parallel download threads
|
||||
|
||||
# Ensure save directory exists
|
||||
os.makedirs(SAVE_DIR, exist_ok=True)
|
||||
|
||||
headers = {"User-Agent": USER_AGENT}
|
||||
|
||||
all_posts = []
|
||||
current_page = 1
|
||||
total_fetched = 0
|
||||
|
||||
print("Fetching posts in batches...")
|
||||
|
||||
while total_fetched < LIMIT:
|
||||
params = {
|
||||
"tags": QUERY,
|
||||
"limit": PER_PAGE,
|
||||
"page": current_page,
|
||||
}
|
||||
|
||||
response = requests.get(URL + "post.json", headers=headers, params=params)
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
#print(data[0]["tags"])
|
||||
posts = data
|
||||
#print (posts)
|
||||
#posts = data.get("posts", [])
|
||||
if not posts:
|
||||
print(f"No more posts found at page {current_page}.")
|
||||
break
|
||||
|
||||
all_posts.extend(posts)
|
||||
total_fetched += len(posts)
|
||||
|
||||
print(f"Fetched {len(posts)} posts from page {current_page}. Total so far: {total_fetched}.")
|
||||
|
||||
current_page += 1
|
||||
|
||||
# Stop if we fetched fewer than requested in this batch (meaning it's probably the end)
|
||||
if len(posts) < PER_PAGE:
|
||||
break
|
||||
|
||||
# Trim to desired limit, just in case we overfetched
|
||||
all_posts = all_posts[:LIMIT]
|
||||
print(f"Total posts collected for download: {len(all_posts)}")
|
||||
|
||||
|
||||
def download_file(post):
    """Download one yande.re post into ``SAVE_DIR`` as ``<id><ext>``.

    Returns a status string starting with ``"Downloaded:"``, ``"Skipped:"``
    or ``"Failed:"``; download errors are reported in the string rather
    than raised.
    """
    # .get(): a post without file_url is skipped instead of raising KeyError.
    file_url = post.get("file_url")
    if not file_url:
        return f"Skipped: No file URL for post {post.get('id')}"

    file_name = str(post["id"]) + os.path.splitext(file_url)[1]
    file_path = os.path.join(SAVE_DIR, file_name)

    try:
        # Without a timeout a stalled connection blocks a worker forever.
        file_response = requests.get(file_url, headers=headers, timeout=60)
        file_response.raise_for_status()
        with open(file_path, "wb") as f:
            f.write(file_response.content)
        return f"Downloaded: {file_name}"
    except Exception as e:
        return f"Failed: {file_url} with error {e}"
|
||||
|
||||
|
||||
print("Starting download...")
|
||||
|
||||
with ThreadPoolExecutor(max_workers=THREADS) as executor:
|
||||
futures = [executor.submit(download_file, post) for post in all_posts]
|
||||
for future in tqdm(as_completed(futures), total=len(futures), desc="Downloading"):
|
||||
result = future.result()
|
||||
if result.startswith("Failed") or result.startswith("Skipped"):
|
||||
print(result)
|
||||
|
||||
print("Download complete!")
|
||||
4
requirements.txt
Normal file
4
requirements.txt
Normal file
@@ -0,0 +1,4 @@
|
||||
flask>=3.0
|
||||
requests>=2.32
|
||||
tqdm>=4.66
|
||||
python-dotenv>=1.0
|
||||
@@ -1,7 +1,7 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<title>Slideshow</title>
|
||||
<title>Booru Slideshow</title>
|
||||
<style>
|
||||
body{background:#111;color:#fff;text-align:center;margin:0}
|
||||
h1{margin-top:2vh}
|
||||
@@ -13,55 +13,219 @@
|
||||
height:100vh;
|
||||
object-fit:contain;
|
||||
margin:0;
|
||||
cursor:default;
|
||||
}
|
||||
|
||||
#controls{margin-top:2vh}
|
||||
button{padding:.6rem 1.1rem;font-size:1rem;margin:.2rem;border:0;border-radius:.4rem;cursor:pointer}
|
||||
|
||||
#counter{margin-top:1vh;font-size:1.1rem}
|
||||
|
||||
#tag-form{margin-top:1.5vh}
|
||||
#tag-form input[type=text]{padding:.5rem .8rem;font-size:1rem;border-radius:.4rem;border:0;width:20rem;background:#333;color:#fff}
|
||||
#tag-form button{background:#444;color:#fff}
|
||||
.tag-chip{display:inline-block;background:#335;border-radius:1rem;padding:.2rem .7rem;margin:.2rem;font-size:.85rem}
|
||||
#no-results{font-size:1.4rem;margin-top:10vh;color:#888}
|
||||
#site-select{padding:.5rem .6rem;font-size:1rem;border-radius:.4rem;border:0;background:#333;color:#fff;margin-left:.4rem}
|
||||
#view{display:flex;justify-content:center;align-items:flex-start;margin-top:2vh;gap:1rem}
|
||||
#tag-sidebar{width:180px;max-height:80vh;overflow-y:auto;background:#1a1a1a;border-radius:.5rem;padding:.5rem;flex-shrink:0;text-align:left}
|
||||
.tag-row{display:flex;align-items:baseline;gap:.3rem}
|
||||
.tag-row a{color:#aaa;font-size:.8rem;padding:.15rem .3rem;border-radius:.3rem;text-decoration:none;word-break:break-all}
|
||||
.tag-row a:hover{background:#333;color:#fff}
|
||||
.tag-row .tag-plus{flex-shrink:0;color:#555;font-size:.9rem;width:.9rem;text-align:center;padding:.15rem .1rem}
|
||||
.tag-row .tag-plus:hover{color:#8b8;background:none}
|
||||
.tag-count{font-size:.7rem;color:#555;flex-shrink:0}
|
||||
#image-area{flex:1;min-width:0;text-align:center}
|
||||
#image-area img{max-width:90%;max-height:80vh}
|
||||
#progress-box{margin-top:1vh;display:none}
|
||||
#progress-bar-wrap{width:24rem;height:1rem;background:#333;border-radius:.5rem;display:inline-block;overflow:hidden;vertical-align:middle}
|
||||
#progress-bar{height:100%;width:0%;background:#5a5;transition:width .2s}
|
||||
#progress-label{margin-left:.6rem;font-size:.95rem;color:#8b8;vertical-align:middle}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<h1>Slideshow</h1>
|
||||
<h1>Booru Slideshow</h1>
|
||||
|
||||
<img id="slide" src="{{ images[0] }}"/>
|
||||
|
||||
<div id="controls">
|
||||
<button id="prev">◀︎ Prev</button>
|
||||
<button id="toggle">Start</button>
|
||||
<button id="next">Next ▶︎</button>
|
||||
<button id="fullscreen">⛶ Fullscreen</button>
|
||||
<button id="shuffle">Shuffle</button>
|
||||
<select id="folder-select" onchange="changeFolder(this.value)">
|
||||
{% for folder in folders %}
|
||||
<option value="{{ folder }}" {% if folder == selected %}selected{% endif %}>{{ folder }}</option>
|
||||
{% endfor %}
|
||||
<form id="tag-form" method="get" action="/">
|
||||
<input type="text" name="tags" id="tag-input" placeholder="e.g. swimsuit blonde_hair" value="{{ tag_query }}" />
|
||||
<button type="submit">Search</button>
|
||||
<select name="_site" id="site-select">
|
||||
<option value="e621">e621</option>
|
||||
<option value="konachan">konachan</option>
|
||||
<option value="yandere">yandere</option>
|
||||
</select>
|
||||
<input id="delay" type="number" min="1" value="15" style="width:4rem" />
|
||||
<label for="delay">sec</label>
|
||||
<button type="button" id="get-btn">Get Images</button>
|
||||
</form>
|
||||
<div id="progress-box">
|
||||
<div id="progress-bar-wrap"><div id="progress-bar"></div></div>
|
||||
<span id="progress-label">Starting...</span>
|
||||
</div>
|
||||
|
||||
<div id="counter">1 / {{ images|length }}</div>
|
||||
{% if active_tags %}
|
||||
<div>{% for tag in active_tags %}<span class="tag-chip">{{ tag }}</span>{% endfor %}</div>
|
||||
{% endif %}
|
||||
|
||||
{% if images %}
|
||||
<div id="view">
|
||||
<div id="tag-sidebar"></div>
|
||||
<div id="image-area">
|
||||
<img id="slide" src="{{ images[0] }}" />
|
||||
<div id="controls">
|
||||
<button id="prev">◀︎ Prev</button>
|
||||
<button id="toggle">Play</button>
|
||||
<button id="next">Next ▶︎</button>
|
||||
<button id="fullscreen">⛶ Fullscreen</button>
|
||||
<button id="shuffle">Shuffle</button>
|
||||
<input id="delay" type="number" value="15" style="width:4rem" />
|
||||
<label for="delay">sec</label>
|
||||
</div>
|
||||
<div id="counter">1 / {{ images|length }}</div>
|
||||
</div>
|
||||
</div>
|
||||
{% else %}
|
||||
<div id="no-results">
|
||||
{% if tag_query %}No images found for "{{ tag_query }}".{% else %}No images yet — run the downloader first.{% endif %}
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
<script>
|
||||
const images = {{ images|tojson }};
|
||||
// "Get Images": POST the current tag query and the selected site to /download
// through a temporary hidden form, so the browser follows the redirect.
document.getElementById('get-btn').onclick = () => {
  const fields = {
    tags: document.getElementById('tag-input').value.trim(),
    site: document.getElementById('site-select').value,
  };
  const form = Object.assign(document.createElement('form'), {
    method: 'post',
    action: '/download',
  });
  for (const [name, value] of Object.entries(fields)) {
    const hidden = Object.assign(document.createElement('input'), { type: 'hidden', name, value });
    form.appendChild(hidden);
  }
  document.body.appendChild(form);
  form.submit();
};
|
||||
|
||||
const job_id = new URLSearchParams(window.location.search).get('job_id');
|
||||
if (job_id) {
|
||||
const box = document.getElementById('progress-box');
|
||||
const bar = document.getElementById('progress-bar');
|
||||
const label = document.getElementById('progress-label');
|
||||
box.style.display = 'block';
|
||||
const es = new EventSource(`/download/progress/${job_id}`);
|
||||
es.onmessage = (e) => {
|
||||
const d = JSON.parse(e.data);
|
||||
if (d.error) { label.textContent = 'Error: ' + d.error; es.close(); return; }
|
||||
const pct = d.total > 0 ? Math.round(d.done / d.total * 100) : 0;
|
||||
bar.style.width = pct + '%';
|
||||
if (d.finished) {
|
||||
label.innerHTML = `Done! ${d.done} images. <a href="/?tags={{ tag_query }}" style="color:#8b8">Refresh</a>`;
|
||||
es.close();
|
||||
} else {
|
||||
label.textContent = d.total > 0 ? `${d.done} / ${d.total}` : 'Fetching post list...';
|
||||
}
|
||||
};
|
||||
}
|
||||
</script>
|
||||
|
||||
{% if images %}
|
||||
<script>
|
||||
const images = {{ images|tojson }};
|
||||
const post_urls = {{ post_urls|tojson }};
|
||||
const tags_list = {{ tags_list|tojson }};
|
||||
const sidebar = document.getElementById('tag-sidebar');
|
||||
|
||||
const current_query = {{ tag_query|tojson }};
|
||||
|
||||
// Number of loaded images that carry each tag; render_tags() shows this count
// next to the tag.
// A prototype-less object is used on purpose: with a plain {} a tag named like
// an Object.prototype member ("constructor", "toString", "__proto__", ...)
// would start from the inherited value instead of undefined and produce a
// garbage count.
const tag_counts = Object.create(null);
for (const tags of tags_list) {
  for (const t of tags) {
    tag_counts[t] = (tag_counts[t] || 0) + 1;
  }
}
|
||||
|
||||
// Rebuild the tag sidebar for image index `n`.
// Each tag becomes one row: a "+" link that adds the tag to the current query,
// a link that searches for the tag alone, and the tag's count.
// Rows are built as DOM nodes (textContent / href) rather than an HTML string,
// so tag text containing '<', '&' or quotes is shown literally and cannot
// inject markup.
function render_tags(n) {
  const tags = tags_list[n] || [];

  // The current query does not change per tag, so split it once.
  const existing = current_query.trim().split(/\s+/).filter(Boolean);

  const rows = document.createDocumentFragment();
  for (const t of tags) {
    // Set de-duplicates when the tag is already part of the query.
    const combined = [...new Set([...existing, t])].join(' ');

    const row = document.createElement('div');
    row.className = 'tag-row';

    const plus = document.createElement('a');
    plus.className = 'tag-plus';
    plus.href = '/?tags=' + encodeURIComponent(combined);
    plus.title = 'Add to query';
    plus.textContent = '+';

    const link = document.createElement('a');
    link.href = '/?tags=' + encodeURIComponent(t);
    link.textContent = t;

    const count = document.createElement('span');
    count.className = 'tag-count';
    count.textContent = String(tag_counts[t] || 1);

    row.append(plus, link, count);
    rows.appendChild(row);
  }

  // Drop the previous image's rows, then insert the new ones in one go.
  sidebar.textContent = '';
  sidebar.appendChild(rows);
}
|
||||
// DOM handles used by the slideshow controls.
const img = document.getElementById('slide');
const btnT = document.getElementById('toggle');
const btnF = document.getElementById('fullscreen');
const counter = document.getElementById('counter');

// Start on the image named by ?idx= so a reload stays on the same picture.
// A missing, non-numeric or out-of-range idx falls back to the first image.
// (i, playing and timer are declared exactly once here; a second `let` for
// the same names would be a SyntaxError.)
const _params = new URLSearchParams(window.location.search);
const _url_idx = parseInt(_params.get('idx') || '0', 10);
let i = (_url_idx > 0 && _url_idx < images.length) ? _url_idx : 0;
let playing = false, timer = null;

// Shuffle state: `order` starts as the identity sequence 0..images.length-1
// and is what show() indexes through while `shuffled` is true.
let shuffled = false;
let order = [...Array(images.length).keys()];
|
||||
|
||||
// zoom/pan state for fullscreen
|
||||
let z_scale=1, z_tx=0, z_ty=0, z_drag=null;
|
||||
|
||||
// Return to the un-zoomed, un-panned view and clear the inline transform and
// cursor that apply_zoom() set on the image.
function reset_zoom() {
  z_tx = 0;
  z_ty = 0;
  z_scale = 1;
  for (const prop of ['transform', 'cursor']) {
    img.style[prop] = '';
  }
}
|
||||
// Push the current zoom/pan state (z_scale, z_tx, z_ty) onto the image as a
// CSS transform; the cursor shows a grab hand only while zoomed in.
function apply_zoom() {
  const zoomed_in = z_scale > 1;
  const transform_parts = [
    `scale(${z_scale})`,
    `translate(${z_tx}px,${z_ty}px)`,
  ];
  img.style.transform = transform_parts.join(' ');
  img.style.cursor = zoomed_in ? 'grab' : 'default';
}
|
||||
// Scroll wheel zooms the image, but only while something is fullscreen.
// Registered as non-passive because the handler calls preventDefault().
img.addEventListener('wheel', (ev) => {
  if (document.fullscreenElement) {
    ev.preventDefault();

    // Wheel up zooms in by 15 %, wheel down zooms out by the inverse factor.
    const ZOOM_STEP = 1.15;
    const multiplier = ev.deltaY < 0 ? ZOOM_STEP : 1 / ZOOM_STEP;

    // Keep the scale within [1, 10].
    const wanted = z_scale * multiplier;
    z_scale = Math.max(1, Math.min(10, wanted));

    // Back at 1:1 there is nothing to pan, so drop any leftover offset.
    if (z_scale === 1) {
      z_tx = 0;
      z_ty = 0;
    }
    apply_zoom();
  }
}, {passive: false});
|
||||
// Start panning: only while fullscreen and zoomed in.
img.addEventListener('mousedown', e => {
  if (!document.fullscreenElement || z_scale <= 1) return;

  // Images are draggable by default; without this the browser may start a
  // native drag-and-drop of the <img>, which swallows the following
  // mousemove/mouseup events and leaves z_drag stuck.
  e.preventDefault();

  // Remember the pointer position minus the current on-screen offset
  // (translate is applied in scaled units, hence * z_scale), so the mousemove
  // handler can derive the new offset from the pointer position alone.
  z_drag = {x: e.clientX - z_tx * z_scale, y: e.clientY - z_ty * z_scale};
  img.style.cursor = 'grabbing';
});
|
||||
// While a drag is in progress, convert the pointer position into a pan offset.
// The offset is divided by the scale because translate() is applied in scaled
// units (see apply_zoom).
window.addEventListener('mousemove', (ev) => {
  if (z_drag) {
    const dx = ev.clientX - z_drag.x;
    const dy = ev.clientY - z_drag.y;
    z_tx = dx / z_scale;
    z_ty = dy / z_scale;
    apply_zoom();
  }
});

// Releasing the button anywhere in the window ends the drag.
window.addEventListener('mouseup', () => {
  z_drag = null;
  const still_zoomed = document.fullscreenElement && z_scale > 1;
  if (still_zoomed) {
    img.style.cursor = 'grab';
  }
});

// Leaving fullscreen discards any zoom/pan.
document.addEventListener('fullscreenchange', () => {
  if (document.fullscreenElement) return;
  reset_zoom();
});
|
||||
|
||||
// Display slide `n`.
// `n` may be out of range in either direction; it wraps around. While shuffle
// is on, `n` is treated as a position in `order` and mapped to an image index.
// Afterwards `i` holds the index (into `images`) of the picture on screen.
function show(n) {
  if (shuffled) n = order[(n + order.length) % order.length];
  else n = (n + images.length) % images.length;

  // n is already the image index, so no lookup via images.indexOf is needed.
  i = n;
  img.src = images[n];
  counter.textContent = `${i + 1} / ${images.length}`;
  render_tags(n);
  reset_zoom();

  // Keep ?idx= in the address bar in sync (without adding history entries)
  // so that a reload can come back to this picture.
  const p = new URLSearchParams(window.location.search);
  p.set('idx', i);
  history.replaceState(null, '', '?' + p.toString());
}

// Initial render for the index restored from the URL; this does not go
// through show(), so the address bar is left untouched on load.
img.src = images[i];
counter.textContent = `${i + 1} / ${images.length}`;
render_tags(i);
|
||||
|
||||
function toggleShuffle(){
|
||||
shuffled = !shuffled;
|
||||
document.getElementById('shuffle').textContent = shuffled ? 'Unshuffle' : 'Shuffle';
|
||||
@@ -84,18 +248,18 @@
|
||||
// Play/Pause button: toggles the automatic slideshow.
btnT.onclick = () => {
  playing = !playing;
  btnT.textContent = playing ? 'Pause' : 'Play';
  if (playing) {
    // Empty, non-numeric or zero input falls back to 15 s; the result is
    // clamped to at least 1 s so a negative value cannot create a
    // zero-delay interval.
    const delaySec = Math.max(1, parseInt(document.getElementById('delay').value, 10) || 15);
    timer = setInterval(next, delaySec * 1000);
  } else {
    clearInterval(timer);
  }
};
|
||||
|
||||
btnF.onclick=()=>{
|
||||
if(!document.fullscreenElement){
|
||||
img.requestFullscreen(); // only the picture goes fullscreen
|
||||
img.requestFullscreen();
|
||||
}else{
|
||||
document.exitFullscreen();
|
||||
}
|
||||
@@ -104,11 +268,12 @@
|
||||
// Keyboard shortcuts: Left/Right arrows step through the slides, "e" opens the
// current picture's original post in a new tab.
document.addEventListener('keydown', e => {
  // Do not hijack keys while the user is typing in a form field (otherwise
  // typing an "e" into the tag search would open a new tab and the arrow keys
  // could not move the text caret).
  const el = e.target;
  if (el && (el.isContentEditable || /^(INPUT|TEXTAREA|SELECT)$/.test(el.tagName))) return;

  // Leave browser/OS shortcuts such as Alt+Left or Ctrl+E alone.
  if (e.ctrlKey || e.metaKey || e.altKey) return;

  if (e.key === 'ArrowRight') next();
  if (e.key === 'ArrowLeft') prev();

  // Skipped while text is selected, and when the current image has no post URL.
  if (e.key === 'e' && !window.getSelection().toString() && post_urls[i]) {
    // noopener: the external site must not get a handle on this window.
    window.open(post_urls[i], '_blank', 'noopener');
  }
});
|
||||
|
||||
// Navigate to the index page with `folder` as the (percent-encoded) ?folder=
// query parameter.
function changeFolder(folder) {
  const target = `/?folder=${encodeURIComponent(folder)}`;
  window.location.href = target;
}
|
||||
</script>
|
||||
{% endif %}
|
||||
</body>
|
||||
</html>
|
||||
|
||||
Reference in New Issue
Block a user