rule34 support, smarter downloads, file size display

You're welcome. Now behave.

CHANGES:
- Added rule34.xxx as a source — yes it's unlocked, don't get too excited
- rule34 auth via RULE34_API_KEY + RULE34_USER_ID in .env (credentials are sent only when both are set), because you don't get in without permission
- Fixed pagination for Gelbooru-style APIs (pid, 0-indexed) — the old page param was just embarrassing
- Default download limit lowered from 10,000 to 100 per request (still overridable with --limit on the command line) — you don't get unlimited, you get what you're given
- Downloader now scans up to 10x the limit first, skips what's already owned, then fetches only fresh ones (at most the limit) — efficient, like you should be
- Progress bar now shows scan status: "skipped N | fetching X–Y of Z" — full transparency, no excuses
- File size shown top-right of the image in small text — size matters and now you can see it
This commit is contained in:
Johannes
2026-05-06 00:56:02 +02:00
parent 1fad0b736e
commit 45af8a2ace
4 changed files with 79 additions and 22 deletions

View File

@@ -1,2 +1,4 @@
E621_LOGIN=your_e621_username
E621_API_KEY=your_e621_api_key
RULE34_API_KEY=your_rule34_api_key
RULE34_USER_ID=your_rule34_user_id

View File

@@ -1,4 +1,5 @@
import json
import os
import subprocess
import sys
import threading
@@ -19,9 +20,11 @@ downloads = {}
def slideshow():
raw_query = request.args.get('tags', '').strip()
results = search_images(raw_query)
pictures_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'Pictures')
image_urls = [f'pictures/{r["filename"]}' for r in results]
post_urls = [r['post_url'] for r in results]
tags_list = [r['tags'].split() for r in results]
file_sizes = [os.path.getsize(os.path.join(pictures_dir, r['filename'])) for r in results]
active_tags = raw_query.split() if raw_query else []
job_id = request.args.get('job_id')
return render_template(
@@ -29,6 +32,7 @@ def slideshow():
images=image_urls,
post_urls=post_urls,
tags_list=tags_list,
file_sizes=file_sizes,
active_tags=active_tags,
tag_query=raw_query,
job_id=job_id,
@@ -43,7 +47,7 @@ def download():
return redirect(url_for('slideshow', tags=tags))
job_id = uuid.uuid4().hex[:8]
downloads[job_id] = {'done': 0, 'total': 0, 'finished': False, 'site': site, 'tags': tags}
downloads[job_id] = {'done': 0, 'total': 0, 'finished': False, 'site': site, 'tags': tags, 'status': ''}
proc = subprocess.Popen(
[sys.executable, 'downloader.py', '--site', site, '--query', tags],
@@ -55,7 +59,9 @@ def download():
def read_stdout():
for line in proc.stdout:
line = line.strip()
if line.startswith('total:'):
if line.startswith('status:'):
downloads[job_id]['status'] = line[7:]
elif line.startswith('total:'):
downloads[job_id]['total'] = int(line.split(':')[1])
elif line.startswith('progress:'):
done, total = line.split(':')[1].split('/')
@@ -77,7 +83,7 @@ def download_progress(job_id):
if not info:
yield f'data: {json.dumps({"error": "not found"})}\n\n'
break
yield f'data: {json.dumps({"done": info["done"], "total": info["total"], "finished": info["finished"]})}\n\n'
yield f'data: {json.dumps({"done": info["done"], "total": info["total"], "finished": info["finished"], "status": info["status"]})}\n\n'
if info['finished']:
break
time.sleep(0.3)

View File

@@ -51,15 +51,27 @@ SITES = {
'parse': parse_moebooru,
'post_url_fmt': 'https://yande.re/post/show/{post_id}',
},
'rule34': {
'base_url': 'https://rule34.xxx/',
'endpoint': 'index.php?page=dapi&s=post&q=index&json=1',
'per_page': 100,
'threads': 20,
'envelope': None,
'page_param': 'pid',
'page_start': 0,
'parse': parse_moebooru,
'post_url_fmt': 'https://rule34.xxx/index.php?page=post&s=view&id={post_id}',
},
}
def fetch_all_posts(adapter, query, auth, limit):
posts, page = [], 1
page_param = adapter.get('page_param', 'page')
posts, page = [], adapter.get('page_start', 1)
session = requests.Session()
session.headers['User-Agent'] = 'booru-downloader/2.0 (personal archiver)'
while len(posts) < limit:
params = {'tags': query, 'limit': adapter['per_page'], 'page': page}
params = {'tags': query, 'limit': adapter['per_page'], page_param: page}
if auth:
params.update(auth)
r = session.get(
@@ -104,7 +116,7 @@ def main():
)
parser.add_argument('--site', required=True, choices=SITES.keys())
parser.add_argument('--query', required=True, help='Space-separated tags to search')
parser.add_argument('--limit', type=int, default=10_000)
parser.add_argument('--limit', type=int, default=100)
args = parser.parse_args()
init_db()
@@ -119,18 +131,43 @@ def main():
'E621_LOGIN and E621_API_KEY must be set in .env — see .env.example'
)
auth = {'login': login, 'api_key': api_key}
elif args.site == 'rule34':
api_key = os.environ.get('RULE34_API_KEY')
user_id = os.environ.get('RULE34_USER_ID')
if api_key and user_id:
auth = {'api_key': api_key, 'user_id': user_id}
pictures_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'Pictures')
os.makedirs(pictures_dir, exist_ok=True)
print(f'Fetching post list from {args.site} for query: {args.query!r}')
posts = fetch_all_posts(adapter, args.query, auth, args.limit)
total = len(posts)
print(f'Got {total} posts, downloading...')
# structured progress lines on stdout when piped to Flask; tqdm bar on stderr for console
piped = not sys.stdout.isatty()
scan_cap = args.limit * 10
print(f'Scanning up to {scan_cap} posts from {args.site} for query: {args.query!r}')
all_posts = fetch_all_posts(adapter, args.query, auth, scan_cap)
total_api = len(all_posts)
new_posts, skipped = [], 0
for post in all_posts:
parsed = adapter['parse'](post)
if not parsed:
continue
post_id, _, _ = parsed
if image_exists(args.site, post_id):
skipped += 1
else:
new_posts.append(post)
if len(new_posts) >= args.limit:
break
posts = new_posts
total = len(posts)
start = skipped + 1
end = skipped + total
status_msg = f'skipped {skipped} | fetching {start}{end} of {total_api}'
print(f'Scan done: {status_msg}')
if piped:
print(f'status:{status_msg}', flush=True)
print(f'total:{total}', flush=True)
session = requests.Session()

View File

@@ -46,7 +46,8 @@
.tag-row .tag-plus{flex-shrink:0;color:#555;font-size:.9rem;width:.9rem;text-align:center;padding:.15rem .1rem}
.tag-row .tag-plus:hover{color:#8b8;background:none}
.tag-count{font-size:.7rem;color:#555;flex-shrink:0}
#image-area{flex:1;min-width:0;text-align:center}
#image-area{flex:1;min-width:0;text-align:center;position:relative}
#file-size{position:absolute;top:0;right:0;font-size:.72rem;color:#555;padding:.2rem .4rem;pointer-events:none}
#image-area img{max-width:90%;max-height:80vh}
#progress-box{margin-top:1vh;display:none}
#progress-bar-wrap{width:24rem;height:1rem;background:#333;border-radius:.5rem;display:inline-block;overflow:hidden;vertical-align:middle}
@@ -64,6 +65,7 @@
<option value="e621">e621</option>
<option value="konachan">konachan</option>
<option value="yandere">yandere</option>
<option value="rule34">rule34.xxx</option>
</select>
<button type="button" id="get-btn">Get Images</button>
</form>
@@ -80,6 +82,7 @@
<div id="view">
<div id="tag-sidebar"></div>
<div id="image-area">
<div id="file-size"></div>
<div id="fs-wrap">
<img id="slide" src="{{ images[0] }}" />
<div id="tap-prev"></div>
@@ -126,16 +129,20 @@
const label = document.getElementById('progress-label');
box.style.display = 'block';
const es = new EventSource(`download/progress/${job_id}`);
let statusText = '';
es.onmessage = (e) => {
const d = JSON.parse(e.data);
if (d.error) { label.textContent = 'Error: ' + d.error; es.close(); return; }
if (d.status) statusText = d.status;
const pct = d.total > 0 ? Math.round(d.done / d.total * 100) : 0;
bar.style.width = pct + '%';
if (d.finished) {
label.innerHTML = `Done! ${d.done} images. <a href="?tags={{ tag_query }}" style="color:#8b8">Refresh</a>`;
es.close();
} else if (d.total > 0) {
label.textContent = (statusText ? statusText + ' — ' : '') + `${d.done} / ${d.total}`;
} else {
label.textContent = d.total > 0 ? `${d.done} / ${d.total}` : 'Fetching post list...';
label.textContent = statusText || 'Scanning...';
}
};
}
@@ -143,9 +150,11 @@
{% if images %}
<script>
const images = {{ images|tojson }};
const post_urls = {{ post_urls|tojson }};
const tags_list = {{ tags_list|tojson }};
const images = {{ images|tojson }};
const post_urls = {{ post_urls|tojson }};
const tags_list = {{ tags_list|tojson }};
const file_sizes = {{ file_sizes|tojson }};
function fmt_size(b){ return b >= 1048576 ? (b/1048576).toFixed(1)+' MB' : Math.round(b/1024)+' KB'; }
const sidebar = document.getElementById('tag-sidebar');
const current_query = {{ tag_query|tojson }};
@@ -171,11 +180,12 @@
`</div>`;
}).join('');
}
const img = document.getElementById('slide');
const fs_wrap = document.getElementById('fs-wrap');
const btnT = document.getElementById('toggle');
const btnF = document.getElementById('fullscreen');
const counter = document.getElementById('counter');
const img = document.getElementById('slide');
const fs_wrap = document.getElementById('fs-wrap');
const btnT = document.getElementById('toggle');
const btnF = document.getElementById('fullscreen');
const counter = document.getElementById('counter');
const file_size_el = document.getElementById('file-size');
const _params = new URLSearchParams(window.location.search);
const _url_idx = parseInt(_params.get('idx') || '0', 10);
@@ -257,6 +267,7 @@
i = n;
img.src = images[n];
counter.textContent = `${i+1} / ${images.length}`;
file_size_el.textContent = fmt_size(file_sizes[n]);
render_tags(n);
reset_zoom();
preload_next();
@@ -268,6 +279,7 @@
// init to url index
img.src = images[i];
counter.textContent = `${i+1} / ${images.length}`;
file_size_el.textContent = fmt_size(file_sizes[i]);
render_tags(i);
preload_next();