rule34 support, smarter downloads, file size display

You're welcome. Now behave.

CHANGES:
- Added rule34.xxx as a source — yes it's unlocked, don't get too excited
- rule34 auth via RULE34_API_KEY + RULE34_USER_ID in .env, because you don't get in without permission
- Fixed pagination for Gelbooru-style APIs (pid, 0-indexed) — the old page param was just embarrassing
- Default download limit (--limit) lowered from 10,000 to 100 per request — you don't get unlimited, you get what you're given
- Downloader now scans 10x the limit first, skips what's already owned, then fetches only fresh ones — efficient, like you should be
- Progress bar now shows scan status: "skipped N | fetching X–Y of Z" — full transparency, no excuses
- File size shown top-right of the image in small text — size matters and now you can see it
2026-05-06 00:56:02 +02:00
parent 1fad0b736e
commit 45af8a2ace
4 changed files with 79 additions and 22 deletions
--- a/.env.example
+++ b/.env.example
@@ -1,2 +1,4 @@
 E621_LOGIN=your_e621_username
 E621_API_KEY=your_e621_api_key
+RULE34_API_KEY=your_rule34_api_key
+RULE34_USER_ID=your_rule34_user_id
--- a/Slideshow.py
+++ b/Slideshow.py
@@ -1,4 +1,5 @@
 import json
+import os
 import subprocess
 import sys
 import threading
@@ -19,9 +20,11 @@ downloads = {}
 def slideshow():
    raw_query = request.args.get('tags', '').strip()
    results = search_images(raw_query)
+    pictures_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'Pictures')
    image_urls = [f'pictures/{r["filename"]}' for r in results]
    post_urls  = [r['post_url'] for r in results]
    tags_list  = [r['tags'].split() for r in results]
+    file_sizes = [os.path.getsize(os.path.join(pictures_dir, r['filename'])) for r in results]
    active_tags = raw_query.split() if raw_query else []
    job_id = request.args.get('job_id')
    return render_template(
@@ -29,6 +32,7 @@ def slideshow():
        images=image_urls,
        post_urls=post_urls,
        tags_list=tags_list,
+        file_sizes=file_sizes,
        active_tags=active_tags,
        tag_query=raw_query,
        job_id=job_id,
@@ -43,7 +47,7 @@ def download():
        return redirect(url_for('slideshow', tags=tags))

    job_id = uuid.uuid4().hex[:8]
-    downloads[job_id] = {'done': 0, 'total': 0, 'finished': False, 'site': site, 'tags': tags}
+    downloads[job_id] = {'done': 0, 'total': 0, 'finished': False, 'site': site, 'tags': tags, 'status': ''}

    proc = subprocess.Popen(
        [sys.executable, 'downloader.py', '--site', site, '--query', tags],
@@ -55,7 +59,9 @@ def download():
    def read_stdout():
        for line in proc.stdout:
            line = line.strip()
-            if line.startswith('total:'):
+            if line.startswith('status:'):
+                downloads[job_id]['status'] = line[7:]
+            elif line.startswith('total:'):
                downloads[job_id]['total'] = int(line.split(':')[1])
            elif line.startswith('progress:'):
                done, total = line.split(':')[1].split('/')
@@ -77,7 +83,7 @@ def download_progress(job_id):
            if not info:
                yield f'data: {json.dumps({"error": "not found"})}\n\n'
                break
-            yield f'data: {json.dumps({"done": info["done"], "total": info["total"], "finished": info["finished"]})}\n\n'
+            yield f'data: {json.dumps({"done": info["done"], "total": info["total"], "finished": info["finished"], "status": info["status"]})}\n\n'
            if info['finished']:
                break
            time.sleep(0.3)
--- a/downloader.py
+++ b/downloader.py
@@ -51,15 +51,27 @@ SITES = {
        'parse': parse_moebooru,
        'post_url_fmt': 'https://yande.re/post/show/{post_id}',
    },
+    'rule34': {
+        'base_url': 'https://rule34.xxx/',
+        'endpoint': 'index.php?page=dapi&s=post&q=index&json=1',
+        'per_page': 100,
+        'threads': 20,
+        'envelope': None,
+        'page_param': 'pid',
+        'page_start': 0,
+        'parse': parse_moebooru,
+        'post_url_fmt': 'https://rule34.xxx/index.php?page=post&s=view&id={post_id}',
+    },
 }


 def fetch_all_posts(adapter, query, auth, limit):
-    posts, page = [], 1
+    page_param = adapter.get('page_param', 'page')
+    posts, page = [], adapter.get('page_start', 1)
    session = requests.Session()
    session.headers['User-Agent'] = 'booru-downloader/2.0 (personal archiver)'
    while len(posts) < limit:
-        params = {'tags': query, 'limit': adapter['per_page'], 'page': page}
+        params = {'tags': query, 'limit': adapter['per_page'], page_param: page}
        if auth:
            params.update(auth)
        r = session.get(
@@ -104,7 +116,7 @@ def main():
    )
    parser.add_argument('--site', required=True, choices=SITES.keys())
    parser.add_argument('--query', required=True, help='Space-separated tags to search')
-    parser.add_argument('--limit', type=int, default=10_000)
+    parser.add_argument('--limit', type=int, default=100)
    args = parser.parse_args()

    init_db()
@@ -119,18 +131,43 @@ def main():
                'E621_LOGIN and E621_API_KEY must be set in .env — see .env.example'
            )
        auth = {'login': login, 'api_key': api_key}
+    elif args.site == 'rule34':
+        api_key = os.environ.get('RULE34_API_KEY')
+        user_id = os.environ.get('RULE34_USER_ID')
+        if api_key and user_id:
+            auth = {'api_key': api_key, 'user_id': user_id}

    pictures_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'Pictures')
    os.makedirs(pictures_dir, exist_ok=True)

-    print(f'Fetching post list from {args.site} for query: {args.query!r}')
-    posts = fetch_all_posts(adapter, args.query, auth, args.limit)
-    total = len(posts)
-    print(f'Got {total} posts, downloading...')
-
-    # structured progress lines on stdout when piped to Flask; tqdm bar on stderr for console
    piped = not sys.stdout.isatty()
+
+    scan_cap = args.limit * 10
+    print(f'Scanning up to {scan_cap} posts from {args.site} for query: {args.query!r}')
+    all_posts = fetch_all_posts(adapter, args.query, auth, scan_cap)
+    total_api = len(all_posts)
+
+    new_posts, skipped = [], 0
+    for post in all_posts:
+        parsed = adapter['parse'](post)
+        if not parsed:
+            continue
+        post_id, _, _ = parsed
+        if image_exists(args.site, post_id):
+            skipped += 1
+        else:
+            new_posts.append(post)
+            if len(new_posts) >= args.limit:
+                break
+
+    posts = new_posts
+    total = len(posts)
+    start = skipped + 1
+    end = skipped + total
+    status_msg = f'skipped {skipped} | fetching {start}–{end} of {total_api}'
+    print(f'Scan done: {status_msg}')
    if piped:
+        print(f'status:{status_msg}', flush=True)
        print(f'total:{total}', flush=True)

    session = requests.Session()
--- a/templates/slideshow.html
+++ b/templates/slideshow.html
@@ -46,7 +46,8 @@
        .tag-row .tag-plus{flex-shrink:0;color:#555;font-size:.9rem;width:.9rem;text-align:center;padding:.15rem .1rem}
        .tag-row .tag-plus:hover{color:#8b8;background:none}
        .tag-count{font-size:.7rem;color:#555;flex-shrink:0}
-        #image-area{flex:1;min-width:0;text-align:center}
+        #image-area{flex:1;min-width:0;text-align:center;position:relative}
+        #file-size{position:absolute;top:0;right:0;font-size:.72rem;color:#555;padding:.2rem .4rem;pointer-events:none}
        #image-area img{max-width:90%;max-height:80vh}
        #progress-box{margin-top:1vh;display:none}
        #progress-bar-wrap{width:24rem;height:1rem;background:#333;border-radius:.5rem;display:inline-block;overflow:hidden;vertical-align:middle}
@@ -64,6 +65,7 @@
            <option value="e621">e621</option>
            <option value="konachan">konachan</option>
            <option value="yandere">yandere</option>
+            <option value="rule34">rule34.xxx</option>
        </select>
        <button type="button" id="get-btn">Get Images</button>
    </form>
@@ -80,6 +82,7 @@
    <div id="view">
        <div id="tag-sidebar"></div>
        <div id="image-area">
+            <div id="file-size"></div>
            <div id="fs-wrap">
                <img id="slide" src="{{ images[0] }}" />
                <div id="tap-prev"></div>
@@ -126,16 +129,20 @@
            const label = document.getElementById('progress-label');
            box.style.display = 'block';
            const es = new EventSource(`download/progress/${job_id}`);
+            let statusText = '';
            es.onmessage = (e) => {
                const d = JSON.parse(e.data);
                if (d.error) { label.textContent = 'Error: ' + d.error; es.close(); return; }
+                if (d.status) statusText = d.status;
                const pct = d.total > 0 ? Math.round(d.done / d.total * 100) : 0;
                bar.style.width = pct + '%';
                if (d.finished) {
                    label.innerHTML = `Done! ${d.done} images. <a href="?tags={{ tag_query }}" style="color:#8b8">Refresh</a>`;
                    es.close();
+                } else if (d.total > 0) {
+                    label.textContent = (statusText ? statusText + ' — ' : '') + `${d.done} / ${d.total}`;
                } else {
-                    label.textContent = d.total > 0 ? `${d.done} / ${d.total}` : 'Fetching post list...';
+                    label.textContent = statusText || 'Scanning...';
                }
            };
        }
@@ -143,9 +150,11 @@

    {% if images %}
    <script>
-        const images    = {{ images|tojson }};
-        const post_urls = {{ post_urls|tojson }};
-        const tags_list = {{ tags_list|tojson }};
+        const images     = {{ images|tojson }};
+        const post_urls  = {{ post_urls|tojson }};
+        const tags_list  = {{ tags_list|tojson }};
+        const file_sizes = {{ file_sizes|tojson }};
+        function fmt_size(b){ return b >= 1048576 ? (b/1048576).toFixed(1)+' MB' : Math.round(b/1024)+' KB'; }
        const sidebar   = document.getElementById('tag-sidebar');

        const current_query = {{ tag_query|tojson }};
@@ -171,11 +180,12 @@
                    `</div>`;
            }).join('');
        }
-        const img     = document.getElementById('slide');
-        const fs_wrap = document.getElementById('fs-wrap');
-        const btnT    = document.getElementById('toggle');
-        const btnF    = document.getElementById('fullscreen');
-        const counter = document.getElementById('counter');
+        const img       = document.getElementById('slide');
+        const fs_wrap   = document.getElementById('fs-wrap');
+        const btnT      = document.getElementById('toggle');
+        const btnF      = document.getElementById('fullscreen');
+        const counter   = document.getElementById('counter');
+        const file_size_el = document.getElementById('file-size');

        const _params  = new URLSearchParams(window.location.search);
        const _url_idx = parseInt(_params.get('idx') || '0', 10);
@@ -257,6 +267,7 @@
            i = n;
            img.src = images[n];
            counter.textContent = `${i+1} / ${images.length}`;
+            file_size_el.textContent = fmt_size(file_sizes[n]);
            render_tags(n);
            reset_zoom();
            preload_next();
@@ -268,6 +279,7 @@
        // init to url index
        img.src = images[i];
        counter.textContent = `${i+1} / ${images.length}`;
+        file_size_el.textContent = fmt_size(file_sizes[i]);
        render_tags(i);
        preload_next();