diff --git a/.env.example b/.env.example index 98e2903..46ce842 100644 --- a/.env.example +++ b/.env.example @@ -1,2 +1,4 @@ E621_LOGIN=your_e621_username E621_API_KEY=your_e621_api_key +RULE34_API_KEY=your_rule34_api_key +RULE34_USER_ID=your_rule34_user_id diff --git a/Slideshow.py b/Slideshow.py index b9f5ee8..0a9d0dc 100644 --- a/Slideshow.py +++ b/Slideshow.py @@ -1,4 +1,5 @@ import json +import os import subprocess import sys import threading @@ -19,9 +20,11 @@ downloads = {} def slideshow(): raw_query = request.args.get('tags', '').strip() results = search_images(raw_query) + pictures_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'Pictures') image_urls = [f'pictures/{r["filename"]}' for r in results] post_urls = [r['post_url'] for r in results] tags_list = [r['tags'].split() for r in results] + file_sizes = [os.path.getsize(os.path.join(pictures_dir, r['filename'])) for r in results] active_tags = raw_query.split() if raw_query else [] job_id = request.args.get('job_id') return render_template( @@ -29,6 +32,7 @@ def slideshow(): images=image_urls, post_urls=post_urls, tags_list=tags_list, + file_sizes=file_sizes, active_tags=active_tags, tag_query=raw_query, job_id=job_id, @@ -43,7 +47,7 @@ def download(): return redirect(url_for('slideshow', tags=tags)) job_id = uuid.uuid4().hex[:8] - downloads[job_id] = {'done': 0, 'total': 0, 'finished': False, 'site': site, 'tags': tags} + downloads[job_id] = {'done': 0, 'total': 0, 'finished': False, 'site': site, 'tags': tags, 'status': ''} proc = subprocess.Popen( [sys.executable, 'downloader.py', '--site', site, '--query', tags], @@ -55,7 +59,9 @@ def download(): def read_stdout(): for line in proc.stdout: line = line.strip() - if line.startswith('total:'): + if line.startswith('status:'): + downloads[job_id]['status'] = line[7:] + elif line.startswith('total:'): downloads[job_id]['total'] = int(line.split(':')[1]) elif line.startswith('progress:'): done, total = line.split(':')[1].split('/') @@ -77,7 +83,7 @@ def download_progress(job_id): if not info: yield f'data: {json.dumps({"error": "not found"})}\n\n' break - yield f'data: {json.dumps({"done": info["done"], "total": info["total"], "finished": info["finished"]})}\n\n' + yield f'data: {json.dumps({"done": info["done"], "total": info["total"], "finished": info["finished"], "status": info["status"]})}\n\n' if info['finished']: break time.sleep(0.3) diff --git a/downloader.py b/downloader.py index 0d601b9..b3e43e4 100644 --- a/downloader.py +++ b/downloader.py @@ -51,15 +51,27 @@ SITES = { 'parse': parse_moebooru, 'post_url_fmt': 'https://yande.re/post/show/{post_id}', }, + 'rule34': { + 'base_url': 'https://rule34.xxx/', + 'endpoint': 'index.php?page=dapi&s=post&q=index&json=1', + 'per_page': 100, + 'threads': 20, + 'envelope': None, + 'page_param': 'pid', + 'page_start': 0, + 'parse': parse_moebooru, + 'post_url_fmt': 'https://rule34.xxx/index.php?page=post&s=view&id={post_id}', + }, } def fetch_all_posts(adapter, query, auth, limit): - posts, page = [], 1 + page_param = adapter.get('page_param', 'page') + posts, page = [], adapter.get('page_start', 1) session = requests.Session() session.headers['User-Agent'] = 'booru-downloader/2.0 (personal archiver)' while len(posts) < limit: - params = {'tags': query, 'limit': adapter['per_page'], 'page': page} + params = {'tags': query, 'limit': adapter['per_page'], page_param: page} if auth: params.update(auth) r = session.get( @@ -104,7 +116,7 @@ def main(): ) parser.add_argument('--site', required=True, choices=SITES.keys()) parser.add_argument('--query', required=True, help='Space-separated tags to search') - parser.add_argument('--limit', type=int, default=10_000) + parser.add_argument('--limit', type=int, default=100) args = parser.parse_args() init_db() @@ -119,18 +131,43 @@ def main(): 'E621_LOGIN and E621_API_KEY must be set in .env — see .env.example' ) auth = {'login': login, 'api_key': api_key} + elif args.site == 'rule34': + api_key = os.environ.get('RULE34_API_KEY') + user_id = os.environ.get('RULE34_USER_ID') + if api_key and user_id: + auth = {'api_key': api_key, 'user_id': user_id} pictures_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'Pictures') os.makedirs(pictures_dir, exist_ok=True) - print(f'Fetching post list from {args.site} for query: {args.query!r}') - posts = fetch_all_posts(adapter, args.query, auth, args.limit) - total = len(posts) - print(f'Got {total} posts, downloading...') - - # structured progress lines on stdout when piped to Flask; tqdm bar on stderr for console piped = not sys.stdout.isatty() + + scan_cap = args.limit * 10 + print(f'Scanning up to {scan_cap} posts from {args.site} for query: {args.query!r}') + all_posts = fetch_all_posts(adapter, args.query, auth, scan_cap) + total_api = len(all_posts) + + new_posts, skipped = [], 0 + for post in all_posts: + parsed = adapter['parse'](post) + if not parsed: + continue + post_id, _, _ = parsed + if image_exists(args.site, post_id): + skipped += 1 + else: + new_posts.append(post) + if len(new_posts) >= args.limit: + break + + posts = new_posts + total = len(posts) + start = skipped + 1 + end = skipped + total + status_msg = f'skipped {skipped} | fetching {start}–{end} of {total_api}' + print(f'Scan done: {status_msg}') if piped: + print(f'status:{status_msg}', flush=True) print(f'total:{total}', flush=True) session = requests.Session() diff --git a/templates/slideshow.html b/templates/slideshow.html index a0197e6..d7f12cc 100644 --- a/templates/slideshow.html +++ b/templates/slideshow.html @@ -46,7 +46,8 @@ .tag-row .tag-plus{flex-shrink:0;color:#555;font-size:.9rem;width:.9rem;text-align:center;padding:.15rem .1rem} .tag-row .tag-plus:hover{color:#8b8;background:none} .tag-count{font-size:.7rem;color:#555;flex-shrink:0} - #image-area{flex:1;min-width:0;text-align:center} + #image-area{flex:1;min-width:0;text-align:center;position:relative} + #file-size{position:absolute;top:0;right:0;font-size:.72rem;color:#555;padding:.2rem .4rem;pointer-events:none} #image-area img{max-width:90%;max-height:80vh} #progress-box{margin-top:1vh;display:none} #progress-bar-wrap{width:24rem;height:1rem;background:#333;border-radius:.5rem;display:inline-block;overflow:hidden;vertical-align:middle} @@ -64,6 +65,7 @@ + @@ -80,6 +82,7 @@