"""Send an image plus a text prompt to Claude, log usage/cost, and save the reply.

Invoked from the command line with an image path, a prompt, and an optional
output file name (see ``main``).
"""

import anthropic
import base64
import json
import sys
from datetime import datetime
from pathlib import Path
from dotenv import load_dotenv

load_dotenv()

# ── Settings ───────────────────────────────────────────────────────────────────
MODEL = "claude-opus-4-6"
MAX_TOKENS = 4096
TEMPERATURE = None      # 0.0–1.0, None = model default (1.0)
TOP_P = None            # 0.0–1.0, nucleus sampling, None = off
TOP_K = None            # int, top-k sampling, None = off
STOP_SEQUENCES = None   # e.g. ["END", "STOP"], None = off

# Maps API model IDs → friendly names used in claude_pricing.json
MODEL_NAME_MAP = {
    "claude-opus-4-6": "Claude Opus 4.6",
    "claude-opus-4-5": "Claude Opus 4.5",
    "claude-opus-4-1": "Claude Opus 4.1",
    "claude-opus-4-0": "Claude Opus 4",
    "claude-sonnet-4-6": "Claude Sonnet 4.6",
    "claude-sonnet-4-5": "Claude Sonnet 4.5",
    "claude-sonnet-4-0": "Claude Sonnet 4",
    "claude-sonnet-3-7": "Claude Sonnet 3.7",
    "claude-haiku-4-5": "Claude Haiku 4.5",
    "claude-haiku-3-5": "Claude Haiku 3.5",
    "claude-haiku-3": "Claude Haiku 3",
    "claude-opus-3": "Claude Opus 3",
}

# ── Paths ──────────────────────────────────────────────────────────────────────
LOG_FILE = "log.txt"
PRICING_FILE = Path("claude_pricing.json")
PICS_DIR = Path("pics")
OUTPUT_DIR = Path("output")

MEDIA_TYPES = {
    ".jpg": "image/jpeg",
    ".jpeg": "image/jpeg",
    ".png": "image/png",
    ".gif": "image/gif",
    ".webp": "image/webp",
}


# ── Pricing ────────────────────────────────────────────────────────────────────
def load_pricing() -> dict:
    """Read PRICING_FILE and return its entries keyed by their "model" field.

    The file is parsed as JSON and iterated as a sequence of mappings that
    each carry a "model" key.

    Raises:
        OSError: the pricing file cannot be opened.
        ValueError: the file is not valid JSON (json.JSONDecodeError).
        KeyError, TypeError: an entry lacks "model" or is not a mapping.
    """
    with open(PRICING_FILE, encoding="utf-8") as f:
        entries = json.load(f)
    return {e["model"]: e for e in entries}


def estimate_cost(model_id: str, input_tokens: int, output_tokens: int) -> str:
    """Return a human-readable cost estimate for one request.

    Cost estimation is best-effort: it feeds the log entry only, so any
    problem with the pricing data yields an "n/a (...)" string instead of an
    exception that would abort the caller after the API call has completed.

    Args:
        model_id: API model ID, translated through MODEL_NAME_MAP.
        input_tokens: Number of input tokens used.
        output_tokens: Number of output tokens used.

    Returns:
        "$total (in: $x out: $y)" with six decimals, or an "n/a (...)" note
        when the model or its rates cannot be resolved.
    """
    friendly_name = MODEL_NAME_MAP.get(model_id)
    if not friendly_name:
        return "n/a (model not in pricing file)"

    try:
        pricing = load_pricing()
    except (OSError, ValueError, KeyError, TypeError):
        # Missing, unreadable or malformed pricing file.
        return "n/a (pricing file unavailable)"

    if friendly_name not in pricing:
        return "n/a (model not in pricing file)"

    rates = pricing[friendly_name]
    try:
        # Rates are expressed per million tokens.
        input_cost = (input_tokens / 1_000_000) * rates["base_input_tokens_per_mtok"]
        output_cost = (output_tokens / 1_000_000) * rates["output_tokens_per_mtok"]
    except (KeyError, TypeError):
        return "n/a (pricing entry incomplete)"

    total = input_cost + output_cost
    return f"${total:.6f} (in: ${input_cost:.6f} out: ${output_cost:.6f})"


# ── Path helpers ───────────────────────────────────────────────────────────────
def resolve_image_path(image_path: str) -> Path:
    """Place a bare file name under PICS_DIR; return any other path unchanged."""
    p = Path(image_path)
    if p.parent == Path(".") and not p.is_absolute():
        return PICS_DIR / p
    return p


def resolve_output_path(output_file: str) -> Path:
    """Place a bare file name under OUTPUT_DIR; return any other path unchanged."""
    p = Path(output_file)
    if p.parent == Path(".") and not p.is_absolute():
        return OUTPUT_DIR / p
    return p


def get_media_type(image_path: str) -> str:
    """Return the media type for the image's (case-insensitive) file extension.

    Raises:
        ValueError: the extension is not listed in MEDIA_TYPES.
    """
    ext = Path(image_path).suffix.lower()
    media_type = MEDIA_TYPES.get(ext)
    if not media_type:
        raise ValueError(f"Unsupported image format: {ext}. Use jpg, png, gif, or webp.")
    return media_type


def load_image_b64(image_path: str) -> str:
    """Read the file as bytes and return its standard base64 encoding as text."""
    with open(image_path, "rb") as f:
        return base64.standard_b64encode(f.read()).decode("utf-8")


# ── Logging ────────────────────────────────────────────────────────────────────
def write_log(image_path: str, prompt: str, time_sent: datetime,
              time_received: datetime, usage, output_file: str):
    """Append one entry describing a completed request to LOG_FILE.

    Args:
        image_path: Image path recorded in the entry.
        prompt: Prompt text recorded in the entry.
        time_sent: Timestamp taken just before the request.
        time_received: Timestamp taken just after the response.
        usage: Object exposing ``input_tokens`` and ``output_tokens``.
        output_file: Output path recorded in the entry.
    """
    duration = (time_received - time_sent).total_seconds()
    cost_str = estimate_cost(MODEL, usage.input_tokens, usage.output_tokens)

    active_settings = (
        f"temp={TEMPERATURE if TEMPERATURE is not None else 'default'} "
        f"max_tokens={MAX_TOKENS} "
        f"top_p={TOP_P if TOP_P is not None else 'off'} "
        f"top_k={TOP_K if TOP_K is not None else 'off'} "
        f"stop_seq={STOP_SEQUENCES if STOP_SEQUENCES is not None else 'off'}"
    )

    entry = (
        f"[{time_sent.strftime('%Y-%m-%d %H:%M:%S')}]\n"
        f" model: {MODEL}\n"
        f" settings: {active_settings}\n"
        f" image: {image_path}\n"
        f" prompt: {prompt}\n"
        f" output: {output_file}\n"
        # [:-3] trims microseconds down to milliseconds.
        f" sent: {time_sent.strftime('%H:%M:%S.%f')[:-3]}\n"
        f" received: {time_received.strftime('%H:%M:%S.%f')[:-3]}\n"
        f" duration: {duration:.2f}s\n"
        f" tokens in: {usage.input_tokens}\n"
        f" tokens out: {usage.output_tokens}\n"
        f" est. cost: {cost_str}\n"
    )

    with open(LOG_FILE, "a", encoding="utf-8") as f:
        f.write(entry + "\n")


# ── Main query ─────────────────────────────────────────────────────────────────
def query_claude(image_path: str, prompt: str, output_file: str = "response.txt") -> str:
    """Send the image and prompt to the model, log the exchange, return the text.

    Args:
        image_path: Image to send; bare names are looked up under PICS_DIR.
        prompt: Text sent alongside the image.
        output_file: Only recorded in the log entry; this function does not
            write the response to it.

    Returns:
        The text of the first "text" content block of the response, or an
        empty string when the response contains no text block.

    Raises:
        ValueError: the image extension is unsupported.
        OSError: the image (or the log file) cannot be opened.
    """
    client = anthropic.Anthropic()

    image_path = str(resolve_image_path(image_path))
    output_file = str(resolve_output_path(output_file))
    media_type = get_media_type(image_path)
    image_data = load_image_b64(image_path)

    # Only forward sampling settings that were explicitly configured.
    optional_params = {}
    if TEMPERATURE is not None:
        optional_params["temperature"] = TEMPERATURE
    if TOP_P is not None:
        optional_params["top_p"] = TOP_P
    if TOP_K is not None:
        optional_params["top_k"] = TOP_K
    if STOP_SEQUENCES is not None:
        optional_params["stop_sequences"] = STOP_SEQUENCES

    time_sent = datetime.now()
    response = client.messages.create(
        model=MODEL,
        max_tokens=MAX_TOKENS,
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "image",
                        "source": {
                            "type": "base64",
                            "media_type": media_type,
                            "data": image_data,
                        },
                    },
                    {"type": "text", "text": prompt},
                ],
            }
        ],
        **optional_params,
    )
    time_received = datetime.now()

    write_log(image_path, prompt, time_sent, time_received, response.usage, output_file)

    # Default to "" so a response without any text block does not raise
    # StopIteration out of this function.
    return next(
        (block.text for block in response.content if block.type == "text"),
        "",
    )


def main():
    """Command-line entry point: query the model and save its reply to a file.

    Expects ``sys.argv`` to hold the image path, the prompt and, optionally,
    the output file name (default "response.txt"). Exits with status 1 on a
    usage error or when the image file does not exist.
    """
    if len(sys.argv) < 3:
        print('Usage: python image_query.py <image_path> "<prompt>" [output_file]')
        sys.exit(1)

    image_path = sys.argv[1]
    prompt = sys.argv[2]
    output_file = sys.argv[3] if len(sys.argv) > 3 else "response.txt"

    resolved_image = resolve_image_path(image_path)
    if not resolved_image.exists():
        print(f"Error: image file '{resolved_image}' not found.")
        sys.exit(1)

    # Create the destination directory before the request so the write below
    # cannot fail on a missing directory once the API call has been made.
    resolved_output = resolve_output_path(output_file)
    resolved_output.parent.mkdir(parents=True, exist_ok=True)

    print(f"Querying Claude about '{resolved_image}'...")
    response_text = query_claude(image_path, prompt, output_file)

    print("\n--- Response ---")
    print(response_text)

    with open(resolved_output, "w", encoding="utf-8") as f:
        f.write(response_text)
    print(f"\nSaved to {resolved_output}")


if __name__ == "__main__":
    main()