"""Book identifier plugin — VLM result + archive candidates → ranked identification blocks. Input: raw_text string (from text_recognizer), archive_results (deduplicated candidates), images (list of (b64, mime) pairs if is_vlm). Output: list of IdentifyBlock dicts ranked by descending confidence score. Result stored as books.ai_blocks JSON. """ import json from typing import Any, TypeGuard from models import AIConfig, CandidateRecord, IdentifyBlock from ._client import AIClient def _is_str_dict(v: object) -> TypeGuard[dict[str, Any]]: return isinstance(v, dict) def _is_any_list(v: object) -> TypeGuard[list[Any]]: return isinstance(v, list) class BookIdentifierPlugin: """Identifies a book by combining VLM spine text with archive search results.""" category = "book_identifiers" OUTPUT_FORMAT = ( '[{"title": "The Master and Margarita", "author": "Mikhail Bulgakov", ' '"year": "1967", "isbn": "", "publisher": "YMCA Press", ' '"score": 0.95, "sources": ["rusneb", "openlibrary"]}, ' '{"title": "Master i Margarita", "author": "M. Bulgakov", ' '"year": "2005", "isbn": "978-5-17-123456-7", "publisher": "AST", ' '"score": 0.72, "sources": ["web"]}]' ) def __init__( self, plugin_id: str, name: str, ai_config: AIConfig, prompt_text: str, auto_queue: bool, rate_limit_seconds: float, ): self.plugin_id = plugin_id self.name = name self.auto_queue = auto_queue self.rate_limit_seconds = rate_limit_seconds self._client = AIClient(ai_config, self.OUTPUT_FORMAT) self._prompt_text = prompt_text def identify( self, raw_text: str, archive_results: list[CandidateRecord], images: list[tuple[str, str]], ) -> list[IdentifyBlock]: """Call the AI model to produce ranked identification blocks. Args: raw_text: Verbatim text read from the book spine. archive_results: Deduplicated candidates from archive searchers. images: (base64, mime_type) pairs; non-empty only when is_vlm is True. Returns: List of IdentifyBlock dicts ranked by descending score. """ archive_json = json.dumps(archive_results, ensure_ascii=False) raw = self._client.call( self._prompt_text, images, text_vars={"RAW_TEXT": raw_text, "ARCHIVE_RESULTS": archive_json}, output_is_list=True, ) blocks: list[IdentifyBlock] = [] for item in raw: if not _is_str_dict(item): continue sources: list[str] = [] sources_val = item.get("sources") if _is_any_list(sources_val): for sv in sources_val: if isinstance(sv, str): sources.append(sv) block = IdentifyBlock( title=str(item.get("title") or "").strip(), author=str(item.get("author") or "").strip(), year=str(item.get("year") or "").strip(), isbn=str(item.get("isbn") or "").strip(), publisher=str(item.get("publisher") or "").strip(), score=float(item.get("score") or 0.0), sources=sources, ) blocks.append(block) return sorted(blocks, key=lambda b: b.get("score", 0.0), reverse=True) @property def model(self) -> str: """AI model name used for identification.""" return self._client.cfg["model"] @property def max_image_px(self) -> int: """Maximum pixel dimension for images passed to the AI model.""" return self._client.cfg["max_image_px"] @property def confidence_threshold(self) -> float: """Minimum score threshold for the top block to set ai_* fields.""" return self._client.cfg["confidence_threshold"] @property def is_vlm(self) -> bool: """True if images should be included in the request.""" return self._client.cfg["is_vlm"]