- log_thread.py: thread-safe ContextVar bridge so executor threads can log
individual LLM calls and archive searches back to the event loop
- ai_log.py: init_thread_logging(), notify_entity_update(); WS now pushes
entity_update messages when book data changes after any plugin or batch run
- batch.py: replace batch_pending.json with batch_queue SQLite table;
run_batch_consumer() reads queue dynamically so new books can be added
while batch is running; add_to_queue() deduplicates
- migrate.py: fix _migrate_v1 (clear-on-startup bug); add _migrate_v2 for
batch_queue table
- _client.py / archive.py / identification.py: wrap each LLM API call and
archive search with log_thread start/finish entries
- api.py: POST /api/batch returns {already_running, added}; notify_entity_update
after identify pipeline
- models.default.yaml: strengthen ai_identify confidence-scoring instructions;
warn against placeholder data
- detail-render.js: book log entries show clickable ID + spine thumbnail;
book spine/title images open full-screen popup
- events.js: batch-start handles already_running+added; open-img-popup action
- init.js: entity_update WS handler; image popup close listeners
- overlays.css / index.html: full-screen image popup overlay
- eslint.config.js: add new globals; fix no-redeclare/no-unused-vars for
multi-file global architecture; all lint errors resolved
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
61 lines · 2.0 KiB · Python
"""Text recognizer plugin — spine image → raw text + structured fields.
|
|
|
|
Input: book spine image.
|
|
Output: {"raw_text": "...", "title": "...", "author": "...", "year": "...",
|
|
"publisher": "...", "other": "..."}
|
|
raw_text — all visible text verbatim, line-break separated.
|
|
other fields — VLM interpretation of raw_text.
|
|
Result added to books.candidates and books.raw_text.
|
|
"""
|
|
|
|
from models import AIConfig, TextRecognizeResult
|
|
|
|
from ._client import AIClient
|
|
|
|
|
|
class TextRecognizerPlugin:
    """Extracts the text visible on a book spine image using a VLM.

    Wraps an AIClient configured with a fixed JSON output schema; each call
    to :meth:`recognize` sends one image plus the configured prompt and
    coerces every field of the model's JSON reply to a plain string.
    """

    # Plugin registry category this class belongs to.
    category = "text_recognizers"

    # Example JSON reply handed to the client so the model mirrors its shape.
    OUTPUT_FORMAT = (
        '{"raw_text": "The Great Gatsby\\nF. Scott Fitzgerald\\nScribner", '
        '"title": "The Great Gatsby", "author": "F. Scott Fitzgerald", '
        '"year": "", "publisher": "Scribner", "other": ""}'
    )

    def __init__(
        self,
        plugin_id: str,
        name: str,
        ai_config: AIConfig,
        prompt_text: str,
        auto_queue: bool,
        rate_limit_seconds: float,
    ):
        self.plugin_id = plugin_id
        self.name = name
        self.auto_queue = auto_queue
        self.rate_limit_seconds = rate_limit_seconds
        self._prompt_text = prompt_text
        self._client = AIClient(ai_config, self.OUTPUT_FORMAT)

    @staticmethod
    def _field(payload: dict, key: str) -> str:
        """Coerce a possibly-missing or None JSON field to a plain string."""
        return str(payload.get(key) or "")

    def recognize(self, image_b64: str, image_mime: str) -> TextRecognizeResult:
        """Run the VLM on one spine image and return the structured result.

        Returns a TextRecognizeResult carrying raw_text, title, author,
        year, publisher and other — each guaranteed to be a str.
        """
        payload = self._client.call(self._prompt_text, [(image_b64, image_mime)])
        pick = self._field
        return TextRecognizeResult(
            raw_text=pick(payload, "raw_text"),
            title=pick(payload, "title"),
            author=pick(payload, "author"),
            year=pick(payload, "year"),
            publisher=pick(payload, "publisher"),
            other=pick(payload, "other"),
        )

    @property
    def model(self) -> str:
        """Model identifier taken from the underlying client's config."""
        return self._client.cfg["model"]

    @property
    def max_image_px(self) -> int:
        """Maximum image dimension (px) taken from the client's config."""
        return self._client.cfg["max_image_px"]
|