Photo-based book cataloger with AI identification. Room → Cabinet → Shelf → Book hierarchy; FastAPI + SQLite backend; vanilla JS SPA; OpenAI-compatible plugin system for boundary detection, text recognition, and archive search.
246 lines
9.0 KiB
Python
246 lines
9.0 KiB
Python
"""Book identification logic: status computation, AI result application, plugin runners."""
|
|
|
|
import json
|
|
|
|
import db
|
|
from db import now
|
|
from errors import BookNotFoundError, NoRawTextError
|
|
from logic.boundaries import book_spine_source
|
|
from logic.images import prep_img_b64
|
|
from models import (
|
|
AIIdentifyResult,
|
|
BookIdentifierPlugin,
|
|
BookRow,
|
|
CandidateRecord,
|
|
TextRecognizeResult,
|
|
TextRecognizerPlugin,
|
|
)
|
|
|
|
# Book attributes that have a parallel AI suggestion stored as ai_<name>;
# compute_status compares each one against its user-entered counterpart.
AI_FIELDS = (
    "title",
    "author",
    "year",
    "isbn",
    "publisher",
)

# User attributes that must all be non-empty before compute_status reports
# a book as 'user_approved'.
_APPROVED_REQUIRED = (
    "title",
    "author",
    "year",
)
|
|
|
|
|
|
def compute_status(book: BookRow) -> str:
    """Derive the identification status of a book from its user and AI fields.

    A book whose ai_title is empty is 'unidentified'. Otherwise it is
    'user_approved' when every attribute in _APPROVED_REQUIRED is non-empty
    and every non-empty ai_* attribute equals its user counterpart; in all
    other cases it is 'ai_identified'. Values are compared after stripping
    surrounding whitespace, and falsy values are treated as empty strings.

    Args:
        book: The book row to evaluate.

    Returns:
        One of 'unidentified', 'ai_identified', or 'user_approved'.
    """

    def text(attr: str) -> str:
        # Normalised view of one attribute: falsy -> "", whitespace trimmed.
        return (getattr(book, attr) or "").strip()

    if not text("ai_title"):
        return "unidentified"

    required_filled = True
    for name in _APPROVED_REQUIRED:
        if not text(name):
            required_filled = False
            break

    suggestions_accepted = True
    for name in AI_FIELDS:
        suggestion = text(f"ai_{name}")
        # An empty suggestion never counts as a difference.
        if suggestion and text(name) != suggestion:
            suggestions_accepted = False
            break

    if required_filled and suggestions_accepted:
        return "user_approved"
    return "ai_identified"
|
|
|
|
|
|
def build_query(book: BookRow) -> str:
    """Build a search query string from the best data available for a book.

    Sources are tried in order and the first non-empty query wins:

    1. Each stored candidate, in list order: its author and title joined by
       a single space (either part may be missing).
    2. The book's ai_author and ai_title, joined the same way.
    3. The book's raw_text.

    All parts are stripped of surrounding whitespace before use.

    Args:
        book: The book row to build a query for.

    Returns:
        Query string, empty if no usable data is available.
    """

    def joined(author: str, title: str) -> str:
        # Join the non-empty trimmed parts with one space.
        return " ".join(part for part in (author.strip(), title.strip()) if part)

    records: list[dict[str, object]] = json.loads(book.candidates or "[]")
    for record in records:
        query = joined(str(record.get("author") or ""), str(record.get("title") or ""))
        if query:
            return query

    query = joined(book.ai_author or "", book.ai_title or "")
    if query:
        return query

    return (book.raw_text or "").strip()
|
|
|
|
|
|
def save_user_fields(book_id: str, title: str, author: str, year: str, isbn: str, publisher: str, notes: str) -> str:
    """Store the user's edits for a book and refresh its identification status.

    The values are written through db.set_user_book_fields, the book is then
    re-read and its status recomputed with compute_status and persisted, all
    within one transaction. If the book cannot be re-read the status written
    is 'unidentified'.

    NOTE(review): the earlier documentation of this function states that the
    write also sets the ai_* fields to the user values so they count as
    approved; that happens inside db.set_user_book_fields and is not visible
    here.

    Args:
        book_id: ID of the book to update.
        title: User-provided title.
        author: User-provided author.
        year: User-provided year.
        isbn: User-provided ISBN.
        publisher: User-provided publisher.
        notes: User-provided notes.

    Returns:
        Updated identification_status string.
    """
    with db.transaction() as conn:
        db.set_user_book_fields(conn, book_id, title, author, year, isbn, publisher, notes)
        row = db.get_book(conn, book_id)
        if row:
            new_status = compute_status(row)
        else:
            new_status = "unidentified"
        db.set_book_status(conn, book_id, new_status)
    return new_status
|
|
|
|
|
|
def dismiss_field(book_id: str, field: str, value: str) -> tuple[str, list[CandidateRecord]]:
    """Dismiss a suggested value for one field of a book.

    With a non-empty value, every stored candidate whose stripped `field`
    equals value is removed, and ai_<field> is overwritten with the user's
    current value only if its stripped form equals value. With an empty
    value, ai_<field> is overwritten with the user's current value
    unconditionally. The status is then recomputed and stored.

    Args:
        book_id: ID of the book.
        field: Field name (one of AI_FIELDS).
        value: Candidate value to dismiss, or empty string to dismiss the AI suggestion.

    Returns:
        (identification_status, updated_candidates), both taken from the book
        as re-read after the changes.

    Raises:
        BookNotFoundError: If book_id does not exist.
    """
    with db.transaction() as conn:
        row = db.get_book(conn, book_id)
        if not row:
            raise BookNotFoundError(book_id)

        stored: list[CandidateRecord] = json.loads(row.candidates or "[]")
        user_value = str(getattr(row, field) or "")

        # An empty value always reverts the AI field; a non-empty value does
        # so only when it is the value the AI field currently holds.
        revert_ai = True
        if value:
            kept = [rec for rec in stored if (str(rec.get(field) or "")).strip() != value]
            db.set_book_candidates(conn, book_id, json.dumps(kept))
            revert_ai = (getattr(row, f"ai_{field}") or "").strip() == value
        if revert_ai:
            db.set_book_ai_field(conn, book_id, field, user_value)

        # Re-read so status and candidates reflect what was just written.
        refreshed = db.get_book(conn, book_id)
        status = compute_status(refreshed) if refreshed else "unidentified"
        db.set_book_status(conn, book_id, status)
        remaining: list[CandidateRecord] = json.loads(refreshed.candidates or "[]") if refreshed else []
    return status, remaining
|
|
|
|
|
|
def apply_ai_result(book_id: str, result: AIIdentifyResult, confidence_threshold: float = 0.8) -> None:
    """Record an AI identification result on a book.

    The confidence (0 when missing or falsy) is always stored together with
    the current timestamp. The ai_* fields are written, and the status
    recomputed, only when the confidence is not below the threshold; a
    below-threshold result leaves fields and status untouched.

    Args:
        book_id: ID of the book to update.
        result: AI identification result dict.
        confidence_threshold: Minimum confidence to write ai_* fields (default 0.8).
    """
    score = float(result.get("confidence") or 0)
    with db.transaction() as conn:
        db.set_book_confidence(conn, book_id, score, now())

        accepted = not score < confidence_threshold
        if accepted:
            # Missing or falsy result values are stored as empty strings.
            suggested = [result.get(key) or "" for key in ("title", "author", "year", "isbn", "publisher")]
            db.set_book_ai_fields(conn, book_id, *suggested)

            row = db.get_book(conn, book_id)
            if row:
                db.set_book_status(conn, book_id, compute_status(row))
|
|
|
|
|
|
def run_text_recognizer(plugin: TextRecognizerPlugin, book_id: str) -> BookRow:
    """Run a text recognizer on a book's spine image and persist the outcome.

    The spine image is prepared with the plugin's max_image_px limit and
    passed to plugin.recognize. The returned raw_text is stored on the book.
    Any earlier candidate from the same plugin is removed, and a new one is
    added only if at least one of title, author, year, or publisher was
    recognized. The candidate's isbn is always stored empty.

    Args:
        plugin: The text recognizer plugin to execute.
        book_id: ID of the book to process.

    Returns:
        Updated BookRow after storing the result.

    Raises:
        BookNotFoundError: If book_id does not exist.
    """
    recognized_keys = ("title", "author", "year", "publisher")

    with db.transaction() as conn:
        row = db.get_book(conn, book_id)
        if not row:
            raise BookNotFoundError(book_id)

        image_path, crop = book_spine_source(conn, book_id)
        encoded, media_type = prep_img_b64(image_path, crop, max_px=plugin.max_image_px)
        result: TextRecognizeResult = plugin.recognize(encoded, media_type)
        raw_text = result.get("raw_text") or ""

        # Key order matters for the serialized JSON: source, recognized
        # fields, then isbn.
        entry: CandidateRecord = {
            "source": plugin.plugin_id,
            **{key: (result.get(key) or "").strip() for key in recognized_keys},
            "isbn": "",
        }

        # Drop this plugin's previous candidate before (maybe) adding the new one.
        merged: list[CandidateRecord] = [
            other for other in json.loads(row.candidates or "[]") if other.get("source") != plugin.plugin_id
        ]
        if any(entry[key] for key in recognized_keys):
            merged.append(entry)

        db.set_book_raw_text(conn, book_id, raw_text)
        db.set_book_candidates(conn, book_id, json.dumps(merged))

        refreshed = db.get_book(conn, book_id)
        if not refreshed:
            raise BookNotFoundError(book_id)
        return refreshed
|
|
|
|
|
|
def run_book_identifier(plugin: BookIdentifierPlugin, book_id: str) -> BookRow:
    """Identify a book from its recognized text and record the outcome.

    The plugin is called with the book's stripped raw_text. The result is
    handed to apply_ai_result together with the plugin's confidence
    threshold, and is then stored as this plugin's entry in the book's
    candidates list, replacing any earlier entry from the same plugin.

    A result in which title, author, year, isbn, and publisher are all empty
    only removes the earlier entry; no blank candidate is stored. This
    matches run_text_recognizer and avoids entries that dismiss_field could
    never remove, since it filters on a non-empty value.

    Args:
        plugin: The book identifier plugin to execute.
        book_id: ID of the book to process.

    Returns:
        Updated BookRow after storing the identification result.

    Raises:
        BookNotFoundError: If book_id does not exist.
        NoRawTextError: If the book has no raw_text (text recognizer has not run).
    """
    # Read and validate inside a short transaction; the plugin call below
    # runs outside of it.
    with db.transaction() as c:
        book = db.get_book(c, book_id)
        if not book:
            raise BookNotFoundError(book_id)
        raw_text = (book.raw_text or "").strip()
        if not raw_text:
            raise NoRawTextError(book_id)

    result: AIIdentifyResult = plugin.identify(raw_text)

    # apply_ai_result manages its own transaction
    apply_ai_result(book_id, result, plugin.confidence_threshold)

    with db.transaction() as c:
        book = db.get_book(c, book_id)
        if not book:
            raise BookNotFoundError(book_id)

        cand: CandidateRecord = {
            "source": plugin.plugin_id,
            "title": (result.get("title") or "").strip(),
            "author": (result.get("author") or "").strip(),
            "year": (result.get("year") or "").strip(),
            "isbn": (result.get("isbn") or "").strip(),
            "publisher": (result.get("publisher") or "").strip(),
        }

        # Replace this plugin's previous candidate with the new one.
        existing: list[CandidateRecord] = json.loads(book.candidates or "[]")
        existing = [cd for cd in existing if cd.get("source") != plugin.plugin_id]
        # Only keep a candidate that actually suggests something.
        if any(value for key, value in cand.items() if key != "source"):
            existing.append(cand)
        db.set_book_candidates(c, book_id, json.dumps(existing))

        updated = db.get_book(c, book_id)
        if not updated:
            raise BookNotFoundError(book_id)
        return updated
|