feat: LLM Batch API — OpenAI + Anthropic 50% cost reduction (v0.9.8)
Submit up to 1000 unbriefed documents to the provider Batch API in one
shot instead of individual synchronous LLM calls. Results are polled
every 30 minutes via a new Celery beat task and imported automatically.
- New worker: llm_batch_processor.py
- submit_llm_batch: guards against duplicate batches, builds JSONL
(OpenAI) or request list (Anthropic), stores state in AppSetting
- poll_llm_batch_results: checks batch status, imports completed
results with idempotency, emits notifications + triggers news fetch
- celery_app: register worker, route to llm queue, beat every 30 min
- admin API: POST /submit-llm-batch + GET /llm-batch-status endpoints
- Frontend: submitLlmBatch + getLlmBatchStatus in adminAPI; settings
page shows batch control row (openai/anthropic only) with live
progress line while batch is processing
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -281,6 +281,31 @@ async def clear_gnews_cache_endpoint(current_user: User = Depends(get_current_ad
|
||||
return {"cleared": cleared}
|
||||
|
||||
|
||||
@router.post("/submit-llm-batch")
async def submit_llm_batch_endpoint(current_user: User = Depends(get_current_admin)):
    """Submit all unbriefed documents to the Batch API (OpenAI/Anthropic only)."""
    # Lazy import: keeps the Celery worker module (and its import-time side
    # effects) out of the web process until an admin actually triggers a batch.
    from app.workers.llm_batch_processor import submit_llm_batch

    # Fire-and-forget: the heavy lifting happens in the worker; we only hand
    # back the Celery task id so the caller can correlate it with worker logs.
    queued = submit_llm_batch.delay()
    return {"task_id": queued.id, "status": "queued"}
@router.get("/llm-batch-status")
async def get_llm_batch_status(
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_admin),
):
    """Return the current batch job state, or no_active_batch if none.

    The batch worker persists its progress as a JSON document in the
    AppSetting row keyed ``llm_active_batch``; this endpoint relays that
    document verbatim to the admin UI (presumably a dict with a ``status``
    field — the exact schema is defined by the worker, not visible here).
    """
    import json

    from app.models.setting import AppSetting

    row = await db.get(AppSetting, "llm_active_batch")
    if not row:
        return {"status": "no_active_batch"}
    try:
        return json.loads(row.value)
    except (TypeError, ValueError):
        # Narrowed from a bare `except Exception`: json.loads raises
        # json.JSONDecodeError (a ValueError subclass) on malformed text and
        # TypeError when row.value is None or not str/bytes. Any other
        # exception (e.g. a broken attribute/session) is a real bug and
        # should propagate rather than be masked as "unknown".
        return {"status": "unknown"}
@router.get("/api-health")
|
||||
async def api_health(current_user: User = Depends(get_current_admin)):
|
||||
"""Test each external API and return status + latency for each."""
|
||||
|
||||
Reference in New Issue
Block a user