feat: LLM Batch API — OpenAI + Anthropic 50% cost reduction (v0.9.8)

Submit up to 1000 unbriefed documents to the provider Batch API in one
shot instead of individual synchronous LLM calls. Results are polled
every 30 minutes via a new Celery beat task and imported automatically.

- New worker: llm_batch_processor.py
  - submit_llm_batch: guards against duplicate batches, builds JSONL
    (OpenAI) or request list (Anthropic), stores state in AppSetting
  - poll_llm_batch_results: checks batch status, imports completed
    results with idempotency, emits notifications + triggers news fetch
- celery_app: register worker, route to llm queue, beat every 30 min
- admin API: POST /submit-llm-batch + GET /llm-batch-status endpoints
- Frontend: submitLlmBatch + getLlmBatchStatus in adminAPI; settings
  page shows batch control row (openai/anthropic only) with live
  progress line while batch is processing

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Jack Levy
2026-03-14 17:35:15 -04:00
parent 7e5c5b473e
commit cba19c7bb3
5 changed files with 467 additions and 0 deletions

View File

@@ -25,6 +25,13 @@ import Link from "next/link";
import { settingsAPI, adminAPI, notificationsAPI, type AdminUser, type LLMModel, type ApiHealthResult } from "@/lib/api";
import { useAuthStore } from "@/stores/authStore";
/**
 * Render an ISO-8601 timestamp as a coarse relative age, e.g. "2h 5m ago".
 *
 * Timestamps may arrive without a timezone suffix; a trailing "Z" is
 * appended when missing, i.e. naive timestamps are assumed to be UTC —
 * NOTE(review): confirm the backend always emits UTC.
 *
 * @param isoStr ISO-8601 date-time string, with or without trailing "Z".
 * @returns "Xh Ym ago" when at least one hour old, otherwise "Ym ago".
 */
function relativeTime(isoStr: string): string {
const utc = isoStr.endsWith("Z") ? isoStr : isoStr + "Z";
// Clamp to zero so minor client/server clock skew (a submitted_at a few
// seconds in the future) renders "0m ago" instead of "-1m ago".
const diff = Math.max(0, Date.now() - new Date(utc).getTime());
const hours = Math.floor(diff / 3_600_000);
const mins = Math.floor((diff % 3_600_000) / 60_000);
return hours > 0 ? `${hours}h ${mins}m ago` : `${mins}m ago`;
}
const LLM_PROVIDERS = [
{
value: "openai",
@@ -169,6 +176,13 @@ export default function SettingsPage() {
enabled: !!currentUser?.is_admin && !!settings?.newsapi_enabled,
staleTime: 60_000,
});
// Poll the admin LLM-batch status endpoint. While a batch reports
// "processing" we refetch every 30s so the inline progress row stays live;
// once it leaves that state, polling stops (refetchInterval returns false).
// Gated on is_admin — non-admins never issue the request.
const { data: batchStatus } = useQuery({
queryKey: ["llm-batch-status"],
queryFn: () => adminAPI.getLlmBatchStatus(),
enabled: !!currentUser?.is_admin,
refetchInterval: (query) => query.state.data?.status === "processing" ? 30_000 : false,
});
const [clearingCache, setClearingCache] = useState(false);
const [cacheClearResult, setCacheClearResult] = useState<string | null>(null);
const clearGnewsCache = async () => {
@@ -837,6 +851,16 @@ export default function SettingsPage() {
},
];
// Batch submission is only offered for providers with a Batch API
// (OpenAI, Anthropic); the control row is hidden for any other provider.
if (settings?.llm_provider === "openai" || settings?.llm_provider === "anthropic") {
recurring.push({
key: "llm-batch",
name: "Submit LLM Batch (50% off)",
description: "Send all unbriefed documents to the Batch API for overnight processing at half the token cost. Returns within seconds — results are imported automatically every 30 minutes via the background poller.",
// On-demand control: clicking the row calls the submit endpoint directly.
fn: adminAPI.submitLlmBatch,
status: "on-demand",
});
}
const maintenance: ControlItem[] = [
{
key: "backfill-actions",
@@ -891,6 +915,11 @@ export default function SettingsPage() {
<>
<div className="divide-y divide-border">
{recurring.map(renderRow)}
{batchStatus?.status === "processing" && (
<div className="py-2 pl-6 text-xs text-muted-foreground">
Batch in progress · {batchStatus.doc_count} documents · submitted {relativeTime(batchStatus.submitted_at!)}
</div>
)}
{ClearCacheRow}
</div>

View File

@@ -297,4 +297,10 @@ export const adminAPI = {
apiClient.get<{ remaining: number; limit: number }>("/api/admin/newsapi-quota").then((r) => r.data),
// Purge the GNews response cache; resolves with the number of entries cleared.
clearGnewsCache: () =>
apiClient.post<{ cleared: number }>("/api/admin/clear-gnews-cache").then((r) => r.data),
// Submit all unbriefed documents to the provider Batch API.
// NOTE(review): response is untyped here, unlike the sibling calls —
// consider adding a type parameter once the endpoint's shape is confirmed.
submitLlmBatch: () =>
apiClient.post("/api/admin/submit-llm-batch").then((r) => r.data),
// Fetch current batch state; optional fields are only present when a batch
// has been submitted (status alone is always returned).
getLlmBatchStatus: () =>
apiClient.get<{ status: string; batch_id?: string; doc_count?: number; submitted_at?: string }>(
"/api/admin/llm-batch-status"
).then((r) => r.data),
};