feat: LLM Batch API — OpenAI + Anthropic 50% cost reduction (v0.9.8)
Submit up to 1000 unbriefed documents to the provider Batch API in one
shot instead of individual synchronous LLM calls. Results are polled
every 30 minutes via a new Celery beat task and imported automatically.
- New worker: llm_batch_processor.py
  - submit_llm_batch: guards against duplicate batches, builds JSONL
    (OpenAI) or request list (Anthropic), stores state in AppSetting
  - poll_llm_batch_results: checks batch status, imports completed
    results with idempotency, emits notifications + triggers news fetch
- celery_app: register worker, route to llm queue, beat every 30 min
- admin API: POST /submit-llm-batch + GET /llm-batch-status endpoints
- Frontend: submitLlmBatch + getLlmBatchStatus in adminAPI; settings
  page shows batch control row (openai/anthropic only) with live
  progress line while batch is processing
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -25,6 +25,13 @@ import Link from "next/link";
|
||||
import { settingsAPI, adminAPI, notificationsAPI, type AdminUser, type LLMModel, type ApiHealthResult } from "@/lib/api";
|
||||
import { useAuthStore } from "@/stores/authStore";
|
||||
|
||||
/**
 * Formats how long ago an ISO-8601 timestamp occurred, e.g. `"2h 5m ago"` or `"12m ago"`.
 *
 * A timestamp without a timezone designator is interpreted as UTC by appending `Z`.
 * A timestamp that already carries one — a trailing `Z` or a numeric offset such as
 * `+00:00` / `-0500` — is parsed as written; appending `Z` to an offset form would
 * produce an unparseable string and render as "NaNm ago".
 *
 * @param isoStr - ISO-8601 date-time string, with or without a timezone designator.
 * @returns `"<h>h <m>m ago"` once at least one hour has elapsed, otherwise `"<m>m ago"`.
 *   A timestamp in the future is reported as `"0m ago"`.
 */
function relativeTime(isoStr: string): string {
  // Trailing "Z" or a ±HH:mm / ±HHmm offset means the zone is already explicit.
  const hasZone = /(?:Z|[+-]\d{2}:?\d{2})$/.test(isoStr);
  const timestamp = new Date(hasZone ? isoStr : isoStr + "Z").getTime();
  // Clamp at zero so a timestamp slightly ahead of the local clock never yields negative minutes.
  const elapsed = Math.max(0, Date.now() - timestamp);
  const hours = Math.floor(elapsed / 3_600_000);
  const mins = Math.floor((elapsed % 3_600_000) / 60_000);
  return hours > 0 ? `${hours}h ${mins}m ago` : `${mins}m ago`;
}
|
||||
|
||||
const LLM_PROVIDERS = [
|
||||
{
|
||||
value: "openai",
|
||||
@@ -169,6 +176,13 @@ export default function SettingsPage() {
|
||||
enabled: !!currentUser?.is_admin && !!settings?.newsapi_enabled,
|
||||
staleTime: 60_000,
|
||||
});
|
||||
const { data: batchStatus } = useQuery({
|
||||
queryKey: ["llm-batch-status"],
|
||||
queryFn: () => adminAPI.getLlmBatchStatus(),
|
||||
enabled: !!currentUser?.is_admin,
|
||||
refetchInterval: (query) => query.state.data?.status === "processing" ? 30_000 : false,
|
||||
});
|
||||
|
||||
const [clearingCache, setClearingCache] = useState(false);
|
||||
const [cacheClearResult, setCacheClearResult] = useState<string | null>(null);
|
||||
const clearGnewsCache = async () => {
|
||||
@@ -837,6 +851,16 @@ export default function SettingsPage() {
|
||||
},
|
||||
];
|
||||
|
||||
if (settings?.llm_provider === "openai" || settings?.llm_provider === "anthropic") {
|
||||
recurring.push({
|
||||
key: "llm-batch",
|
||||
name: "Submit LLM Batch (50% off)",
|
||||
description: "Send all unbriefed documents to the Batch API for overnight processing at half the token cost. Returns within seconds — results are imported automatically every 30 minutes via the background poller.",
|
||||
fn: adminAPI.submitLlmBatch,
|
||||
status: "on-demand",
|
||||
});
|
||||
}
|
||||
|
||||
const maintenance: ControlItem[] = [
|
||||
{
|
||||
key: "backfill-actions",
|
||||
@@ -891,6 +915,11 @@ export default function SettingsPage() {
|
||||
<>
|
||||
<div className="divide-y divide-border">
|
||||
{recurring.map(renderRow)}
|
||||
{batchStatus?.status === "processing" && (
|
||||
<div className="py-2 pl-6 text-xs text-muted-foreground">
|
||||
Batch in progress · {batchStatus.doc_count} documents · submitted {relativeTime(batchStatus.submitted_at!)}
|
||||
</div>
|
||||
)}
|
||||
{ClearCacheRow}
|
||||
</div>
|
||||
|
||||
|
||||
Reference in New Issue
Block a user