feat: LLM Batch API — OpenAI + Anthropic 50% cost reduction (v0.9.8)

Submit up to 1000 unbriefed documents to the provider's Batch API in one shot instead of making individual synchronous LLM calls. Results are polled every 30 minutes by a new Celery beat task and imported automatically.

- New worker: llm_batch_processor.py
- submit_llm_batch: guards against duplicate batches, builds JSONL (OpenAI) or a request list (Anthropic), and stores state in AppSetting
- poll_llm_batch_results: checks batch status, imports completed results idempotently, emits notifications, and triggers a news fetch
- celery_app: registers the worker, routes it to the llm queue, and schedules the beat every 30 min
- admin API: POST /submit-llm-batch and GET /llm-batch-status endpoints
- Frontend: submitLlmBatch and getLlmBatchStatus in adminAPI; the settings page shows a batch control row (openai/anthropic only) with a live progress line while a batch is processing

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-14 17:35:15 -04:00
parent 7e5c5b473e
commit cba19c7bb3
5 changed files with 467 additions and 0 deletions
--- a/frontend/app/settings/page.tsx
+++ b/frontend/app/settings/page.tsx
@@ -25,6 +25,13 @@ import Link from "next/link";
 import { settingsAPI, adminAPI, notificationsAPI, type AdminUser, type LLMModel, type ApiHealthResult } from "@/lib/api";
 import { useAuthStore } from "@/stores/authStore";

+/**
+ * Formats how long ago an ISO-8601 timestamp occurred, e.g. "2h 5m ago" or "12m ago".
+ *
+ * Timestamps without a zone designator are interpreted as UTC. Timestamps that
+ * already end in "Z" or a numeric offset such as "+00:00" are parsed as-is.
+ *
+ * @param isoStr - ISO-8601 date-time string, with or without a zone designator.
+ * @returns "Xh Ym ago" when at least one hour has elapsed, otherwise "Ym ago".
+ *   Timestamps in the future clamp to "0m ago"; unparseable input yields
+ *   "some time ago" instead of leaking "NaN" into the UI.
+ */
+function relativeTime(isoStr: string): string {
+  // Only append "Z" when no zone designator is present: appending it after a
+  // numeric offset ("...+00:00Z") produces a string that is not valid ISO-8601.
+  const hasZone = /(?:Z|[+-]\d{2}:?\d{2})$/i.test(isoStr);
+  const submittedMs = new Date(hasZone ? isoStr : `${isoStr}Z`).getTime();
+  if (Number.isNaN(submittedMs)) return "some time ago";
+
+  // Clamp so client/server clock skew never renders a negative duration.
+  const elapsedMs = Math.max(0, Date.now() - submittedMs);
+  const hours = Math.floor(elapsedMs / 3_600_000);
+  const mins = Math.floor((elapsedMs % 3_600_000) / 60_000);
+  return hours > 0 ? `${hours}h ${mins}m ago` : `${mins}m ago`;
+}
+
 const LLM_PROVIDERS = [
  {
    value: "openai",
@@ -169,6 +176,13 @@ export default function SettingsPage() {
    enabled: !!currentUser?.is_admin && !!settings?.newsapi_enabled,
    staleTime: 60_000,
  });
+  const { data: batchStatus } = useQuery({
+    queryKey: ["llm-batch-status"],
+    queryFn: () => adminAPI.getLlmBatchStatus(),
+    enabled: !!currentUser?.is_admin,
+    refetchInterval: (query) => query.state.data?.status === "processing" ? 30_000 : false,
+  });
+
  const [clearingCache, setClearingCache] = useState(false);
  const [cacheClearResult, setCacheClearResult] = useState<string | null>(null);
  const clearGnewsCache = async () => {
@@ -837,6 +851,16 @@ export default function SettingsPage() {
            },
          ];

+          if (settings?.llm_provider === "openai" || settings?.llm_provider === "anthropic") {
+            recurring.push({
+              key: "llm-batch",
+              name: "Submit LLM Batch (50% off)",
+              description: "Send all unbriefed documents to the Batch API for overnight processing at half the token cost. Returns within seconds — results are imported automatically every 30 minutes via the background poller.",
+              fn: adminAPI.submitLlmBatch,
+              status: "on-demand",
+            });
+          }
+
          const maintenance: ControlItem[] = [
            {
              key: "backfill-actions",
@@ -891,6 +915,11 @@ export default function SettingsPage() {
            <>
              <div className="divide-y divide-border">
                {recurring.map(renderRow)}
+                {batchStatus?.status === "processing" && (
+                  <div className="py-2 pl-6 text-xs text-muted-foreground">
+                    Batch in progress · {batchStatus.doc_count} documents · submitted {relativeTime(batchStatus.submitted_at!)}
+                  </div>
+                )}
                {ClearCacheRow}
              </div>