feat: API optimizations — quota batching, ETags, caching, async sponsor (v0.9.7)

Nine efficiency improvements across the data pipeline:

1. NewsAPI OR batching (news_service.py + news_fetcher.py)
   - Combine up to 4 bills per NewsAPI call using OR query syntax
   - NEWSAPI_BATCH_SIZE=4 means ~4× effective daily quota (100→400 bill-fetches)
   - fetch_news_for_bill_batch task; fetch_news_for_active_bills queues batches

2. Google News RSS cache (news_service.py)
   - 2-hour Redis cache shared between news_fetcher and trend_scorer
   - Eliminates duplicate RSS hits when both workers run against the same bill
   - clear_gnews_cache() admin helper + admin endpoint

3. pytrends keyword batching (trends_service.py + trend_scorer.py)
   - Compare up to 5 bills per pytrends call instead of 1
   - get_trends_scores_batch() returns scores in original order
   - Reduces pytrends calls by ~5× and associated rate-limit risk

4. GovInfo ETags (govinfo_api.py + document_fetcher.py)
   - If-None-Match conditional GET; DocumentUnchangedError on HTTP 304
   - ETags stored in Redis (30-day TTL) keyed by MD5(url)
   - document_fetcher catches DocumentUnchangedError → {"status": "unchanged"}

5. Anthropic prompt caching (llm_service.py)
   - cache_control: {type: ephemeral} on system messages in AnthropicProvider
   - Caches the ~700-token system prompt server-side; ~50% cost reduction on repeated calls within the 5-minute cache window

6. Async sponsor fetch (congress_poller.py)
   - New fetch_sponsor_for_bill Celery task replaces blocking get_bill_detail() inline in poll loop
   - Bills saved immediately with sponsor_id=None; sponsor linked async
   - Removes 0.25s sleep per new bill from poll hot path

7. Skip doc fetch for procedural actions (congress_poller.py)
   - _DOC_PRODUCING_CATEGORIES = {vote, committee_report, presidential, ...}
   - fetch_bill_documents only enqueued when action is likely to produce new GovInfo text (saves ~60–70% of unnecessary document fetch attempts)

8. Adaptive poll frequency (congress_poller.py)
   - _is_congress_off_hours(): weekends + before 9AM / after 9PM EST
   - Skips poll if off-hours AND last poll < 1 hour ago
   - Prevents wasteful polling when Congress is not in session

9. Admin panel additions (admin.py + settings/page.tsx + api.ts)
   - GET /api/admin/newsapi-quota → remaining calls today
   - POST /api/admin/clear-gnews-cache → flush RSS cache
   - Settings page shows NewsAPI quota remaining (amber if < 10)
   - "Clear Google News Cache" button in Manual Controls

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-14 16:50:51 -04:00
parent 247a874c8d
commit 7e5c5b473e
16 changed files with 676 additions and 162 deletions
--- a/frontend/app/settings/page.tsx
+++ b/frontend/app/settings/page.tsx
@@ -26,10 +26,34 @@ import { settingsAPI, adminAPI, notificationsAPI, type AdminUser, type LLMModel,
 import { useAuthStore } from "@/stores/authStore";

 const LLM_PROVIDERS = [
-  { value: "openai", label: "OpenAI", hint: "Requires OPENAI_API_KEY in .env" },
-  { value: "anthropic", label: "Anthropic (Claude)", hint: "Requires ANTHROPIC_API_KEY in .env" },
-  { value: "gemini", label: "Google Gemini", hint: "Requires GEMINI_API_KEY in .env" },
-  { value: "ollama", label: "Ollama (Local)", hint: "Requires Ollama running on host" },
+  {
+    value: "openai",
+    label: "OpenAI",
+    hint: "Requires OPENAI_API_KEY in .env",
+    rateNote: "Free: 3 RPM · Paid tier 1: 500 RPM",
+    modelNote: "Recommended: gpt-4o-mini — excellent JSON quality at ~10× lower cost than gpt-4o",
+  },
+  {
+    value: "anthropic",
+    label: "Anthropic (Claude)",
+    hint: "Requires ANTHROPIC_API_KEY in .env",
+    rateNote: "Tier 1: 50 RPM · Tier 2: 1,000 RPM",
+    modelNote: "Recommended: claude-sonnet-4-6 — matches Opus quality at ~5× lower cost",
+  },
+  {
+    value: "gemini",
+    label: "Google Gemini",
+    hint: "Requires GEMINI_API_KEY in .env",
+    rateNote: "Free: 15 RPM · Paid: 2,000 RPM",
+    modelNote: "Recommended: gemini-2.0-flash — best value, generous free tier",
+  },
+  {
+    value: "ollama",
+    label: "Ollama (Local)",
+    hint: "Requires Ollama running on host",
+    rateNote: "No API rate limits",
+    modelNote: "Recommended: llama3.1 or mistral for reliable structured JSON output",
+  },
 ];


@@ -139,6 +163,27 @@ export default function SettingsPage() {
  const [confirmDelete, setConfirmDelete] = useState<number | null>(null);
  const [showMaintenance, setShowMaintenance] = useState(false);

+  const { data: newsApiQuota, refetch: refetchQuota } = useQuery({
+    queryKey: ["newsapi-quota"],
+    queryFn: () => adminAPI.getNewsApiQuota(),
+    enabled: !!currentUser?.is_admin && !!settings?.newsapi_enabled,
+    staleTime: 60_000,
+  });
+  const [clearingCache, setClearingCache] = useState(false);
+  const [cacheClearResult, setCacheClearResult] = useState<string | null>(null);
+  const clearGnewsCache = async () => {
+    setClearingCache(true);
+    setCacheClearResult(null);
+    try {
+      const result = await adminAPI.clearGnewsCache();
+      setCacheClearResult(`Cleared ${result.cleared} cached entries`);
+    } catch (e: unknown) {
+      setCacheClearResult(e instanceof Error ? e.message : "Failed");
+    } finally {
+      setClearingCache(false);
+    }
+  };
+
  const testLLM = async () => {
    setTesting(true);
    setTestResult(null);
@@ -421,26 +466,42 @@ export default function SettingsPage() {
          <Cpu className="w-4 h-4" /> LLM Provider
        </h2>
        <div className="space-y-2">
-          {LLM_PROVIDERS.map(({ value, label, hint }) => (
-            <label key={value} className="flex items-start gap-3 cursor-pointer">
-              <input
-                type="radio"
-                name="provider"
-                value={value}
-                checked={settings?.llm_provider === value}
-                onChange={() => {
-                  updateSetting.mutate({ key: "llm_provider", value });
-                  setShowCustomModel(false);
-                  setCustomModel("");
-                }}
-                className="mt-0.5"
-              />
-              <div>
-                <div className="text-sm font-medium">{label}</div>
-                <div className="text-xs text-muted-foreground">{hint}</div>
-              </div>
-            </label>
-          ))}
+          {LLM_PROVIDERS.map(({ value, label, hint, rateNote, modelNote }) => {
+            const hasKey = settings?.api_keys_configured?.[value] ?? true;
+            return (
+              <label key={value} className={`flex items-start gap-3 ${hasKey ? "cursor-pointer" : "cursor-not-allowed opacity-60"}`}>
+                <input
+                  type="radio"
+                  name="provider"
+                  value={value}
+                  checked={settings?.llm_provider === value}
+                  disabled={!hasKey}
+                  onChange={() => {
+                    updateSetting.mutate({ key: "llm_provider", value });
+                    setShowCustomModel(false);
+                    setCustomModel("");
+                  }}
+                  className="mt-0.5"
+                />
+                <div className="flex-1">
+                  <div className="flex items-center gap-2">
+                    <span className="text-sm font-medium">{label}</span>
+                    {hasKey ? (
+                      <span className="text-xs px-1.5 py-0.5 rounded-full bg-green-100 text-green-700 dark:bg-green-900/30 dark:text-green-400 font-medium">
+                        {value === "ollama" ? "local" : "key set"}
+                      </span>
+                    ) : (
+                      <span className="text-xs px-1.5 py-0.5 rounded-full bg-muted text-muted-foreground font-medium">
+                        no key
+                      </span>
+                    )}
+                  </div>
+                  <div className="text-xs text-muted-foreground">{hint}</div>
+                  <div className="text-xs text-muted-foreground mt-0.5">{rateNote} · {modelNote}</div>
+                </div>
+              </label>
+            );
+          })}
        </div>

        {/* Model picker — live from provider API */}
@@ -568,9 +629,16 @@ export default function SettingsPage() {
              <div className="font-medium">NewsAPI.org</div>
              <div className="text-xs text-muted-foreground">100 requests/day free tier</div>
            </div>
-            <span className={`text-xs font-medium ${settings?.newsapi_enabled ? "text-green-500" : "text-muted-foreground"}`}>
-              {settings?.newsapi_enabled ? "Configured" : "Not configured"}
-            </span>
+            <div className="flex items-center gap-3">
+              {newsApiQuota && (
+                <span className={`text-xs ${newsApiQuota.remaining < 10 ? "text-amber-500" : "text-muted-foreground"}`}>
+                  {newsApiQuota.remaining}/{newsApiQuota.limit} remaining today
+                </span>
+              )}
+              <span className={`text-xs font-medium ${settings?.newsapi_enabled ? "text-green-500" : "text-muted-foreground"}`}>
+                {settings?.newsapi_enabled ? "Configured" : "Not configured"}
+              </span>
+            </div>
          </div>
          <div className="flex items-center justify-between py-2 border-t border-border">
            <div>
@@ -697,6 +765,31 @@ export default function SettingsPage() {
            </div>
          );

+          // Clear RSS cache — inline action (returns count, not task_id)
+          const ClearCacheRow = (
+            <div className="flex items-start gap-3 py-3.5">
+              <div className="w-2.5 h-2.5 rounded-full mt-1 shrink-0 bg-border" />
+              <div className="flex-1 min-w-0 space-y-0.5">
+                <div className="flex items-center gap-2 flex-wrap">
+                  <span className="text-sm font-medium">Clear Google News Cache</span>
+                  {cacheClearResult && (
+                    <span className="text-xs text-green-600 dark:text-green-400">✓ {cacheClearResult}</span>
+                  )}
+                </div>
+                <p className="text-xs text-muted-foreground leading-relaxed">
+                  Flush the 2-hour Google News RSS cache so fresh articles are fetched on the next trend scoring or news run.
+                </p>
+              </div>
+              <button
+                onClick={clearGnewsCache}
+                disabled={clearingCache}
+                className="shrink-0 flex items-center gap-1.5 px-3 py-1.5 text-xs bg-muted hover:bg-accent rounded-md transition-colors font-medium disabled:opacity-50 disabled:cursor-not-allowed"
+              >
+                {clearingCache ? <RefreshCw className="w-3 h-3 animate-spin" /> : "Run"}
+              </button>
+            </div>
+          );
+
          const recurring: ControlItem[] = [
            {
              key: "poll",
@@ -798,6 +891,7 @@ export default function SettingsPage() {
            <>
              <div className="divide-y divide-border">
                {recurring.map(renderRow)}
+                {ClearCacheRow}
              </div>

              {/* Maintenance subsection */}
--- a/frontend/lib/api.ts
+++ b/frontend/lib/api.ts
@@ -293,4 +293,8 @@ export const adminAPI = {
    apiClient.get<ApiHealth>("/api/admin/api-health").then((r) => r.data),
  getTaskStatus: (taskId: string) =>
    apiClient.get(`/api/admin/task-status/${taskId}`).then((r) => r.data),
+  getNewsApiQuota: () =>
+    apiClient.get<{ remaining: number; limit: number }>("/api/admin/newsapi-quota").then((r) => r.data),
+  clearGnewsCache: () =>
+    apiClient.post<{ cleared: number }>("/api/admin/clear-gnews-cache").then((r) => r.data),
 };
--- a/frontend/lib/types.ts
+++ b/frontend/lib/types.ts
@@ -157,6 +157,7 @@ export interface SettingsData {
  congress_poll_interval_minutes: number;
  newsapi_enabled: boolean;
  pytrends_enabled: boolean;
+  api_keys_configured: Record<string, boolean>;
 }

 export interface BillNote {