feat: API optimizations — quota batching, ETags, caching, async sponsor (v0.9.7)

Nine efficiency improvements across the data pipeline:

1. NewsAPI OR batching (news_service.py + news_fetcher.py)
   - Combine up to 4 bills per NewsAPI call using OR query syntax
   - NEWSAPI_BATCH_SIZE=4 means ~4× effective daily quota (100→400 bill-fetches)
   - fetch_news_for_bill_batch task; fetch_news_for_active_bills queues batches

2. Google News RSS cache (news_service.py)
   - 2-hour Redis cache shared between news_fetcher and trend_scorer
   - Eliminates duplicate RSS hits when both workers run against the same bill
   - clear_gnews_cache() admin helper + admin endpoint

3. pytrends keyword batching (trends_service.py + trend_scorer.py)
   - Compare up to 5 bills per pytrends call instead of 1
   - get_trends_scores_batch() returns scores in original order
   - Reduces pytrends calls by ~5× and associated rate-limit risk

4. GovInfo ETags (govinfo_api.py + document_fetcher.py)
   - If-None-Match conditional GET; DocumentUnchangedError on HTTP 304
   - ETags stored in Redis (30-day TTL) keyed by MD5(url)
   - document_fetcher catches DocumentUnchangedError → {"status": "unchanged"}

5. Anthropic prompt caching (llm_service.py)
   - cache_control: {type: ephemeral} on system messages in AnthropicProvider
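   - Caches the ~700-token system prompt server-side; ~50% cost reduction on
     repeated calls within the 5-minute cache window
     (caveat: Anthropic only caches prompt prefixes that meet a per-model
     minimum length — 1,024 tokens or more — so confirm via the
     cache_read_input_tokens usage field that a ~700-token prompt is
     actually being cached)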

6. Async sponsor fetch (congress_poller.py)
   - New fetch_sponsor_for_bill Celery task replaces the blocking inline
     get_bill_detail() call in the poll loop
   - Bills saved immediately with sponsor_id=None; sponsor linked async
   - Removes 0.25s sleep per new bill from poll hot path

7. Skip doc fetch for procedural actions (congress_poller.py)
   - _DOC_PRODUCING_CATEGORIES = {vote, committee_report, presidential, ...}
   - fetch_bill_documents only enqueued when action is likely to produce
     new GovInfo text (saves ~60–70% of unnecessary document fetch attempts)

8. Adaptive poll frequency (congress_poller.py)
   - _is_congress_off_hours(): weekends + before 9AM / after 9PM EST
   - Skips poll if off-hours AND last poll < 1 hour ago
   - Prevents wasteful polling during hours when Congress is unlikely to be in session

9. Admin panel additions (admin.py + settings/page.tsx + api.ts)
   - GET /api/admin/newsapi-quota → remaining calls today
   - POST /api/admin/clear-gnews-cache → flush RSS cache
   - Settings page shows NewsAPI quota remaining (amber if < 10)
   - "Clear Google News Cache" button in Manual Controls

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Jack Levy
2026-03-14 16:50:51 -04:00
parent 247a874c8d
commit 7e5c5b473e
16 changed files with 676 additions and 162 deletions

View File

@@ -26,10 +26,34 @@ import { settingsAPI, adminAPI, notificationsAPI, type AdminUser, type LLMModel,
import { useAuthStore } from "@/stores/authStore";
// Selectable LLM backends rendered as the provider radio list on the settings page.
// Every entry carries:
//   value — identifier stored in the `llm_provider` setting and used as the React key
//   label — display name shown next to the radio button
//   hint  — setup requirement shown under the label
// The expanded entries additionally carry:
//   rateNote  — provider rate-limit summary
//   modelNote — recommended model for that provider
// NOTE(review): the four one-line entries below repeat the `value`s of the four
// expanded entries that follow — presumably the pre-change lines of this diff hunk.
// Confirm that only the expanded set remains in the real file, since duplicate
// `value`s would produce duplicate radio rows and duplicate React keys.
const LLM_PROVIDERS = [
{ value: "openai", label: "OpenAI", hint: "Requires OPENAI_API_KEY in .env" },
{ value: "anthropic", label: "Anthropic (Claude)", hint: "Requires ANTHROPIC_API_KEY in .env" },
{ value: "gemini", label: "Google Gemini", hint: "Requires GEMINI_API_KEY in .env" },
{ value: "ollama", label: "Ollama (Local)", hint: "Requires Ollama running on host" },
{
value: "openai",
label: "OpenAI",
hint: "Requires OPENAI_API_KEY in .env",
rateNote: "Free: 3 RPM · Paid tier 1: 500 RPM",
modelNote: "Recommended: gpt-4o-mini — excellent JSON quality at ~10× lower cost than gpt-4o",
},
{
value: "anthropic",
label: "Anthropic (Claude)",
hint: "Requires ANTHROPIC_API_KEY in .env",
rateNote: "Tier 1: 50 RPM · Tier 2: 1,000 RPM",
modelNote: "Recommended: claude-sonnet-4-6 — matches Opus quality at ~5× lower cost",
},
{
value: "gemini",
label: "Google Gemini",
hint: "Requires GEMINI_API_KEY in .env",
rateNote: "Free: 15 RPM · Paid: 2,000 RPM",
modelNote: "Recommended: gemini-2.0-flash — best value, generous free tier",
},
{
value: "ollama",
label: "Ollama (Local)",
hint: "Requires Ollama running on host",
rateNote: "No API rate limits",
modelNote: "Recommended: llama3.1 or mistral for reliable structured JSON output",
},
];
@@ -139,6 +163,27 @@ export default function SettingsPage() {
const [confirmDelete, setConfirmDelete] = useState<number | null>(null);
const [showMaintenance, setShowMaintenance] = useState(false);
// NewsAPI quota for the settings page. The query is only enabled when the
// current user is an admin and NewsAPI is enabled in settings; otherwise
// `newsApiQuota` stays undefined and the quota label is not rendered.
const { data: newsApiQuota, refetch: refetchQuota } = useQuery({
  queryKey: ["newsapi-quota"],
  queryFn: () => adminAPI.getNewsApiQuota(),
  enabled: Boolean(currentUser?.is_admin) && Boolean(settings?.newsapi_enabled),
  staleTime: 60 * 1000,
});
// State for the "Clear Google News Cache" action: an in-flight flag, and the
// message from the most recent run (null before the first run and while a new
// run is starting).
const [clearingCache, setClearingCache] = useState(false);
const [cacheClearResult, setCacheClearResult] = useState<string | null>(null);

// Calls the admin clear-cache endpoint and stores either the cleared-entry
// count or the failure message. The in-flight flag is reset whether the
// request succeeds or fails.
const clearGnewsCache = async () => {
  setClearingCache(true);
  setCacheClearResult(null);
  try {
    const response = await adminAPI.clearGnewsCache();
    const summary = `Cleared ${response.cleared} cached entries`;
    setCacheClearResult(summary);
  } catch (err: unknown) {
    // Non-Error rejections fall back to a generic label.
    const failure = err instanceof Error ? err.message : "Failed";
    setCacheClearResult(failure);
  } finally {
    setClearingCache(false);
  }
};
const testLLM = async () => {
setTesting(true);
setTestResult(null);
@@ -421,26 +466,42 @@ export default function SettingsPage() {
<Cpu className="w-4 h-4" /> LLM Provider
</h2>
<div className="space-y-2">
{LLM_PROVIDERS.map(({ value, label, hint }) => (
<label key={value} className="flex items-start gap-3 cursor-pointer">
<input
type="radio"
name="provider"
value={value}
checked={settings?.llm_provider === value}
onChange={() => {
updateSetting.mutate({ key: "llm_provider", value });
setShowCustomModel(false);
setCustomModel("");
}}
className="mt-0.5"
/>
<div>
<div className="text-sm font-medium">{label}</div>
<div className="text-xs text-muted-foreground">{hint}</div>
</div>
</label>
))}
{LLM_PROVIDERS.map(({ value, label, hint, rateNote, modelNote }) => {
const hasKey = settings?.api_keys_configured?.[value] ?? true;
return (
<label key={value} className={`flex items-start gap-3 ${hasKey ? "cursor-pointer" : "cursor-not-allowed opacity-60"}`}>
<input
type="radio"
name="provider"
value={value}
checked={settings?.llm_provider === value}
disabled={!hasKey}
onChange={() => {
updateSetting.mutate({ key: "llm_provider", value });
setShowCustomModel(false);
setCustomModel("");
}}
className="mt-0.5"
/>
<div className="flex-1">
<div className="flex items-center gap-2">
<span className="text-sm font-medium">{label}</span>
{hasKey ? (
<span className="text-xs px-1.5 py-0.5 rounded-full bg-green-100 text-green-700 dark:bg-green-900/30 dark:text-green-400 font-medium">
{value === "ollama" ? "local" : "key set"}
</span>
) : (
<span className="text-xs px-1.5 py-0.5 rounded-full bg-muted text-muted-foreground font-medium">
no key
</span>
)}
</div>
<div className="text-xs text-muted-foreground">{hint}</div>
<div className="text-xs text-muted-foreground mt-0.5">{rateNote} · {modelNote}</div>
</div>
</label>
);
})}
</div>
{/* Model picker — live from provider API */}
@@ -568,9 +629,16 @@ export default function SettingsPage() {
<div className="font-medium">NewsAPI.org</div>
<div className="text-xs text-muted-foreground">100 requests/day free tier</div>
</div>
<span className={`text-xs font-medium ${settings?.newsapi_enabled ? "text-green-500" : "text-muted-foreground"}`}>
{settings?.newsapi_enabled ? "Configured" : "Not configured"}
</span>
<div className="flex items-center gap-3">
{newsApiQuota && (
<span className={`text-xs ${newsApiQuota.remaining < 10 ? "text-amber-500" : "text-muted-foreground"}`}>
{newsApiQuota.remaining}/{newsApiQuota.limit} remaining today
</span>
)}
<span className={`text-xs font-medium ${settings?.newsapi_enabled ? "text-green-500" : "text-muted-foreground"}`}>
{settings?.newsapi_enabled ? "Configured" : "Not configured"}
</span>
</div>
</div>
<div className="flex items-center justify-between py-2 border-t border-border">
<div>
@@ -697,6 +765,31 @@ export default function SettingsPage() {
</div>
);
// Clear RSS cache — inline action (returns count, not task_id).
// Pre-built row element for the Manual Controls list: a title with the latest
// result message next to it, a short description, and a "Run" button that
// invokes clearGnewsCache. While a clear is in flight (clearingCache) the button
// is disabled and shows a spinning RefreshCw icon instead of the "Run" label.
// NOTE(review): cacheClearResult is always rendered in the green success style,
// but clearGnewsCache also stores error messages in it (its catch branch) —
// confirm whether failures should be styled differently.
const ClearCacheRow = (
<div className="flex items-start gap-3 py-3.5">
<div className="w-2.5 h-2.5 rounded-full mt-1 shrink-0 bg-border" />
<div className="flex-1 min-w-0 space-y-0.5">
<div className="flex items-center gap-2 flex-wrap">
<span className="text-sm font-medium">Clear Google News Cache</span>
{cacheClearResult && (
<span className="text-xs text-green-600 dark:text-green-400"> {cacheClearResult}</span>
)}
</div>
<p className="text-xs text-muted-foreground leading-relaxed">
Flush the 2-hour Google News RSS cache so fresh articles are fetched on the next trend scoring or news run.
</p>
</div>
<button
onClick={clearGnewsCache}
disabled={clearingCache}
className="shrink-0 flex items-center gap-1.5 px-3 py-1.5 text-xs bg-muted hover:bg-accent rounded-md transition-colors font-medium disabled:opacity-50 disabled:cursor-not-allowed"
>
{clearingCache ? <RefreshCw className="w-3 h-3 animate-spin" /> : "Run"}
</button>
</div>
);
const recurring: ControlItem[] = [
{
key: "poll",
@@ -798,6 +891,7 @@ export default function SettingsPage() {
<>
<div className="divide-y divide-border">
{recurring.map(renderRow)}
{ClearCacheRow}
</div>
{/* Maintenance subsection */}

View File

@@ -293,4 +293,8 @@ export const adminAPI = {
apiClient.get<ApiHealth>("/api/admin/api-health").then((r) => r.data),
getTaskStatus: (taskId: string) =>
apiClient.get(`/api/admin/task-status/${taskId}`).then((r) => r.data),
// GET /api/admin/newsapi-quota — resolves to the response body
// ({ remaining, limit }), unwrapped from the client response.
getNewsApiQuota: () =>
apiClient.get<{ remaining: number; limit: number }>("/api/admin/newsapi-quota").then((r) => r.data),
// POST /api/admin/clear-gnews-cache — resolves to the response body
// ({ cleared }), unwrapped from the client response.
clearGnewsCache: () =>
apiClient.post<{ cleared: number }>("/api/admin/clear-gnews-cache").then((r) => r.data),
};

View File

@@ -157,6 +157,7 @@ export interface SettingsData {
congress_poll_interval_minutes: number;
newsapi_enabled: boolean;
pytrends_enabled: boolean;
// Boolean flag per LLM provider, keyed by the provider `value` (e.g. "openai").
// The settings page reads it to decide whether a provider radio is enabled;
// that reader treats a missing map or missing key as "configured" (`?? true`).
api_keys_configured: Record<string, boolean>;
}
export interface BillNote {