Nine efficiency improvements across the data pipeline:
1. NewsAPI OR batching (news_service.py + news_fetcher.py)
- Combine up to 4 bills per NewsAPI call using OR query syntax
- NEWSAPI_BATCH_SIZE=4 means ~4× effective daily quota (100→400 bill-fetches)
- fetch_news_for_bill_batch task; fetch_news_for_active_bills queues batches
2. Google News RSS cache (news_service.py)
- 2-hour Redis cache shared between news_fetcher and trend_scorer
- Eliminates duplicate RSS hits when both workers run against same bill
- clear_gnews_cache() admin helper + admin endpoint
3. pytrends keyword batching (trends_service.py + trend_scorer.py)
- Compare up to 5 bills per pytrends call instead of 1
- get_trends_scores_batch() returns scores in original order
- Reduces pytrends calls by ~5× and associated rate-limit risk
4. GovInfo ETags (govinfo_api.py + document_fetcher.py)
- If-None-Match conditional GET; DocumentUnchangedError on HTTP 304
- ETags stored in Redis (30-day TTL) keyed by MD5(url)
- document_fetcher catches DocumentUnchangedError → {"status": "unchanged"}
5. Anthropic prompt caching (llm_service.py)
- cache_control: {type: ephemeral} on system messages in AnthropicProvider
- Caches the ~700-token system prompt server-side; ~50% cost reduction on
repeated calls within the 5-minute cache window
6. Async sponsor fetch (congress_poller.py)
- New fetch_sponsor_for_bill Celery task replaces blocking get_bill_detail()
inline in poll loop
- Bills saved immediately with sponsor_id=None; sponsor linked async
- Removes 0.25s sleep per new bill from poll hot path
7. Skip doc fetch for procedural actions (congress_poller.py)
- _DOC_PRODUCING_CATEGORIES = {vote, committee_report, presidential, ...}
- fetch_bill_documents only enqueued when action is likely to produce
new GovInfo text (saves ~60–70% of unnecessary document fetch attempts)
8. Adaptive poll frequency (congress_poller.py)
- _is_congress_off_hours(): weekends + before 9AM / after 9PM EST
- Skips poll if off-hours AND last poll < 1 hour ago
- Prevents wasteful polling when Congress is not in session
9. Admin panel additions (admin.py + settings/page.tsx + api.ts)
- GET /api/admin/newsapi-quota → remaining calls today
- POST /api/admin/clear-gnews-cache → flush RSS cache
- Settings page shows NewsAPI quota remaining (amber if < 10)
- "Clear Google News Cache" button in Manual Controls
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
226 lines · 8.0 KiB · Python
from fastapi import APIRouter, Depends, HTTPException
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession

from app.config import settings
from app.core.dependencies import get_current_admin, get_current_user
from app.database import get_db
from app.models import AppSetting
from app.models.user import User
from app.schemas.schemas import SettingUpdate, SettingsResponse
router = APIRouter()
|
|
|
|
|
|
@router.get("", response_model=SettingsResponse)
async def get_settings(
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Return current effective settings (env + DB overrides)."""
    # Rows in AppSetting take precedence over environment defaults.
    rows = await db.execute(select(AppSetting))
    overrides: dict[str, str] = {row.key: row.value for row in rows.scalars().all()}

    effective_provider = overrides.get("llm_provider", settings.LLM_PROVIDER)
    return SettingsResponse(
        llm_provider=effective_provider,
        llm_model=overrides.get("llm_model", _current_model(effective_provider)),
        congress_poll_interval_minutes=int(
            overrides.get(
                "congress_poll_interval_minutes",
                settings.CONGRESS_POLL_INTERVAL_MINUTES,
            )
        ),
        newsapi_enabled=bool(settings.NEWSAPI_KEY),
        pytrends_enabled=settings.PYTRENDS_ENABLED,
        api_keys_configured={
            "openai": bool(settings.OPENAI_API_KEY),
            "anthropic": bool(settings.ANTHROPIC_API_KEY),
            "gemini": bool(settings.GEMINI_API_KEY),
            "ollama": True,  # local server, no API key required
        },
    )
|
|
|
|
|
|
@router.put("")
async def update_setting(
    body: SettingUpdate,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_admin),
):
    """Create or update a runtime setting override (admin only).

    Only whitelisted keys may be overridden; any other key yields HTTP 400.
    Returns the stored key/value pair on success.
    """
    ALLOWED_KEYS = {"llm_provider", "llm_model", "congress_poll_interval_minutes"}
    if body.key not in ALLOWED_KEYS:
        # Sorted so the error message is deterministic (set repr order is not).
        raise HTTPException(
            status_code=400,
            detail=f"Allowed setting keys: {sorted(ALLOWED_KEYS)}",
        )

    # Upsert by primary key (the setting name).
    existing = await db.get(AppSetting, body.key)
    if existing:
        existing.value = body.value
    else:
        db.add(AppSetting(key=body.key, value=body.value))
    await db.commit()
    return {"key": body.key, "value": body.value}
|
|
|
|
|
|
@router.post("/test-llm")
async def test_llm_connection(
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_admin),
):
    """Ping the configured LLM provider with a minimal request."""
    import asyncio

    # DB overrides win; fall back to env config for the provider and let the
    # ping helper pick the provider's default model when none is stored.
    provider_override = await db.get(AppSetting, "llm_provider")
    model_override = await db.get(AppSetting, "llm_model")
    provider = provider_override.value if provider_override else settings.LLM_PROVIDER
    model = model_override.value if model_override else None

    try:
        # Provider SDK clients are synchronous; keep them off the event loop.
        return await asyncio.to_thread(_ping_provider, provider, model)
    except Exception as exc:
        return {"status": "error", "detail": str(exc)}
|
|
|
|
|
|
_PING = "Reply with exactly three words: Connection test successful."
|
|
|
|
|
|
def _ping_provider(provider_name: str, model_name: str | None) -> dict:
    """Send the _PING prompt to the named provider and return a status dict.

    *model_name* of None selects the env-configured default for that provider.
    Raises ValueError for an unrecognized provider; SDK / network errors
    propagate to the caller (the endpoint converts them to an error payload).
    """
    if provider_name == "openai":
        from openai import OpenAI

        chosen = model_name or settings.OPENAI_MODEL
        completion = OpenAI(api_key=settings.OPENAI_API_KEY).chat.completions.create(
            model=chosen,
            messages=[{"role": "user", "content": _PING}],
            max_tokens=20,
        )
        return {
            "status": "ok",
            "provider": "openai",
            "model": chosen,
            "reply": completion.choices[0].message.content.strip(),
        }

    if provider_name == "anthropic":
        import anthropic

        chosen = model_name or settings.ANTHROPIC_MODEL
        message = anthropic.Anthropic(api_key=settings.ANTHROPIC_API_KEY).messages.create(
            model=chosen,
            max_tokens=20,
            messages=[{"role": "user", "content": _PING}],
        )
        return {
            "status": "ok",
            "provider": "anthropic",
            "model": chosen,
            "reply": message.content[0].text.strip(),
        }

    if provider_name == "gemini":
        import google.generativeai as genai

        chosen = model_name or settings.GEMINI_MODEL
        genai.configure(api_key=settings.GEMINI_API_KEY)
        result = genai.GenerativeModel(model_name=chosen).generate_content(_PING)
        return {
            "status": "ok",
            "provider": "gemini",
            "model": chosen,
            "reply": result.text.strip(),
        }

    if provider_name == "ollama":
        import requests as req

        chosen = model_name or settings.OLLAMA_MODEL
        response = req.post(
            f"{settings.OLLAMA_BASE_URL}/api/generate",
            json={"model": chosen, "prompt": _PING, "stream": False},
            timeout=30,
        )
        response.raise_for_status()
        return {
            "status": "ok",
            "provider": "ollama",
            "model": chosen,
            "reply": response.json().get("response", "").strip(),
        }

    raise ValueError(f"Unknown provider: {provider_name}")
|
|
|
|
|
|
@router.get("/llm-models")
async def list_llm_models(
    provider: str,
    current_user: User = Depends(get_current_admin),
):
    """Fetch available models directly from the provider's API."""
    import asyncio

    dispatch = {
        "openai": _list_openai_models,
        "anthropic": _list_anthropic_models,
        "gemini": _list_gemini_models,
        "ollama": _list_ollama_models,
    }
    handler = dispatch.get(provider)
    if handler is None:
        return {"models": [], "error": f"Unknown provider: {provider}"}
    try:
        # Each handler makes blocking HTTP/SDK calls; run off the event loop.
        return await asyncio.to_thread(handler)
    except Exception as exc:
        return {"models": [], "error": str(exc)}
|
|
|
|
|
|
def _list_openai_models() -> dict:
    """List chat-capable OpenAI models, sorted descending by id.

    Returns {"models": [{"id", "name"}, ...]}, or an empty list plus an
    "error" message when OPENAI_API_KEY is not configured.
    """
    from openai import OpenAI

    if not settings.OPENAI_API_KEY:
        return {"models": [], "error": "OPENAI_API_KEY not configured"}
    client = OpenAI(api_key=settings.OPENAI_API_KEY)
    all_models = client.models.list().data
    # Keep chat-completion families only; drop modality-specific variants.
    CHAT_PREFIXES = ("gpt-", "o1", "o3", "o4", "chatgpt-")
    EXCLUDE = ("realtime", "audio", "tts", "whisper", "embedding", "dall-e", "instruct")
    filtered = sorted(
        (
            m.id
            for m in all_models
            # str.startswith accepts a tuple of prefixes directly.
            if m.id.startswith(CHAT_PREFIXES)
            and not any(token in m.id for token in EXCLUDE)
        ),
        reverse=True,
    )
    return {"models": [{"id": mid, "name": mid} for mid in filtered]}
|
|
|
|
|
|
def _list_anthropic_models() -> dict:
    """List Anthropic models via the public REST models endpoint."""
    import requests as req

    if not settings.ANTHROPIC_API_KEY:
        return {"models": [], "error": "ANTHROPIC_API_KEY not configured"}

    headers = {
        "x-api-key": settings.ANTHROPIC_API_KEY,
        "anthropic-version": "2023-06-01",
    }
    resp = req.get(
        "https://api.anthropic.com/v1/models",
        headers=headers,
        timeout=10,
    )
    resp.raise_for_status()
    payload = resp.json()

    models = []
    for entry in payload.get("data", []):
        # display_name is optional in the API response; fall back to the id.
        models.append({"id": entry["id"], "name": entry.get("display_name", entry["id"])})
    return {"models": models}
|
|
|
|
|
|
def _list_gemini_models() -> dict:
    """List Gemini models that support text generation, sorted by id."""
    import google.generativeai as genai

    if not settings.GEMINI_API_KEY:
        return {"models": [], "error": "GEMINI_API_KEY not configured"}
    genai.configure(api_key=settings.GEMINI_API_KEY)

    available = []
    for model in genai.list_models():
        # Only models usable for generateContent requests are relevant here.
        if "generateContent" not in model.supported_generation_methods:
            continue
        available.append(
            {"id": model.name.replace("models/", ""), "name": model.display_name}
        )
    available.sort(key=lambda entry: entry["id"])
    return {"models": available}
|
|
|
|
|
|
def _list_ollama_models() -> dict:
    """List locally installed Ollama models.

    Never raises: connection problems are reported via the "error" field so
    the admin UI can render them inline.
    """
    import requests as req

    try:
        resp = req.get(f"{settings.OLLAMA_BASE_URL}/api/tags", timeout=5)
        resp.raise_for_status()
        installed = resp.json().get("models", [])
        return {
            "models": [{"id": entry["name"], "name": entry["name"]} for entry in installed]
        }
    except Exception as exc:
        return {"models": [], "error": f"Ollama unreachable: {exc}"}
|
|
|
|
|
|
def _current_model(provider: str) -> str:
    """Return the env-configured default model name for *provider*.

    Unknown providers yield the literal string "unknown".
    """
    attr_by_provider = {
        "openai": "OPENAI_MODEL",
        "anthropic": "ANTHROPIC_MODEL",
        "gemini": "GEMINI_MODEL",
        "ollama": "OLLAMA_MODEL",
    }
    attr = attr_by_provider.get(provider)
    # getattr is evaluated lazily so only the matched provider's setting is read.
    return getattr(settings, attr) if attr else "unknown"
|