Add bill action pipeline, admin health panel, and LLM provider fixes
- Fetch bill actions from Congress.gov and populate the action timeline
- Add nightly batch task and beat schedule for active bill actions
- Add admin reprocess endpoint for per-bill debugging
- Add BriefPanel with "What Changed" view and version history
- Add External API Health section with per-source latency testing
- Redesign Manual Controls as health panel with status dots and descriptions
- Add Resume Analysis task for stalled LLM jobs
- Add Backfill Dates & Links task for bills with null metadata
- Fix LLM provider/model DB overrides being ignored (env vars used instead)
- Fix Gemini 404: gemini-1.5-pro deprecated → gemini-2.0-flash
- Fix Anthropic models list: use REST API directly (SDK too old for .models)
- Replace test-LLM full analysis with lightweight ping (max_tokens=20)
- Add has_document field to BillDetail; show "No bill text published" state
- Fix "Introduced: —" showing for bills with null introduced_date
- Add bills_missing_sponsor and bills_missing_metadata to admin stats
- Add GovInfo health check using /collections endpoint (fixes 500 from /packages)

Authored-By: Jack Levy
This commit is contained in:
@@ -106,6 +106,30 @@ async def get_stats(
|
||||
AND jsonb_typeof(key_points->0) = 'string'
|
||||
""")
|
||||
)).scalar()
|
||||
# Bills with null sponsor
|
||||
bills_missing_sponsor = (await db.execute(
|
||||
text("SELECT COUNT(*) FROM bills WHERE sponsor_id IS NULL")
|
||||
)).scalar()
|
||||
# Bills with null metadata (introduced_date / chamber / congress_url)
|
||||
bills_missing_metadata = (await db.execute(
|
||||
text("SELECT COUNT(*) FROM bills WHERE introduced_date IS NULL OR chamber IS NULL OR congress_url IS NULL")
|
||||
)).scalar()
|
||||
# Bills with no document record at all (text not yet published on GovInfo)
|
||||
no_text_bills = (await db.execute(
|
||||
text("""
|
||||
SELECT COUNT(*) FROM bills b
|
||||
LEFT JOIN bill_documents bd ON bd.bill_id = b.bill_id
|
||||
WHERE bd.id IS NULL
|
||||
""")
|
||||
)).scalar()
|
||||
# Documents that have text but no brief (LLM not yet run / failed)
|
||||
pending_llm = (await db.execute(
|
||||
text("""
|
||||
SELECT COUNT(*) FROM bill_documents bd
|
||||
LEFT JOIN bill_briefs bb ON bb.document_id = bd.id
|
||||
WHERE bb.id IS NULL AND bd.raw_text IS NOT NULL
|
||||
""")
|
||||
)).scalar()
|
||||
return {
|
||||
"total_bills": total_bills,
|
||||
"docs_fetched": docs_fetched,
|
||||
@@ -113,6 +137,10 @@ async def get_stats(
|
||||
"full_briefs": full_briefs,
|
||||
"amendment_briefs": amendment_briefs,
|
||||
"uncited_briefs": uncited_briefs,
|
||||
"no_text_bills": no_text_bills,
|
||||
"pending_llm": pending_llm,
|
||||
"bills_missing_sponsor": bills_missing_sponsor,
|
||||
"bills_missing_metadata": bills_missing_metadata,
|
||||
"remaining": total_bills - total_briefs,
|
||||
}
|
||||
|
||||
@@ -155,6 +183,22 @@ async def trigger_fetch_actions(current_user: User = Depends(get_current_admin))
|
||||
return {"task_id": task.id, "status": "queued"}
|
||||
|
||||
|
||||
@router.post("/backfill-metadata")
async def backfill_metadata(current_user: User = Depends(get_current_admin)):
    """Fill in null introduced_date, congress_url, chamber for existing bills.

    Queues the Celery backfill task and returns its id so the admin UI can
    poll the task-status endpoint. Admin-only (enforced by the dependency).
    """
    # Imported lazily so the API process doesn't load worker modules at startup.
    from app.workers.congress_poller import backfill_bill_metadata

    queued = backfill_bill_metadata.delay()
    return {"task_id": queued.id, "status": "queued"}
|
||||
|
||||
|
||||
@router.post("/resume-analysis")
async def resume_analysis(current_user: User = Depends(get_current_admin)):
    """Re-queue LLM processing for docs with no brief, and document fetching for bills with no doc.

    Fire-and-forget: the heavy lifting happens in the Celery worker; this
    endpoint only enqueues the task and reports its id.
    """
    # Imported lazily so the API process doesn't load worker modules at startup.
    from app.workers.llm_processor import resume_pending_analysis

    queued = resume_pending_analysis.delay()
    return {"task_id": queued.id, "status": "queued"}
|
||||
|
||||
|
||||
@router.post("/trigger-trend-scores")
|
||||
async def trigger_trend_scores(current_user: User = Depends(get_current_admin)):
|
||||
from app.workers.trend_scorer import calculate_all_trend_scores
|
||||
@@ -172,6 +216,113 @@ async def reprocess_bill(bill_id: str, current_user: User = Depends(get_current_
|
||||
return {"task_ids": {"documents": doc_task.id, "actions": actions_task.id}}
|
||||
|
||||
|
||||
@router.get("/api-health")
async def api_health(current_user: User = Depends(get_current_admin)):
    """Test each external API and return status + latency for each."""
    import asyncio

    # Each probe is synchronous (requests-based), so run them concurrently in
    # worker threads; return_exceptions keeps one failing probe from
    # cancelling the others.
    probes = {
        "congress_gov": _test_congress,
        "govinfo": _test_govinfo,
        "newsapi": _test_newsapi,
        "google_news": _test_gnews,
    }
    outcomes = await asyncio.gather(
        *(asyncio.to_thread(fn) for fn in probes.values()),
        return_exceptions=True,
    )

    report = {}
    for source, outcome in zip(probes, outcomes):
        if isinstance(outcome, dict):
            report[source] = outcome
        else:
            # gather handed back an exception object — surface it as an error.
            report[source] = {"status": "error", "detail": str(outcome)}
    return report
|
||||
|
||||
|
||||
def _timed(fn):
|
||||
"""Run fn(), return its dict merged with latency_ms."""
|
||||
import time as _time
|
||||
t0 = _time.perf_counter()
|
||||
result = fn()
|
||||
result["latency_ms"] = round((_time.perf_counter() - t0) * 1000)
|
||||
return result
|
||||
|
||||
|
||||
def _test_congress() -> dict:
    """Probe Congress.gov by fetching a single bill from the 119th Congress."""
    from app.config import settings
    from app.services import congress_api

    if not settings.DATA_GOV_API_KEY:
        return {"status": "error", "detail": "DATA_GOV_API_KEY not configured"}

    def _call():
        payload = congress_api.get_bills(119, limit=1)
        # Prefer the pagination count; fall back to the page length when absent.
        total = payload.get("pagination", {}).get("count") or len(payload.get("bills", []))
        return {"status": "ok", "detail": f"{total:,} bills available in 119th Congress"}

    try:
        return _timed(_call)
    except Exception as exc:
        return {"status": "error", "detail": str(exc)}
|
||||
|
||||
|
||||
def _test_govinfo() -> dict:
    """Probe GovInfo via its /collections listing (lightweight health check)."""
    from app.config import settings
    import requests as req

    if not settings.DATA_GOV_API_KEY:
        return {"status": "error", "detail": "DATA_GOV_API_KEY not configured"}

    def _call():
        # /collections lists all available collections — simple health check endpoint
        response = req.get(
            "https://api.govinfo.gov/collections",
            params={"api_key": settings.DATA_GOV_API_KEY},
            timeout=15,
        )
        response.raise_for_status()
        collections = response.json().get("collections", [])
        bills = next((c for c in collections if c.get("collectionCode") == "BILLS"), None)
        if bills is None:
            return {"status": "ok", "detail": f"GovInfo reachable — {len(collections)} collections available"}
        count = bills.get("packageCount", "?")
        if isinstance(count, int):
            return {"status": "ok", "detail": f"BILLS collection: {count:,} packages"}
        # packageCount missing or non-numeric — still a healthy response.
        return {"status": "ok", "detail": "GovInfo reachable, BILLS collection found"}

    try:
        return _timed(_call)
    except Exception as exc:
        return {"status": "error", "detail": str(exc)}
|
||||
|
||||
|
||||
def _test_newsapi() -> dict:
    """Probe NewsAPI's top-headlines endpoint with a minimal one-result query."""
    from app.config import settings
    import requests as req

    if not settings.NEWSAPI_KEY:
        return {"status": "skipped", "detail": "NEWSAPI_KEY not configured"}

    def _call():
        response = req.get(
            "https://newsapi.org/v2/top-headlines",
            params={"country": "us", "pageSize": 1, "apiKey": settings.NEWSAPI_KEY},
            timeout=10,
        )
        # NewsAPI reports failures inside the JSON body ("status": "error"),
        # so inspect that rather than the HTTP status code.
        body = response.json()
        if body.get("status") != "ok":
            return {"status": "error", "detail": body.get("message", "Unknown error")}
        return {"status": "ok", "detail": f"{body.get('totalResults', 0):,} headlines available"}

    try:
        return _timed(_call)
    except Exception as exc:
        return {"status": "error", "detail": str(exc)}
|
||||
|
||||
|
||||
def _test_gnews() -> dict:
    """Probe the Google News RSS feed with a sample search query."""
    import requests as req

    def _call():
        response = req.get(
            "https://news.google.com/rss/search",
            params={"q": "congress", "hl": "en-US", "gl": "US", "ceid": "US:en"},
            timeout=10,
            # Google returns 403 to requests without a browser-ish UA.
            headers={"User-Agent": "Mozilla/5.0"},
        )
        response.raise_for_status()
        # Crude but sufficient: count <item> elements in the raw XML.
        item_count = response.text.count("<item>")
        return {"status": "ok", "detail": f"{item_count} items in test RSS feed"}

    try:
        return _timed(_call)
    except Exception as exc:
        return {"status": "error", "detail": str(exc)}
|
||||
|
||||
|
||||
@router.get("/task-status/{task_id}")
|
||||
async def get_task_status(task_id: str, current_user: User = Depends(get_current_admin)):
|
||||
from app.workers.celery_app import celery_app
|
||||
|
||||
@@ -6,7 +6,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy.orm import selectinload
|
||||
|
||||
from app.database import get_db
|
||||
from app.models import Bill, BillAction, BillBrief, NewsArticle, TrendScore
|
||||
from app.models import Bill, BillAction, BillBrief, BillDocument, NewsArticle, TrendScore
|
||||
from app.schemas.schemas import (
|
||||
BillDetailSchema,
|
||||
BillSchema,
|
||||
@@ -109,6 +109,10 @@ async def get_bill(bill_id: str, db: AsyncSession = Depends(get_db)):
|
||||
detail.latest_brief = bill.briefs[0]
|
||||
if bill.trend_scores:
|
||||
detail.latest_trend = bill.trend_scores[0]
|
||||
doc_exists = await db.scalar(
|
||||
select(func.count()).select_from(BillDocument).where(BillDocument.bill_id == bill_id)
|
||||
)
|
||||
detail.has_document = bool(doc_exists)
|
||||
|
||||
# Trigger a background news refresh if no articles are stored but trend
|
||||
# data shows there are gnews results out there waiting to be fetched.
|
||||
|
||||
@@ -3,6 +3,23 @@ from typing import Any, Generic, Optional, TypeVar
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
# ── Notifications ──────────────────────────────────────────────────────────────
|
||||
|
||||
class NotificationSettingsResponse(BaseModel):
    """Current ntfy push + RSS notification settings for a user.

    Read model: populated from ORM attributes (from_attributes), so it can be
    returned directly from a settings row.
    """

    ntfy_topic_url: str = ""
    ntfy_token: str = ""
    ntfy_enabled: bool = False
    # None means the user has never generated an RSS token.
    rss_token: Optional[str] = None

    model_config = {"from_attributes": True}
|
||||
|
||||
|
||||
class NotificationSettingsUpdate(BaseModel):
    """Partial update payload for notification settings.

    Every field defaults to None, meaning "leave unchanged" — callers send
    only the fields they want to modify.
    """

    ntfy_topic_url: Optional[str] = None
    ntfy_token: Optional[str] = None
    ntfy_enabled: Optional[bool] = None
|
||||
|
||||
T = TypeVar("T")
|
||||
|
||||
|
||||
@@ -144,6 +161,7 @@ class BillDetailSchema(BillSchema):
|
||||
news_articles: list[NewsArticleSchema] = []
|
||||
trend_scores: list[TrendScoreSchema] = []
|
||||
briefs: list[BriefSchema] = []
|
||||
has_document: bool = False
|
||||
|
||||
|
||||
# ── Follow ────────────────────────────────────────────────────────────────────
|
||||
|
||||
@@ -15,6 +15,7 @@ celery_app = Celery(
|
||||
"app.workers.news_fetcher",
|
||||
"app.workers.trend_scorer",
|
||||
"app.workers.member_interest",
|
||||
"app.workers.notification_dispatcher",
|
||||
],
|
||||
)
|
||||
|
||||
@@ -37,6 +38,7 @@ celery_app.conf.update(
|
||||
"app.workers.news_fetcher.*": {"queue": "news"},
|
||||
"app.workers.trend_scorer.*": {"queue": "news"},
|
||||
"app.workers.member_interest.*": {"queue": "news"},
|
||||
"app.workers.notification_dispatcher.*": {"queue": "polling"},
|
||||
},
|
||||
task_queues=[
|
||||
Queue("polling"),
|
||||
@@ -72,5 +74,9 @@ celery_app.conf.update(
|
||||
"task": "app.workers.congress_poller.fetch_actions_for_active_bills",
|
||||
"schedule": crontab(hour=4, minute=0), # 4 AM UTC, after trend + member scoring
|
||||
},
|
||||
"dispatch-notifications": {
|
||||
"task": "app.workers.notification_dispatcher.dispatch_notifications",
|
||||
"schedule": crontab(minute="*/5"), # Every 5 minutes
|
||||
},
|
||||
},
|
||||
)
|
||||
|
||||
@@ -300,17 +300,95 @@ def fetch_actions_for_active_bills(self):
|
||||
def _update_bill_if_changed(db, existing: Bill, parsed: dict) -> bool:
    """Update bill fields if anything has changed. Returns True if updated."""
    # "changed" → a meaningful field moved, so downstream document/action
    # fetches should run; "dirty" → anything (including null backfills)
    # needs a commit.
    changed = False
    dirty = False

    # Meaningful change fields — trigger document + action fetch when updated
    for field in ("title", "short_title", "latest_action_date", "latest_action_text", "status"):
        incoming = parsed.get(field)
        if incoming and getattr(existing, field) != incoming:
            setattr(existing, field, incoming)
            changed = True
            dirty = True

    # Static fields — only fill in if currently null; no change trigger needed
    for field in ("introduced_date", "congress_url", "chamber"):
        incoming = parsed.get(field)
        if incoming and getattr(existing, field) is None:
            setattr(existing, field, incoming)
            dirty = True

    if changed:
        existing.last_checked_at = datetime.now(timezone.utc)
    if dirty:
        db.commit()
    # Check for new text versions and sync actions now that the bill has changed
    if changed:
        from app.workers.document_fetcher import fetch_bill_documents
        fetch_bill_documents.delay(existing.bill_id)
        fetch_bill_actions.delay(existing.bill_id)
    return changed
|
||||
|
||||
|
||||
@celery_app.task(bind=True, name="app.workers.congress_poller.backfill_bill_metadata")
def backfill_bill_metadata(self):
    """
    Find bills with null introduced_date (or other static fields) and
    re-fetch their detail from Congress.gov to fill in the missing values.
    No document or LLM calls — metadata only.
    """
    db = get_sync_db()
    try:
        from sqlalchemy import text as sa_text

        # Only bills still missing at least one static field are candidates.
        candidates = db.execute(sa_text("""
            SELECT bill_id, congress_number, bill_type, bill_number
            FROM bills
            WHERE introduced_date IS NULL
               OR congress_url IS NULL
               OR chamber IS NULL
        """)).fetchall()

        updated = 0
        skipped = 0
        for row in candidates:
            try:
                detail = congress_api.get_bill_detail(
                    row.congress_number, row.bill_type, row.bill_number
                )
                payload = detail.get("bill", {})
                # Reuse the list-endpoint parser by reshaping the detail
                # response into the structure it expects.
                parsed = congress_api.parse_bill_from_api(
                    {
                        "type": row.bill_type,
                        "number": row.bill_number,
                        "introducedDate": payload.get("introducedDate"),
                        "title": payload.get("title"),
                        "shortTitle": payload.get("shortTitle"),
                        "latestAction": payload.get("latestAction") or {},
                    },
                    row.congress_number,
                )

                bill = db.get(Bill, row.bill_id)
                if not bill:
                    skipped += 1
                    continue

                # Fill-only semantics: never overwrite a non-null value.
                dirty = False
                for field in ("introduced_date", "congress_url", "chamber", "title", "short_title"):
                    value = parsed.get(field)
                    if value and getattr(bill, field) is None:
                        setattr(bill, field, value)
                        dirty = True

                if dirty:
                    db.commit()
                    updated += 1
                else:
                    skipped += 1
                time.sleep(0.2)  # ~300 req/min — well under the 5k/hr limit
            except Exception as exc:
                logger.warning(f"backfill_bill_metadata: failed for {row.bill_id}: {exc}")
                skipped += 1

        logger.info(f"backfill_bill_metadata: {updated} updated, {skipped} skipped")
        return {"updated": updated, "skipped": skipped}
    finally:
        db.close()
|
||||
|
||||
@@ -199,3 +199,55 @@ def backfill_brief_citations(self):
|
||||
return {"total": total, "queued": queued, "skipped": skipped}
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
|
||||
@celery_app.task(bind=True, name="app.workers.llm_processor.resume_pending_analysis")
def resume_pending_analysis(self):
    """
    Two-pass backfill for bills missing analysis:

    Pass 1 — Documents with no brief (LLM tasks failed/timed out):
        Find BillDocuments that have raw_text but no BillBrief, re-queue LLM.

    Pass 2 — Bills with no document at all:
        Find Bills with no BillDocument, re-queue document fetch (which will
        then chain into LLM if text is available on GovInfo).
    """
    db = get_sync_db()
    try:
        # Pass 1: docs with raw_text but no brief
        pending_docs = db.execute(text("""
            SELECT bd.id
            FROM bill_documents bd
            LEFT JOIN bill_briefs bb ON bb.document_id = bd.id
            WHERE bb.id IS NULL AND bd.raw_text IS NOT NULL
        """)).fetchall()

        for doc in pending_docs:
            process_document_with_llm.delay(doc.id)
            time.sleep(0.1)  # stagger enqueues so the broker isn't flooded
        queued_llm = len(pending_docs)

        # Pass 2: bills with no document at all
        from app.workers.document_fetcher import fetch_bill_documents

        orphan_bills = db.execute(text("""
            SELECT b.bill_id
            FROM bills b
            LEFT JOIN bill_documents bd ON bd.bill_id = b.bill_id
            WHERE bd.id IS NULL
        """)).fetchall()

        for bill in orphan_bills:
            fetch_bill_documents.delay(bill.bill_id)
            time.sleep(0.1)
        queued_fetch = len(orphan_bills)

        logger.info(
            f"resume_pending_analysis: {queued_llm} LLM tasks queued, "
            f"{queued_fetch} document fetch tasks queued"
        )
        return {"queued_llm": queued_llm, "queued_fetch": queued_fetch}
    finally:
        db.close()
|
||||
|
||||
Reference in New Issue
Block a user