Add bill action pipeline, admin health panel, and LLM provider fixes

- Fetch bill actions from Congress.gov and populate the action timeline - Add nightly batch task and beat schedule for active bill actions - Add admin reprocess endpoint for per-bill debugging - Add BriefPanel with "What Changed" view and version history - Add External API Health section with per-source latency testing - Redesign Manual Controls as health panel with status dots and descriptions - Add Resume Analysis task for stalled LLM jobs - Add Backfill Dates & Links task for bills with null metadata - Fix LLM provider/model DB overrides being ignored (env vars used instead) - Fix Gemini 404: gemini-1.5-pro deprecated → gemini-2.0-flash - Fix Anthropic models list: use REST API directly (SDK too old for .models) - Replace test-LLM full analysis with lightweight ping (max_tokens=20) - Add has_document field to BillDetail; show "No bill text published" state - Fix "Introduced: —" showing for bills with null introduced_date - Add bills_missing_sponsor and bills_missing_metadata to admin stats - Add GovInfo health check using /collections endpoint (fixes 500 from /packages) Authored-By: Jack Levy
2026-03-01 11:06:14 -05:00
parent defc2c116d
commit 5eebc2f196
10 changed files with 586 additions and 74 deletions
--- a/backend/app/api/admin.py
+++ b/backend/app/api/admin.py
@@ -106,6 +106,30 @@ async def get_stats(
              AND jsonb_typeof(key_points->0) = 'string'
        """)
    )).scalar()
+    # Bills with null sponsor
+    bills_missing_sponsor = (await db.execute(
+        text("SELECT COUNT(*) FROM bills WHERE sponsor_id IS NULL")
+    )).scalar()
+    # Bills with null metadata (introduced_date / chamber / congress_url)
+    bills_missing_metadata = (await db.execute(
+        text("SELECT COUNT(*) FROM bills WHERE introduced_date IS NULL OR chamber IS NULL OR congress_url IS NULL")
+    )).scalar()
+    # Bills with no document record at all (text not yet published on GovInfo)
+    no_text_bills = (await db.execute(
+        text("""
+            SELECT COUNT(*) FROM bills b
+            LEFT JOIN bill_documents bd ON bd.bill_id = b.bill_id
+            WHERE bd.id IS NULL
+        """)
+    )).scalar()
+    # Documents that have text but no brief (LLM not yet run / failed)
+    pending_llm = (await db.execute(
+        text("""
+            SELECT COUNT(*) FROM bill_documents bd
+            LEFT JOIN bill_briefs bb ON bb.document_id = bd.id
+            WHERE bb.id IS NULL AND bd.raw_text IS NOT NULL
+        """)
+    )).scalar()
    return {
        "total_bills": total_bills,
        "docs_fetched": docs_fetched,
@@ -113,6 +137,10 @@ async def get_stats(
        "full_briefs": full_briefs,
        "amendment_briefs": amendment_briefs,
        "uncited_briefs": uncited_briefs,
+        "no_text_bills": no_text_bills,
+        "pending_llm": pending_llm,
+        "bills_missing_sponsor": bills_missing_sponsor,
+        "bills_missing_metadata": bills_missing_metadata,
        "remaining": total_bills - total_briefs,
    }

@@ -155,6 +183,22 @@ async def trigger_fetch_actions(current_user: User = Depends(get_current_admin))
    return {"task_id": task.id, "status": "queued"}


+@router.post("/backfill-metadata")
+async def backfill_metadata(current_user: User = Depends(get_current_admin)):
+    """Fill in null introduced_date, congress_url, chamber for existing bills."""
+    from app.workers.congress_poller import backfill_bill_metadata
+    task = backfill_bill_metadata.delay()
+    return {"task_id": task.id, "status": "queued"}
+
+
+@router.post("/resume-analysis")
+async def resume_analysis(current_user: User = Depends(get_current_admin)):
+    """Re-queue LLM processing for docs with no brief, and document fetching for bills with no doc."""
+    from app.workers.llm_processor import resume_pending_analysis
+    task = resume_pending_analysis.delay()
+    return {"task_id": task.id, "status": "queued"}
+
+
@router.post("/trigger-trend-scores")
 async def trigger_trend_scores(current_user: User = Depends(get_current_admin)):
    from app.workers.trend_scorer import calculate_all_trend_scores
@@ -172,6 +216,113 @@ async def reprocess_bill(bill_id: str, current_user: User = Depends(get_current_
    return {"task_ids": {"documents": doc_task.id, "actions": actions_task.id}}


+@router.get("/api-health")
+async def api_health(current_user: User = Depends(get_current_admin)):
+    """Test each external API and return status + latency for each."""
+    import asyncio
+    results = await asyncio.gather(
+        asyncio.to_thread(_test_congress),
+        asyncio.to_thread(_test_govinfo),
+        asyncio.to_thread(_test_newsapi),
+        asyncio.to_thread(_test_gnews),
+        return_exceptions=True,
+    )
+    keys = ["congress_gov", "govinfo", "newsapi", "google_news"]
+    return {
+        k: r if isinstance(r, dict) else {"status": "error", "detail": str(r)}
+        for k, r in zip(keys, results)
+    }
+
+
+def _timed(fn):
+    """Run fn(), return its dict merged with latency_ms."""
+    import time as _time
+    t0 = _time.perf_counter()
+    result = fn()
+    result["latency_ms"] = round((_time.perf_counter() - t0) * 1000)
+    return result
+
+
+def _test_congress() -> dict:
+    from app.config import settings
+    from app.services import congress_api
+    if not settings.DATA_GOV_API_KEY:
+        return {"status": "error", "detail": "DATA_GOV_API_KEY not configured"}
+    def _call():
+        data = congress_api.get_bills(119, limit=1)
+        count = data.get("pagination", {}).get("count") or len(data.get("bills", []))
+        return {"status": "ok", "detail": f"{count:,} bills available in 119th Congress"}
+    try:
+        return _timed(_call)
+    except Exception as exc:
+        return {"status": "error", "detail": str(exc)}
+
+
+def _test_govinfo() -> dict:
+    from app.config import settings
+    import requests as req
+    if not settings.DATA_GOV_API_KEY:
+        return {"status": "error", "detail": "DATA_GOV_API_KEY not configured"}
+    def _call():
+        # /collections lists all available collections — simple health check endpoint
+        resp = req.get(
+            "https://api.govinfo.gov/collections",
+            params={"api_key": settings.DATA_GOV_API_KEY},
+            timeout=15,
+        )
+        resp.raise_for_status()
+        data = resp.json()
+        collections = data.get("collections", [])
+        bills_col = next((c for c in collections if c.get("collectionCode") == "BILLS"), None)
+        if bills_col:
+            count = bills_col.get("packageCount", "?")
+            return {"status": "ok", "detail": f"BILLS collection: {count:,} packages" if isinstance(count, int) else "GovInfo reachable, BILLS collection found"}
+        return {"status": "ok", "detail": f"GovInfo reachable — {len(collections)} collections available"}
+    try:
+        return _timed(_call)
+    except Exception as exc:
+        return {"status": "error", "detail": str(exc)}
+
+
+def _test_newsapi() -> dict:
+    from app.config import settings
+    import requests as req
+    if not settings.NEWSAPI_KEY:
+        return {"status": "skipped", "detail": "NEWSAPI_KEY not configured"}
+    def _call():
+        resp = req.get(
+            "https://newsapi.org/v2/top-headlines",
+            params={"country": "us", "pageSize": 1, "apiKey": settings.NEWSAPI_KEY},
+            timeout=10,
+        )
+        data = resp.json()
+        if data.get("status") != "ok":
+            return {"status": "error", "detail": data.get("message", "Unknown error")}
+        return {"status": "ok", "detail": f"{data.get('totalResults', 0):,} headlines available"}
+    try:
+        return _timed(_call)
+    except Exception as exc:
+        return {"status": "error", "detail": str(exc)}
+
+
+def _test_gnews() -> dict:
+    import requests as req
+    def _call():
+        resp = req.get(
+            "https://news.google.com/rss/search",
+            params={"q": "congress", "hl": "en-US", "gl": "US", "ceid": "US:en"},
+            timeout=10,
+            headers={"User-Agent": "Mozilla/5.0"},
+        )
+        resp.raise_for_status()
+        item_count = resp.text.count("<item>")
+        return {"status": "ok", "detail": f"{item_count} items in test RSS feed"}
+    try:
+        return _timed(_call)
+    except Exception as exc:
+        return {"status": "error", "detail": str(exc)}
+
+
@router.get("/task-status/{task_id}")
 async def get_task_status(task_id: str, current_user: User = Depends(get_current_admin)):
    from app.workers.celery_app import celery_app
--- a/backend/app/api/bills.py
+++ b/backend/app/api/bills.py
@@ -6,7 +6,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
 from sqlalchemy.orm import selectinload

 from app.database import get_db
-from app.models import Bill, BillAction, BillBrief, NewsArticle, TrendScore
+from app.models import Bill, BillAction, BillBrief, BillDocument, NewsArticle, TrendScore
 from app.schemas.schemas import (
    BillDetailSchema,
    BillSchema,
@@ -109,6 +109,10 @@ async def get_bill(bill_id: str, db: AsyncSession = Depends(get_db)):
        detail.latest_brief = bill.briefs[0]
    if bill.trend_scores:
        detail.latest_trend = bill.trend_scores[0]
+    doc_exists = await db.scalar(
+        select(func.count()).select_from(BillDocument).where(BillDocument.bill_id == bill_id)
+    )
+    detail.has_document = bool(doc_exists)

    # Trigger a background news refresh if no articles are stored but trend
    # data shows there are gnews results out there waiting to be fetched.