feat: bill action pipeline, What Changed UI, citation backfill, admin panel

Backend:
- Add fetch_bill_actions task with pagination and idempotent upsert
- Add fetch_actions_for_active_bills nightly batch (4 AM UTC beat schedule)
- Wire fetch_bill_actions into new-bill creation and _update_bill_if_changed
- Add backfill_brief_citations task: detects pre-citation briefs by JSONB type check, deletes them, re-queues LLM processing against stored text (LLM calls only — zero Congress.gov or GovInfo calls)
- Add admin endpoints: POST /bills/{id}/reprocess, /backfill-citations, /trigger-fetch-actions; add uncited_briefs count to /stats

Frontend:
- New BriefPanel component: wraps AIBriefCard, adds amber "What Changed" badge for amendment briefs and collapsible version history with inline brief expansion
- Swap AIBriefCard for BriefPanel on bill detail page
- Admin panel: Backfill Citations + Fetch Bill Actions buttons; amber warning in stats when uncited briefs remain
- Add feature roadmap document with phased plan through Phase 5

Co-Authored-By: Jack Levy
2026-03-01 03:03:29 -05:00
parent b57833d4b7
commit d5711312b8
9 changed files with 419 additions and 7 deletions
--- a/backend/app/api/admin.py
+++ b/backend/app/api/admin.py
@@ -1,5 +1,5 @@
 from fastapi import APIRouter, Depends, HTTPException
-from sqlalchemy import func, select
+from sqlalchemy import func, select, text
 from sqlalchemy.ext.asyncio import AsyncSession

 from app.core.dependencies import get_current_admin
@@ -98,18 +98,35 @@ async def get_stats(
    amendment_briefs = (await db.execute(
        select(func.count()).select_from(BillBrief).where(BillBrief.brief_type == "amendment")
    )).scalar()
+    uncited_briefs = (await db.execute(
+        text("""
+            SELECT COUNT(*) FROM bill_briefs
+            WHERE key_points IS NOT NULL
+              AND jsonb_array_length(key_points) > 0
+              AND jsonb_typeof(key_points->0) = 'string'
+        """)
+    )).scalar()
    return {
        "total_bills": total_bills,
        "docs_fetched": docs_fetched,
        "briefs_generated": total_briefs,
        "full_briefs": full_briefs,
        "amendment_briefs": amendment_briefs,
+        "uncited_briefs": uncited_briefs,
        "remaining": total_bills - total_briefs,
    }


 # ── Celery Tasks ──────────────────────────────────────────────────────────────

+@router.post("/backfill-citations")
+async def backfill_citations(current_user: User = Depends(get_current_admin)):
+    """Queue the citation backfill task and return its Celery task id.
+
+    The queued task deletes briefs generated before citation support and
+    re-runs LLM processing against the stored document text.
+    """
+    # Worker module is imported lazily, as the other task-trigger endpoints do.
+    from app.workers.llm_processor import backfill_brief_citations
+
+    queued = backfill_brief_citations.delay()
+    return dict(task_id=queued.id, status="queued")
+
+
@router.post("/backfill-sponsors")
 async def backfill_sponsors(current_user: User = Depends(get_current_admin)):
    from app.workers.congress_poller import backfill_sponsor_ids
@@ -131,6 +148,13 @@ async def trigger_member_sync(current_user: User = Depends(get_current_admin)):
    return {"task_id": task.id, "status": "queued"}


+@router.post("/trigger-fetch-actions")
+async def trigger_fetch_actions(current_user: User = Depends(get_current_admin)):
+    """Queue the batch task that enqueues action fetches for active bills.
+
+    Returns the Celery task id of the queued batch job.
+    """
+    from app.workers.congress_poller import fetch_actions_for_active_bills
+
+    queued = fetch_actions_for_active_bills.delay()
+    return dict(task_id=queued.id, status="queued")
+
+
@router.post("/trigger-trend-scores")
 async def trigger_trend_scores(current_user: User = Depends(get_current_admin)):
    from app.workers.trend_scorer import calculate_all_trend_scores
@@ -138,6 +162,16 @@ async def trigger_trend_scores(current_user: User = Depends(get_current_admin)):
    return {"task_id": task.id, "status": "queued"}


+@router.post("/bills/{bill_id}/reprocess")
+async def reprocess_bill(bill_id: str, current_user: User = Depends(get_current_admin)):
+    """Queue a document fetch and an action fetch for one bill (debugging aid).
+
+    Returns the Celery task ids of both queued jobs under ``task_ids``.
+    """
+    from app.workers.document_fetcher import fetch_bill_documents
+    from app.workers.congress_poller import fetch_bill_actions
+
+    # Documents are enqueued first, then actions.
+    documents_job = fetch_bill_documents.delay(bill_id)
+    actions_job = fetch_bill_actions.delay(bill_id)
+    task_ids = {"documents": documents_job.id, "actions": actions_job.id}
+    return {"task_ids": task_ids}
+
+
@router.get("/task-status/{task_id}")
 async def get_task_status(task_id: str, current_user: User = Depends(get_current_admin)):
    from app.workers.celery_app import celery_app
--- a/backend/app/workers/celery_app.py
+++ b/backend/app/workers/celery_app.py
@@ -68,5 +68,9 @@ celery_app.conf.update(
            "task": "app.workers.member_interest.calculate_all_member_trend_scores",
            "schedule": crontab(hour=3, minute=0),
        },
+        "fetch-actions-active-bills": {
+            "task": "app.workers.congress_poller.fetch_actions_for_active_bills",
+            "schedule": crontab(hour=4, minute=0),  # 4 AM UTC, after trend + member scoring
+        },
    },
 )
--- a/backend/app/workers/congress_poller.py
+++ b/backend/app/workers/congress_poller.py
@@ -6,8 +6,11 @@ Uses fromDateTime to fetch only recently updated bills.
 All operations are idempotent.
 """
 import logging
+import time
 from datetime import datetime, timedelta, timezone

+from sqlalchemy import or_
+
 from app.database import get_sync_db
 from app.models import Bill, BillAction, Member, AppSetting
 from app.services import congress_api
@@ -84,9 +87,10 @@ def poll_congress_bills(self):
                    db.add(Bill(**parsed))
                    db.commit()
                    new_count += 1
-                    # Enqueue document fetch
+                    # Enqueue document and action fetches
                    from app.workers.document_fetcher import fetch_bill_documents
                    fetch_bill_documents.delay(bill_id)
+                    fetch_bill_actions.delay(bill_id)
                else:
                    _update_bill_if_changed(db, existing, parsed)
                    updated_count += 1
@@ -198,6 +202,101 @@ def backfill_sponsor_ids(self):
        db.close()


+@celery_app.task(bind=True, max_retries=3, name="app.workers.congress_poller.fetch_bill_actions")
+def fetch_bill_actions(self, bill_id: str):
+    """Fetch all actions for a bill from Congress.gov and store the new ones.
+
+    Pages through ``congress_api.get_bill_actions`` and inserts a
+    ``BillAction`` row for every (action date, action text) pair that is not
+    already stored for the bill, so re-running the task does not create
+    duplicates. Existing rows are never modified. After the last page,
+    ``bill.actions_fetched_at`` is set to the current UTC time.
+
+    Args:
+        bill_id: Identifier of the ``Bill`` whose actions should be synced.
+
+    Returns:
+        ``{"bill_id": ..., "inserted": ...}`` on success, or ``None`` when no
+        bill with that id exists.
+
+    Raises:
+        Whatever ``self.retry`` raises when a Congress.gov request fails (the
+        task is re-scheduled after 60 seconds, up to ``max_retries`` times);
+        any other exception is re-raised after rolling back the session.
+    """
+    # Page size the pagination below assumes.
+    # NOTE(review): presumably matches the limit get_bill_actions requests — confirm.
+    page_size = 250
+
+    db = get_sync_db()
+    try:
+        bill = db.get(Bill, bill_id)
+        if not bill:
+            logger.warning(f"fetch_bill_actions: bill {bill_id} not found")
+            return
+
+        offset = 0
+        inserted = 0
+        # (action_date, action_text) pairs already handled during this run, so
+        # repeats in the API response are skipped without relying on the
+        # session autoflushing pending rows before the existence query.
+        seen = set()
+        while True:
+            try:
+                response = congress_api.get_bill_actions(
+                    bill.congress_number, bill.bill_type, bill.bill_number, offset=offset
+                )
+            except Exception as exc:
+                raise self.retry(exc=exc, countdown=60)
+
+            actions_data = response.get("actions", [])
+            if not actions_data:
+                break
+
+            for action in actions_data:
+                action_date_str = action.get("actionDate")
+                action_text = action.get("text", "")
+
+                key = (action_date_str, action_text)
+                if key in seen:
+                    continue
+                seen.add(key)
+
+                # Idempotency check: skip if (bill_id, action_date, action_text) exists
+                exists = (
+                    db.query(BillAction)
+                    .filter(
+                        BillAction.bill_id == bill_id,
+                        BillAction.action_date == action_date_str,
+                        BillAction.action_text == action_text,
+                    )
+                    .first()
+                )
+                if not exists:
+                    db.add(BillAction(
+                        bill_id=bill_id,
+                        action_date=action_date_str,
+                        action_text=action_text,
+                        action_type=action.get("type"),
+                        chamber=action.get("chamber"),
+                    ))
+                    inserted += 1
+
+            # Commit page by page so a retry on a later page keeps earlier work.
+            db.commit()
+            offset += page_size
+            if len(actions_data) < page_size:
+                # A short page means there is nothing left to fetch.
+                break
+
+        bill.actions_fetched_at = datetime.now(timezone.utc)
+        db.commit()
+        logger.info(f"fetch_bill_actions: {bill_id} — inserted {inserted} new actions")
+        return {"bill_id": bill_id, "inserted": inserted}
+    except Exception:
+        db.rollback()
+        raise
+    finally:
+        db.close()
+
+
+@celery_app.task(bind=True, name="app.workers.congress_poller.fetch_actions_for_active_bills")
+def fetch_actions_for_active_bills(self):
+    """Nightly batch: enqueue action fetches for recently active bills.
+
+    Selects up to 200 bills whose ``latest_action_date`` falls within the last
+    30 days and whose actions were either never fetched or fetched before the
+    latest action, most recently active first, and queues one
+    ``fetch_bill_actions`` task per bill.
+
+    Returns:
+        ``{"queued": <number of tasks enqueued>}``.
+    """
+    db = get_sync_db()
+    try:
+        cutoff = datetime.now(timezone.utc).date() - timedelta(days=30)
+        # NOTE(review): latest_action_date is compared with a date (cutoff) and
+        # with actions_fetched_at, which fetch_bill_actions sets from a
+        # datetime — confirm the column types compare as intended.
+        rows = (
+            db.query(Bill.bill_id)
+            .filter(
+                Bill.latest_action_date >= cutoff,
+                or_(
+                    Bill.actions_fetched_at.is_(None),
+                    Bill.latest_action_date > Bill.actions_fetched_at,
+                ),
+            )
+            # Explicit ordering makes the capped selection deterministic and
+            # favours the most recently active bills when more than 200 qualify.
+            .order_by(Bill.latest_action_date.desc(), Bill.bill_id)
+            .limit(200)
+            .all()
+        )
+        bill_ids = [row[0] for row in rows]
+    finally:
+        # Release the session before the throttled enqueue loop below.
+        db.close()
+
+    queued = 0
+    for bill_id in bill_ids:
+        fetch_bill_actions.delay(bill_id)
+        queued += 1
+        time.sleep(0.2)  # ~5 tasks/sec to avoid Redis burst
+    logger.info(f"fetch_actions_for_active_bills: queued {queued} bills")
+    return {"queued": queued}
+
+
 def _update_bill_if_changed(db, existing: Bill, parsed: dict) -> bool:
    """Update bill fields if anything has changed. Returns True if updated."""
    changed = False
@@ -210,7 +309,8 @@ def _update_bill_if_changed(db, existing: Bill, parsed: dict) -> bool:
    if changed:
        existing.last_checked_at = datetime.now(timezone.utc)
        db.commit()
-        # Check for new text versions now that the bill has changed
+        # Check for new text versions and sync actions now that the bill has changed
        from app.workers.document_fetcher import fetch_bill_documents
        fetch_bill_documents.delay(existing.bill_id)
+        fetch_bill_actions.delay(existing.bill_id)
    return changed
--- a/backend/app/workers/llm_processor.py
+++ b/backend/app/workers/llm_processor.py
@@ -3,6 +3,9 @@ LLM processor — generates AI briefs for fetched bill documents.
 Triggered by document_fetcher after successful text retrieval.
 """
 import logging
+import time
+
+from sqlalchemy import text

 from app.database import get_sync_db
 from app.models import Bill, BillBrief, BillDocument, Member
@@ -106,3 +109,55 @@ def process_document_with_llm(self, document_id: int):
        raise self.retry(exc=exc, countdown=300)  # 5 min backoff for LLM failures
    finally:
        db.close()
+
+
+@celery_app.task(bind=True, name="app.workers.llm_processor.backfill_brief_citations")
+def backfill_brief_citations(self):
+    """
+    Regenerate briefs that were produced before citation support existed.
+
+    A brief counts as "uncited" when the first element of its key_points JSONB
+    array is a plain string rather than an object. Each such brief whose
+    document still has stored text is deleted and its document is re-queued
+    for process_document_with_llm; briefs without a usable document are left
+    untouched and counted as skipped.
+
+    Returns a dict with the "total", "queued" and "skipped" counts.
+    """
+    session = get_sync_db()
+    try:
+        stale_rows = session.execute(text("""
+            SELECT id, document_id, bill_id
+            FROM bill_briefs
+            WHERE key_points IS NOT NULL
+              AND jsonb_array_length(key_points) > 0
+              AND jsonb_typeof(key_points->0) = 'string'
+        """)).fetchall()
+
+        total = len(stale_rows)
+        queued = 0
+        skipped = 0
+
+        for row in stale_rows:
+            # Only look the document up when the brief actually references one.
+            document = session.get(BillDocument, row.document_id) if row.document_id else None
+
+            # Keep the old brief unless there is stored text to regenerate from.
+            if not document or not document.raw_text:
+                skipped += 1
+                continue
+
+            # The delete is committed before the task is enqueued.
+            # NOTE(review): presumably so the worker never sees the stale brief — confirm.
+            stale_brief = session.get(BillBrief, row.id)
+            if stale_brief:
+                session.delete(stale_brief)
+                session.commit()
+
+            process_document_with_llm.delay(row.document_id)
+            queued += 1
+            time.sleep(0.1)  # Avoid burst-queuing all LLM tasks at once
+
+        logger.info(
+            f"backfill_brief_citations: {total} uncited briefs found, "
+            f"{queued} re-queued, {skipped} skipped (no document text)"
+        )
+        return {"total": total, "queued": queued, "skipped": skipped}
+    finally:
+        session.close()