feat: bill action pipeline, What Changed UI, citation backfill, admin panel

Backend:
- Add fetch_bill_actions task with pagination and idempotent upsert
- Add fetch_actions_for_active_bills nightly batch (4 AM UTC beat schedule)
- Wire fetch_bill_actions into new-bill creation and _update_bill_if_changed
- Add backfill_brief_citations task: detects pre-citation briefs by JSONB type check, deletes them, re-queues LLM processing against stored text (LLM calls only — zero Congress.gov or GovInfo calls)
- Add admin endpoints: POST /bills/{id}/reprocess, /backfill-citations, /trigger-fetch-actions; add uncited_briefs count to /stats

Frontend:
- New BriefPanel component: wraps AIBriefCard, adds amber "What Changed" badge for amendment briefs and collapsible version history with inline brief expansion
- Swap AIBriefCard for BriefPanel on bill detail page
- Admin panel: Backfill Citations + Fetch Bill Actions buttons; amber warning in stats when uncited briefs remain
- Add feature roadmap document with phased plan through Phase 5

Co-Authored-By: Jack Levy
2026-03-01 03:03:29 -05:00
parent b57833d4b7
commit d5711312b8
9 changed files with 419 additions and 7 deletions
--- a/backend/app/workers/llm_processor.py
+++ b/backend/app/workers/llm_processor.py
@@ -3,6 +3,9 @@ LLM processor — generates AI briefs for fetched bill documents.
 Triggered by document_fetcher after successful text retrieval.
 """
 import logging
+import time
+
+from sqlalchemy import text

 from app.database import get_sync_db
 from app.models import Bill, BillBrief, BillDocument, Member
@@ -106,3 +109,55 @@ def process_document_with_llm(self, document_id: int):
        raise self.retry(exc=exc, countdown=300)  # 5 min backoff for LLM failures
    finally:
        db.close()
+
+
+@celery_app.task(bind=True, name="app.workers.llm_processor.backfill_brief_citations")
+def backfill_brief_citations(self):
+    """
+    Find briefs generated before citation support was added (key_points contains plain
+    strings instead of {text, citation, quote} objects), delete them, and re-queue
+    LLM processing against the already-stored document text.
+
+    No Congress.gov or GovInfo calls — only LLM calls.
+    """
+    db = get_sync_db()
+    try:
+        uncited = db.execute(text("""
+            SELECT id, document_id, bill_id
+            FROM bill_briefs
+            WHERE key_points IS NOT NULL
+              AND jsonb_array_length(key_points) > 0
+              AND jsonb_typeof(key_points->0) = 'string'
+        """)).fetchall()
+
+        total = len(uncited)
+        queued = 0
+        skipped = 0
+
+        for row in uncited:
+            if not row.document_id:
+                skipped += 1
+                continue
+
+            # Confirm the document still has text before deleting the brief
+            doc = db.get(BillDocument, row.document_id)
+            if not doc or not doc.raw_text:
+                skipped += 1
+                continue
+
+            brief = db.get(BillBrief, row.id)
+            if brief:
+                db.delete(brief)
+                db.commit()
+
+            process_document_with_llm.delay(row.document_id)
+            queued += 1
+            time.sleep(0.1)  # Avoid burst-queuing all LLM tasks at once
+
+        logger.info(
+            f"backfill_brief_citations: {total} uncited briefs found, "
+            f"{queued} re-queued, {skipped} skipped (no document text)"
+        )
+        return {"total": total, "queued": queued, "skipped": skipped}
+    finally:
+        db.close()