feat: bill action pipeline, What Changed UI, citation backfill, admin panel

Backend:
- Add fetch_bill_actions task with pagination and idempotent upsert
- Add fetch_actions_for_active_bills nightly batch (4 AM UTC beat schedule)
- Wire fetch_bill_actions into new-bill creation and _update_bill_if_changed
- Add backfill_brief_citations task: detects pre-citation briefs by JSONB
  type check, deletes them, re-queues LLM processing against stored text
  (LLM calls only — zero Congress.gov or GovInfo calls)
- Add admin endpoints: POST /bills/{id}/reprocess, /backfill-citations,
  /trigger-fetch-actions; add uncited_briefs count to /stats

Frontend:
- New BriefPanel component: wraps AIBriefCard, adds amber "What Changed"
  badge for amendment briefs and collapsible version history with
  inline brief expansion
- Swap AIBriefCard for BriefPanel on bill detail page
- Admin panel: Backfill Citations + Fetch Bill Actions buttons; amber
  warning in stats when uncited briefs remain
- Add feature roadmap document with phased plan through Phase 5

Co-Authored-By: Jack Levy
This commit is contained in:
Jack Levy
2026-03-01 03:03:29 -05:00
parent b57833d4b7
commit d5711312b8
9 changed files with 419 additions and 7 deletions

View File

@@ -1,5 +1,5 @@
from fastapi import APIRouter, Depends, HTTPException
from sqlalchemy import func, select
from sqlalchemy import func, select, text
from sqlalchemy.ext.asyncio import AsyncSession
from app.core.dependencies import get_current_admin
@@ -98,18 +98,35 @@ async def get_stats(
amendment_briefs = (await db.execute(
select(func.count()).select_from(BillBrief).where(BillBrief.brief_type == "amendment")
)).scalar()
uncited_briefs = (await db.execute(
text("""
SELECT COUNT(*) FROM bill_briefs
WHERE key_points IS NOT NULL
AND jsonb_array_length(key_points) > 0
AND jsonb_typeof(key_points->0) = 'string'
""")
)).scalar()
return {
"total_bills": total_bills,
"docs_fetched": docs_fetched,
"briefs_generated": total_briefs,
"full_briefs": full_briefs,
"amendment_briefs": amendment_briefs,
"uncited_briefs": uncited_briefs,
"remaining": total_bills - total_briefs,
}
# ── Celery Tasks ──────────────────────────────────────────────────────────────
@router.post("/backfill-citations")
async def backfill_citations(current_user: User = Depends(get_current_admin)):
    """Queue the citation backfill job and return its Celery task id.

    This handler only enqueues ``backfill_brief_citations``; the work itself
    runs in a worker. Access is gated by the ``get_current_admin`` dependency.

    Returns:
        ``{"task_id": <celery task id>, "status": "queued"}``.
    """
    # Local import, as in the other task-trigger endpoints of this router.
    from app.workers.llm_processor import backfill_brief_citations

    queued_job = backfill_brief_citations.delay()
    payload = {"task_id": queued_job.id, "status": "queued"}
    return payload
@router.post("/backfill-sponsors")
async def backfill_sponsors(current_user: User = Depends(get_current_admin)):
from app.workers.congress_poller import backfill_sponsor_ids
@@ -131,6 +148,13 @@ async def trigger_member_sync(current_user: User = Depends(get_current_admin)):
return {"task_id": task.id, "status": "queued"}
@router.post("/trigger-fetch-actions")
async def trigger_fetch_actions(current_user: User = Depends(get_current_admin)):
    """Queue the ``fetch_actions_for_active_bills`` batch task on demand.

    Access is gated by the ``get_current_admin`` dependency.

    Returns:
        ``{"task_id": <celery task id>, "status": "queued"}``.
    """
    # Local import, as in the other task-trigger endpoints of this router.
    from app.workers.congress_poller import fetch_actions_for_active_bills

    queued_job = fetch_actions_for_active_bills.delay()
    payload = {"task_id": queued_job.id, "status": "queued"}
    return payload
@router.post("/trigger-trend-scores")
async def trigger_trend_scores(current_user: User = Depends(get_current_admin)):
from app.workers.trend_scorer import calculate_all_trend_scores
@@ -138,6 +162,16 @@ async def trigger_trend_scores(current_user: User = Depends(get_current_admin)):
return {"task_id": task.id, "status": "queued"}
@router.post("/bills/{bill_id}/reprocess")
async def reprocess_bill(bill_id: str, current_user: User = Depends(get_current_admin)):
    """Queue a document fetch and an action fetch for one bill (debugging aid).

    The bill id is handed to both tasks as-is; this handler does not look the
    bill up itself.

    Args:
        bill_id: Identifier of the bill, taken from the URL path.

    Returns:
        ``{"task_ids": {"documents": ..., "actions": ...}}`` holding the Celery
        task id of each queued job.
    """
    from app.workers.document_fetcher import fetch_bill_documents
    from app.workers.congress_poller import fetch_bill_actions

    # Dict literals evaluate in order, so documents are queued before actions.
    task_ids = {
        "documents": fetch_bill_documents.delay(bill_id).id,
        "actions": fetch_bill_actions.delay(bill_id).id,
    }
    return {"task_ids": task_ids}
@router.get("/task-status/{task_id}")
async def get_task_status(task_id: str, current_user: User = Depends(get_current_admin)):
from app.workers.celery_app import celery_app

View File

@@ -68,5 +68,9 @@ celery_app.conf.update(
"task": "app.workers.member_interest.calculate_all_member_trend_scores",
"schedule": crontab(hour=3, minute=0),
},
"fetch-actions-active-bills": {
"task": "app.workers.congress_poller.fetch_actions_for_active_bills",
"schedule": crontab(hour=4, minute=0), # 4 AM UTC, after trend + member scoring
},
},
)

View File

@@ -6,8 +6,11 @@ Uses fromDateTime to fetch only recently updated bills.
All operations are idempotent.
"""
import logging
import time
from datetime import datetime, timedelta, timezone
from sqlalchemy import or_
from app.database import get_sync_db
from app.models import Bill, BillAction, Member, AppSetting
from app.services import congress_api
@@ -84,9 +87,10 @@ def poll_congress_bills(self):
db.add(Bill(**parsed))
db.commit()
new_count += 1
# Enqueue document fetch
# Enqueue document and action fetches
from app.workers.document_fetcher import fetch_bill_documents
fetch_bill_documents.delay(bill_id)
fetch_bill_actions.delay(bill_id)
else:
_update_bill_if_changed(db, existing, parsed)
updated_count += 1
@@ -198,6 +202,101 @@ def backfill_sponsor_ids(self):
db.close()
@celery_app.task(bind=True, max_retries=3, name="app.workers.congress_poller.fetch_bill_actions")
def fetch_bill_actions(self, bill_id: str):
    """Sync all Congress.gov actions for one bill into ``BillAction`` rows.

    Pages through ``congress_api.get_bill_actions`` and inserts only those
    actions whose (bill_id, action_date, action_text) triple is not stored
    yet, so running the task again does not duplicate rows. When paging
    finishes, ``bill.actions_fetched_at`` is stamped with the current UTC time.

    Args:
        bill_id: Primary key of the ``Bill`` to sync.

    Returns:
        ``{"bill_id": ..., "inserted": ...}`` on success, or ``None`` when no
        bill with that id exists.

    Raises:
        Whatever ``self.retry`` raises when an API call fails (60 s countdown,
        ``max_retries=3``). Any exception is re-raised after a rollback.
    """
    # Offset step; a page shorter than this also marks the last page.
    page_size = 250

    db = get_sync_db()
    try:
        bill = db.get(Bill, bill_id)
        if not bill:
            logger.warning(f"fetch_bill_actions: bill {bill_id} not found")
            return

        inserted = 0
        offset = 0
        more_pages = True
        while more_pages:
            try:
                response = congress_api.get_bill_actions(
                    bill.congress_number, bill.bill_type, bill.bill_number, offset=offset
                )
            except Exception as exc:
                raise self.retry(exc=exc, countdown=60)

            page = response.get("actions", [])
            if not page:
                break

            for item in page:
                item_date = item.get("actionDate")
                item_text = item.get("text", "")

                # Idempotency check: skip if (bill_id, action_date, action_text) exists
                stored = (
                    db.query(BillAction)
                    .filter(
                        BillAction.bill_id == bill_id,
                        BillAction.action_date == item_date,
                        BillAction.action_text == item_text,
                    )
                    .first()
                )
                if stored:
                    continue

                db.add(BillAction(
                    bill_id=bill_id,
                    action_date=item_date,
                    action_text=item_text,
                    action_type=item.get("type"),
                    chamber=item.get("chamber"),
                ))
                inserted += 1

            # Persist one page at a time so a later retry only redoes the rest.
            db.commit()
            offset += page_size
            more_pages = len(page) >= page_size

        bill.actions_fetched_at = datetime.now(timezone.utc)
        db.commit()
        logger.info(f"fetch_bill_actions: {bill_id} — inserted {inserted} new actions")
        return {"bill_id": bill_id, "inserted": inserted}
    except Exception:
        db.rollback()
        raise
    finally:
        db.close()
@celery_app.task(bind=True, name="app.workers.congress_poller.fetch_actions_for_active_bills")
def fetch_actions_for_active_bills(self):
    """Nightly batch: enqueue action fetches for recently active bills missing action data.

    Picks at most 200 bills whose ``latest_action_date`` is within the last
    30 days and whose actions were never fetched
    (``actions_fetched_at IS NULL``) or were fetched before the latest action,
    then queues ``fetch_bill_actions`` for each one.

    The batch is ordered by most recent action first (ties broken by
    ``bill_id``). Without an ORDER BY the LIMIT would return an arbitrary
    subset, so ordering makes the selection deterministic.

    Returns:
        ``{"queued": <number of tasks enqueued>}``.
    """
    lookback_days = 30
    batch_limit = 200
    enqueue_pause_s = 0.2  # ~5 tasks/sec to avoid Redis burst

    db = get_sync_db()
    try:
        cutoff = datetime.now(timezone.utc).date() - timedelta(days=lookback_days)
        # NOTE(review): latest_action_date is compared to a date (cutoff) and to
        # the actions_fetched_at timestamp in the same filter — confirm the
        # column types make the second comparison behave as intended.
        rows = (
            # Only the id is needed to enqueue, so skip loading full Bill rows.
            db.query(Bill.bill_id)
            .filter(
                Bill.latest_action_date >= cutoff,
                or_(
                    Bill.actions_fetched_at.is_(None),
                    Bill.latest_action_date > Bill.actions_fetched_at,
                ),
            )
            .order_by(Bill.latest_action_date.desc(), Bill.bill_id)
            .limit(batch_limit)
            .all()
        )

        queued = 0
        for row in rows:
            fetch_bill_actions.delay(row.bill_id)
            queued += 1
            time.sleep(enqueue_pause_s)

        logger.info(f"fetch_actions_for_active_bills: queued {queued} bills")
        return {"queued": queued}
    finally:
        db.close()
def _update_bill_if_changed(db, existing: Bill, parsed: dict) -> bool:
"""Update bill fields if anything has changed. Returns True if updated."""
changed = False
@@ -210,7 +309,8 @@ def _update_bill_if_changed(db, existing: Bill, parsed: dict) -> bool:
if changed:
existing.last_checked_at = datetime.now(timezone.utc)
db.commit()
# Check for new text versions now that the bill has changed
# Check for new text versions and sync actions now that the bill has changed
from app.workers.document_fetcher import fetch_bill_documents
fetch_bill_documents.delay(existing.bill_id)
fetch_bill_actions.delay(existing.bill_id)
return changed

View File

@@ -3,6 +3,9 @@ LLM processor — generates AI briefs for fetched bill documents.
Triggered by document_fetcher after successful text retrieval.
"""
import logging
import time
from sqlalchemy import text
from app.database import get_sync_db
from app.models import Bill, BillBrief, BillDocument, Member
@@ -106,3 +109,55 @@ def process_document_with_llm(self, document_id: int):
raise self.retry(exc=exc, countdown=300) # 5 min backoff for LLM failures
finally:
db.close()
@celery_app.task(bind=True, name="app.workers.llm_processor.backfill_brief_citations")
def backfill_brief_citations(self):
    """Regenerate briefs that were created before citation support existed.

    A brief is treated as uncited when the first element of its ``key_points``
    JSONB array is a plain string. For each such brief whose document still
    has stored ``raw_text``, the brief row is deleted and
    ``process_document_with_llm`` is queued for that document. Briefs with no
    document id, a missing document, or empty text are left untouched and
    counted as skipped.

    This task itself only reads and writes the database and enqueues LLM work.

    Returns:
        ``{"total": ..., "queued": ..., "skipped": ...}``.
    """
    db = get_sync_db()
    try:
        uncited_query = text(
            "\n"
            "SELECT id, document_id, bill_id\n"
            "FROM bill_briefs\n"
            "WHERE key_points IS NOT NULL\n"
            "AND jsonb_array_length(key_points) > 0\n"
            "AND jsonb_typeof(key_points->0) = 'string'\n"
        )
        candidates = db.execute(uncited_query).fetchall()

        total = len(candidates)
        queued = 0
        skipped = 0

        for candidate in candidates:
            # Confirm the document still has text before deleting the brief;
            # without it the brief could not be regenerated.
            document = (
                db.get(BillDocument, candidate.document_id)
                if candidate.document_id
                else None
            )
            if not document or not document.raw_text:
                skipped += 1
                continue

            stale_brief = db.get(BillBrief, candidate.id)
            if stale_brief:
                db.delete(stale_brief)
                db.commit()

            process_document_with_llm.delay(candidate.document_id)
            queued += 1
            time.sleep(0.1)  # Avoid burst-queuing all LLM tasks at once

        logger.info(
            f"backfill_brief_citations: {total} uncited briefs found, "
            f"{queued} re-queued, {skipped} skipped (no document text)"
        )
        return {"total": total, "queued": queued, "skipped": skipped}
    finally:
        db.close()