feat: bill action pipeline, What Changed UI, citation backfill, admin panel
Backend:
- Add fetch_bill_actions task with pagination and idempotent upsert
- Add fetch_actions_for_active_bills nightly batch (4 AM UTC beat schedule)
- Wire fetch_bill_actions into new-bill creation and _update_bill_if_changed
- Add backfill_brief_citations task: detects pre-citation briefs by JSONB
type check, deletes them, re-queues LLM processing against stored text
(LLM calls only — zero Congress.gov or GovInfo calls)
- Add admin endpoints: POST /bills/{id}/reprocess, /backfill-citations,
/trigger-fetch-actions; add uncited_briefs count to /stats
Frontend:
- New BriefPanel component: wraps AIBriefCard, adds amber "What Changed"
badge for amendment briefs and collapsible version history with
inline brief expansion
- Swap AIBriefCard for BriefPanel on bill detail page
- Admin panel: Backfill Citations + Fetch Bill Actions buttons; amber
warning in stats when uncited briefs remain
- Add feature roadmap document with phased plan through Phase 5
Co-Authored-By: Jack Levy
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from sqlalchemy import func, select
|
||||
from sqlalchemy import func, select, text
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.core.dependencies import get_current_admin
|
||||
@@ -98,18 +98,35 @@ async def get_stats(
|
||||
amendment_briefs = (await db.execute(
|
||||
select(func.count()).select_from(BillBrief).where(BillBrief.brief_type == "amendment")
|
||||
)).scalar()
|
||||
uncited_briefs = (await db.execute(
|
||||
text("""
|
||||
SELECT COUNT(*) FROM bill_briefs
|
||||
WHERE key_points IS NOT NULL
|
||||
AND jsonb_array_length(key_points) > 0
|
||||
AND jsonb_typeof(key_points->0) = 'string'
|
||||
""")
|
||||
)).scalar()
|
||||
return {
|
||||
"total_bills": total_bills,
|
||||
"docs_fetched": docs_fetched,
|
||||
"briefs_generated": total_briefs,
|
||||
"full_briefs": full_briefs,
|
||||
"amendment_briefs": amendment_briefs,
|
||||
"uncited_briefs": uncited_briefs,
|
||||
"remaining": total_bills - total_briefs,
|
||||
}
|
||||
|
||||
|
||||
# ── Celery Tasks ──────────────────────────────────────────────────────────────
|
||||
|
||||
@router.post("/backfill-citations")
async def backfill_citations(current_user: User = Depends(get_current_admin)):
    """Delete pre-citation briefs and re-queue LLM processing using stored document text."""
    # Imported lazily so the router module does not pull in the Celery worker stack.
    from app.workers.llm_processor import backfill_brief_citations

    queued_job = backfill_brief_citations.delay()
    return {"task_id": queued_job.id, "status": "queued"}
|
||||
|
||||
|
||||
@router.post("/backfill-sponsors")
|
||||
async def backfill_sponsors(current_user: User = Depends(get_current_admin)):
|
||||
from app.workers.congress_poller import backfill_sponsor_ids
|
||||
@@ -131,6 +148,13 @@ async def trigger_member_sync(current_user: User = Depends(get_current_admin)):
|
||||
return {"task_id": task.id, "status": "queued"}
|
||||
|
||||
|
||||
@router.post("/trigger-fetch-actions")
async def trigger_fetch_actions(current_user: User = Depends(get_current_admin)):
    # Admin-triggered run of the nightly action-fetch batch; lazy import keeps
    # worker code out of the web process at module load time.
    from app.workers.congress_poller import fetch_actions_for_active_bills

    batch_job = fetch_actions_for_active_bills.delay()
    return {"task_id": batch_job.id, "status": "queued"}
|
||||
|
||||
|
||||
@router.post("/trigger-trend-scores")
|
||||
async def trigger_trend_scores(current_user: User = Depends(get_current_admin)):
|
||||
from app.workers.trend_scorer import calculate_all_trend_scores
|
||||
@@ -138,6 +162,16 @@ async def trigger_trend_scores(current_user: User = Depends(get_current_admin)):
|
||||
return {"task_id": task.id, "status": "queued"}
|
||||
|
||||
|
||||
@router.post("/bills/{bill_id}/reprocess")
async def reprocess_bill(bill_id: str, current_user: User = Depends(get_current_admin)):
    """Queue document and action fetches for a specific bill. Useful for debugging."""
    # Lazy imports: avoid loading worker modules in the API process at import time.
    from app.workers.congress_poller import fetch_bill_actions
    from app.workers.document_fetcher import fetch_bill_documents

    queued = {
        "documents": fetch_bill_documents.delay(bill_id).id,
        "actions": fetch_bill_actions.delay(bill_id).id,
    }
    return {"task_ids": queued}
|
||||
|
||||
|
||||
@router.get("/task-status/{task_id}")
|
||||
async def get_task_status(task_id: str, current_user: User = Depends(get_current_admin)):
|
||||
from app.workers.celery_app import celery_app
|
||||
|
||||
@@ -68,5 +68,9 @@ celery_app.conf.update(
|
||||
"task": "app.workers.member_interest.calculate_all_member_trend_scores",
|
||||
"schedule": crontab(hour=3, minute=0),
|
||||
},
|
||||
"fetch-actions-active-bills": {
|
||||
"task": "app.workers.congress_poller.fetch_actions_for_active_bills",
|
||||
"schedule": crontab(hour=4, minute=0), # 4 AM UTC, after trend + member scoring
|
||||
},
|
||||
},
|
||||
)
|
||||
|
||||
@@ -6,8 +6,11 @@ Uses fromDateTime to fetch only recently updated bills.
|
||||
All operations are idempotent.
|
||||
"""
|
||||
import logging
|
||||
import time
|
||||
from datetime import datetime, timedelta, timezone
|
||||
|
||||
from sqlalchemy import or_
|
||||
|
||||
from app.database import get_sync_db
|
||||
from app.models import Bill, BillAction, Member, AppSetting
|
||||
from app.services import congress_api
|
||||
@@ -84,9 +87,10 @@ def poll_congress_bills(self):
|
||||
db.add(Bill(**parsed))
|
||||
db.commit()
|
||||
new_count += 1
|
||||
# Enqueue document fetch
|
||||
# Enqueue document and action fetches
|
||||
from app.workers.document_fetcher import fetch_bill_documents
|
||||
fetch_bill_documents.delay(bill_id)
|
||||
fetch_bill_actions.delay(bill_id)
|
||||
else:
|
||||
_update_bill_if_changed(db, existing, parsed)
|
||||
updated_count += 1
|
||||
@@ -198,6 +202,101 @@ def backfill_sponsor_ids(self):
|
||||
db.close()
|
||||
|
||||
|
||||
@celery_app.task(bind=True, max_retries=3, name="app.workers.congress_poller.fetch_bill_actions")
def fetch_bill_actions(self, bill_id: str):
    """Fetch and sync all actions for a bill from Congress.gov. Idempotent.

    Pages through the actions endpoint, inserting only (bill_id, action_date,
    action_text) tuples not already present, then stamps ``actions_fetched_at``
    on the bill. API failures trigger a Celery retry (60 s countdown, up to
    ``max_retries``); any other failure rolls the session back and re-raises.

    Args:
        bill_id: Primary key of the Bill row whose actions should be synced.

    Returns:
        ``{"bill_id": ..., "inserted": <new row count>}`` on success, or
        ``None`` if the bill does not exist.
    """
    # Single named constant so the page advance and the last-page test cannot
    # drift apart. NOTE(review): assumes the API returns pages of 250 — confirm
    # against congress_api.get_bill_actions' request limit.
    PAGE_SIZE = 250
    db = get_sync_db()
    try:
        bill = db.get(Bill, bill_id)
        if not bill:
            logger.warning(f"fetch_bill_actions: bill {bill_id} not found")
            return

        offset = 0
        inserted = 0
        while True:
            try:
                response = congress_api.get_bill_actions(
                    bill.congress_number, bill.bill_type, bill.bill_number, offset=offset
                )
            except Exception as exc:
                # Transient API failure: back off a minute and retry the whole task.
                raise self.retry(exc=exc, countdown=60)

            actions_data = response.get("actions", [])
            if not actions_data:
                break

            for action in actions_data:
                action_date_str = action.get("actionDate")
                action_text = action.get("text", "")
                action_type = action.get("type")
                chamber = action.get("chamber")

                # Idempotency check: skip if (bill_id, action_date, action_text) exists
                exists = (
                    db.query(BillAction)
                    .filter(
                        BillAction.bill_id == bill_id,
                        BillAction.action_date == action_date_str,
                        BillAction.action_text == action_text,
                    )
                    .first()
                )
                if not exists:
                    db.add(BillAction(
                        bill_id=bill_id,
                        action_date=action_date_str,
                        action_text=action_text,
                        action_type=action_type,
                        chamber=chamber,
                    ))
                    inserted += 1

            # Commit per page so a late failure keeps earlier pages.
            db.commit()
            offset += PAGE_SIZE
            if len(actions_data) < PAGE_SIZE:
                break  # short page => no further results

        bill.actions_fetched_at = datetime.now(timezone.utc)
        db.commit()
        logger.info(f"fetch_bill_actions: {bill_id} — inserted {inserted} new actions")
        return {"bill_id": bill_id, "inserted": inserted}
    except Exception:
        # Leave the session clean before close; re-raise so Celery records failure.
        db.rollback()
        raise
    finally:
        db.close()
|
||||
|
||||
|
||||
@celery_app.task(bind=True, name="app.workers.congress_poller.fetch_actions_for_active_bills")
def fetch_actions_for_active_bills(self):
    """Nightly batch: enqueue action fetches for recently active bills missing action data.

    Selects bills with activity in the last 30 days whose actions were never
    fetched, or whose latest action postdates the last fetch. Capped at 200
    bills per run (most recently active first) and throttled to ~5 enqueues/sec.

    Returns:
        ``{"queued": <number of fetch tasks enqueued>}``.
    """
    db = get_sync_db()
    try:
        cutoff = datetime.now(timezone.utc).date() - timedelta(days=30)
        bills = (
            db.query(Bill)
            .filter(
                Bill.latest_action_date >= cutoff,
                or_(
                    Bill.actions_fetched_at.is_(None),
                    Bill.latest_action_date > Bill.actions_fetched_at,
                ),
            )
            # Without an ORDER BY, LIMIT returns database-arbitrary rows; order
            # so the cap deterministically keeps the most recently active bills.
            .order_by(Bill.latest_action_date.desc())
            .limit(200)
            .all()
        )
        queued = 0
        for bill in bills:
            fetch_bill_actions.delay(bill.bill_id)
            queued += 1
            time.sleep(0.2)  # ~5 tasks/sec to avoid Redis burst
        logger.info(f"fetch_actions_for_active_bills: queued {queued} bills")
        return {"queued": queued}
    finally:
        db.close()
|
||||
|
||||
|
||||
def _update_bill_if_changed(db, existing: Bill, parsed: dict) -> bool:
|
||||
"""Update bill fields if anything has changed. Returns True if updated."""
|
||||
changed = False
|
||||
@@ -210,7 +309,8 @@ def _update_bill_if_changed(db, existing: Bill, parsed: dict) -> bool:
|
||||
if changed:
|
||||
existing.last_checked_at = datetime.now(timezone.utc)
|
||||
db.commit()
|
||||
# Check for new text versions now that the bill has changed
|
||||
# Check for new text versions and sync actions now that the bill has changed
|
||||
from app.workers.document_fetcher import fetch_bill_documents
|
||||
fetch_bill_documents.delay(existing.bill_id)
|
||||
fetch_bill_actions.delay(existing.bill_id)
|
||||
return changed
|
||||
|
||||
@@ -3,6 +3,9 @@ LLM processor — generates AI briefs for fetched bill documents.
|
||||
Triggered by document_fetcher after successful text retrieval.
|
||||
"""
|
||||
import logging
|
||||
import time
|
||||
|
||||
from sqlalchemy import text
|
||||
|
||||
from app.database import get_sync_db
|
||||
from app.models import Bill, BillBrief, BillDocument, Member
|
||||
@@ -106,3 +109,55 @@ def process_document_with_llm(self, document_id: int):
|
||||
raise self.retry(exc=exc, countdown=300) # 5 min backoff for LLM failures
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
|
||||
@celery_app.task(bind=True, name="app.workers.llm_processor.backfill_brief_citations")
def backfill_brief_citations(self):
    """
    Find briefs generated before citation support was added (key_points contains plain
    strings instead of {text, citation, quote} objects), delete them, and re-queue
    LLM processing against the already-stored document text.

    No Congress.gov or GovInfo calls — only LLM calls.

    Returns:
        ``{"total": ..., "queued": ..., "skipped": ...}`` — uncited briefs
        found, briefs re-queued, and briefs skipped for lacking document text.
    """
    db = get_sync_db()
    try:
        # A brief predates citation support when its first key_point element is
        # a bare JSON string rather than an object.
        uncited = db.execute(text("""
            SELECT id, document_id, bill_id
            FROM bill_briefs
            WHERE key_points IS NOT NULL
            AND jsonb_array_length(key_points) > 0
            AND jsonb_typeof(key_points->0) = 'string'
        """)).fetchall()

        total = len(uncited)
        queued = 0
        skipped = 0

        for row in uncited:
            if not row.document_id:
                skipped += 1
                continue

            # Confirm the document still has text before deleting the brief
            doc = db.get(BillDocument, row.document_id)
            if not doc or not doc.raw_text:
                skipped += 1
                continue

            brief = db.get(BillBrief, row.id)
            if brief:
                db.delete(brief)
                db.commit()  # commit per brief so a late failure loses at most one

            process_document_with_llm.delay(row.document_id)
            queued += 1
            time.sleep(0.1)  # Avoid burst-queuing all LLM tasks at once

        logger.info(
            f"backfill_brief_citations: {total} uncited briefs found, "
            f"{queued} re-queued, {skipped} skipped (no document text)"
        )
        return {"total": total, "queued": queued, "skipped": skipped}
    except Exception:
        # Consistency with fetch_bill_actions: leave the session clean on failure.
        db.rollback()
        raise
    finally:
        db.close()
|
||||
|
||||
Reference in New Issue
Block a user