diff --git a/PocketVeto — Feature Roadmap.md b/PocketVeto — Feature Roadmap.md new file mode 100644 index 0000000..155f009 --- /dev/null +++ b/PocketVeto — Feature Roadmap.md @@ -0,0 +1,81 @@ +## Roadmap + +- [x] Docker Stack — PostgreSQL, Redis, FastAPI, Celery, Next.js, Nginx fully containerized +- [x] Bill Polling — Congress.gov incremental sync every 30 min, filtered to legislation that can become law +- [x] Document Fetching — GovInfo bill text retrieval with smart truncation for token budgets +- [x] LLM Analysis — Multi-provider AI briefs (OpenAI, Anthropic, Gemini, Ollama) with amendment diffing +- [x] News Correlation — NewsAPI + Google News RSS articles linked to bills via topic tags +- [x] Trend Scoring — Composite zeitgeist score (0–100) from NewsAPI + Google News + Google Trends, nightly +- [x] Full-text Search — PostgreSQL tsvector search across bills and members +- [x] Follows — Per-user follows for bills, members, and topics +- [x] Dashboard — Personalized feed + trending bills +- [x] Multi-user Auth — JWT email/password auth, admin role, user management panel +- [x] Admin Panel — LLM provider switching, pipeline stats, manual task triggers +- [x] Citations — Every AI brief key point and risk cites the section + verbatim quote from bill text +- [x] Citation UI — § chips expand inline to show quote + GovInfo source link +- [x] Party Badges — Solid red/blue/slate badges readable in light and dark mode +- [x] Nginx DNS Fix — Resolver directive prevents stale-IP 502s after container restarts +- [x] Sponsor Linking — Poller fetches bill detail for sponsor; backfill task fixes existing bills +- [x] Member Search — "First Last" and "Last, First" both match via PostgreSQL split_part() +- [x] Search Spaces — Removed .trim() on keystroke that ate spaces in search inputs +- [x] Mobile UI — Responsive layout: sidebar collapses, cards stack, touch-friendly controls +- [x] Member BIO & Photo — Display member headshots (photo_url already stored, not yet shown in UI) +- [x] Bill Action Fetching — BillAction table populated via Congress.gov actions endpoint; nightly batch + event-driven on bill change +- [x] What Changed (Amendment Briefs) — BriefPanel surfaces amendment briefs with "What Changed" badge and collapsible version history +- [x] Source Viewer — "View source" link in § citation popover opens GovInfo document in new tab (Option A; Option B = in-app highlighted viewer deferred pending UX review) +- [x] Admin Reprocess — POST /api/admin/bills/{bill_id}/reprocess queues document + action fetches for a specific bill + +--- + +## To Do + +--- + +### Phase 1 — Notifications Plumbing *(prerequisite for Alerts and Weekly Digest)* + +- [ ] `notification_events` table — `(user_id, bill_id, event_type, payload, dispatched_at)` +- [ ] ntfy dispatch — Celery task POSTs to user's ntfy topic URL; user supplies their own topic URL (public ntfy.sh or self-hosted ntfy server with optional auth token) +- [ ] RSS feed — tokenized per-user feed at `/api/feed/{token}.xml`; token stored on user row +- [ ] User settings UI — ntfy topic URL field + optional ntfy auth token + RSS feed link/copy button + +--- + +### Phase 2 — High Impact *(can run in parallel after Phase 1)* + +- [ ] **Change-driven Alerts** — emit `notification_event` from poller/document fetcher on material changes: new doc version, substitute text, committee report, vote scheduled/result. Filter out procedural-only action text. Fan out to ntfy + RSS. +- [ ] **Fact vs Inference Labeling** — add `label: "cited_fact" | "inference"` + optional `confidence` field to each `key_point` and `risk` in the LLM JSON schema. Prompt engineering change + BillBrief schema migration. UI: small badge on each bullet (no color politics — neutral labels only). + +--- + +### Phase 3 — Personal Workflow + +- [ ] **Collections / Watchlists** — `collections` (id, user_id, name, slug, is_public) + `collection_bills` join table. UI to create/manage collections and filter dashboard by collection. Shareable via public slug URL (read-only for non-owners). +- [ ] **Personal Notes** — `bill_notes` table (user_id, bill_id, content, stance, tags, pinned). Shown on bill detail page. Private; optionally pin to top of the bill detail view. +- [ ] **Shareable Links** — UUID token on briefs and collections → public read-only view, no login required. Same token system for both. No expiry by default. UUID (not sequential) to prevent enumeration. +- [ ] **Weekly Digest** — Celery beat task (weekly), queries followed bills for changes in the past 7 days, formats a low-noise summary, dispatches via ntfy + RSS. + +--- + +### Phase 4 — Accountability + +- [ ] **Votes & Committees** — fetch roll-call votes and committee referrals/actions from Congress.gov. New `bill_votes` table. UI: timeline entries for committee actions (already partially populated from bill actions) + vote results filterable by followed members and topics. +- [ ] **Member Effectiveness Score** — nightly Celery task; transparent formula: sponsored bills count, bills advanced through stages, co-sponsored, committee participation, "bills enacted" metric. Stored in `member_scores` table. Displayed on member profile with formula explanation. +- [ ] **Representation Alignment View** — for each followed member, show how their votes and actions align with the user's followed topics. Based purely on followed members (no ZIP/district storage). Neutral presentation — no scorecard dunking. + +--- + +### Phase 5 — Polish *(slot in anytime, independent)* + +- [ ] **Search Improvements** — filters on global search (bill type, status, chamber, date range); search within a member's sponsored bills; topic-scoped search. +- [ ] **Desktop View** — wider multi-column layout optimized for large screens (sticky sidebar, expanded grid, richer bill detail layout). +- [ ] **Brief Regeneration UI** — admin button to delete existing briefs for a bill and re-queue LLM processing. Useful for improving citation/diff logic without a full re-poll. (Backend reprocess endpoint already exists.) +- [ ] **first_name / last_name Backfill** — Celery task to populate empty first/last from stored "Last, First" `name` field via split. + +--- + +### Later / Backlog + +- [ ] **Notification Channels v2** — email (SMTP), Discord webhook, Telegram bot (after ntfy + RSS v1 ships) +- [ ] **Source Viewer Option B** — in-app bill text viewer with cited passage highlighted and scroll-to-anchor. Deferred pending UX review of Option A (GovInfo link). +- [ ] **Raw Diff Panel** — Python `difflib` diff between stored document versions, shown as collapsible "Raw Changes" below amendment brief. Zero API calls. Deferred — AI amendment brief is the primary "what changed" story. +- [ ] **Shareable Collection Subscriptions** — "Follow this collection" mechanic so other users can subscribe to a public collection and get its bills added to their feed. diff --git a/backend/app/api/admin.py b/backend/app/api/admin.py index 2233aeb..12add69 100644 --- a/backend/app/api/admin.py +++ b/backend/app/api/admin.py @@ -1,5 +1,5 @@ from fastapi import APIRouter, Depends, HTTPException -from sqlalchemy import func, select +from sqlalchemy import func, select, text from sqlalchemy.ext.asyncio import AsyncSession from app.core.dependencies import get_current_admin @@ -98,18 +98,35 @@ async def get_stats( amendment_briefs = (await db.execute( select(func.count()).select_from(BillBrief).where(BillBrief.brief_type == "amendment") )).scalar() + uncited_briefs = (await db.execute( + text(""" + SELECT COUNT(*) FROM bill_briefs + WHERE key_points IS NOT NULL + AND jsonb_array_length(key_points) > 0 + AND jsonb_typeof(key_points->0) = 'string' + """) + )).scalar() return { "total_bills": total_bills, "docs_fetched": docs_fetched, "briefs_generated": total_briefs, "full_briefs": full_briefs, "amendment_briefs": amendment_briefs, + "uncited_briefs": uncited_briefs, "remaining": total_bills - total_briefs, } # ── Celery Tasks ────────────────────────────────────────────────────────────── +@router.post("/backfill-citations") +async def backfill_citations(current_user: User = Depends(get_current_admin)): + """Delete pre-citation briefs and re-queue LLM processing using stored document text.""" + from app.workers.llm_processor import backfill_brief_citations + task = backfill_brief_citations.delay() + return {"task_id": task.id, "status": "queued"} + + @router.post("/backfill-sponsors") async def backfill_sponsors(current_user: User = Depends(get_current_admin)): from app.workers.congress_poller import backfill_sponsor_ids @@ -131,6 +148,13 @@ async def trigger_member_sync(current_user: User = Depends(get_current_admin)): return {"task_id": task.id, "status": "queued"} +@router.post("/trigger-fetch-actions") +async def trigger_fetch_actions(current_user: User = Depends(get_current_admin)): + from app.workers.congress_poller import fetch_actions_for_active_bills + task = fetch_actions_for_active_bills.delay() + return {"task_id": task.id, "status": "queued"} + + @router.post("/trigger-trend-scores") async def trigger_trend_scores(current_user: User = Depends(get_current_admin)): from app.workers.trend_scorer import calculate_all_trend_scores @@ -138,6 +162,16 @@ async def trigger_trend_scores(current_user: User = Depends(get_current_admin)): return {"task_id": task.id, "status": "queued"} +@router.post("/bills/{bill_id}/reprocess") +async def reprocess_bill(bill_id: str, current_user: User = Depends(get_current_admin)): + """Queue document and action fetches for a specific bill. Useful for debugging.""" + from app.workers.document_fetcher import fetch_bill_documents + from app.workers.congress_poller import fetch_bill_actions + doc_task = fetch_bill_documents.delay(bill_id) + actions_task = fetch_bill_actions.delay(bill_id) + return {"task_ids": {"documents": doc_task.id, "actions": actions_task.id}} + + @router.get("/task-status/{task_id}") async def get_task_status(task_id: str, current_user: User = Depends(get_current_admin)): from app.workers.celery_app import celery_app diff --git a/backend/app/workers/celery_app.py b/backend/app/workers/celery_app.py index a84225c..8cf8796 100644 --- a/backend/app/workers/celery_app.py +++ b/backend/app/workers/celery_app.py @@ -68,5 +68,9 @@ celery_app.conf.update( "task": "app.workers.member_interest.calculate_all_member_trend_scores", "schedule": crontab(hour=3, minute=0), }, + "fetch-actions-active-bills": { + "task": "app.workers.congress_poller.fetch_actions_for_active_bills", + "schedule": crontab(hour=4, minute=0), # 4 AM UTC, after trend + member scoring + }, }, ) diff --git a/backend/app/workers/congress_poller.py b/backend/app/workers/congress_poller.py index f27d40d..70bc531 100644 --- a/backend/app/workers/congress_poller.py +++ b/backend/app/workers/congress_poller.py @@ -6,8 +6,11 @@ Uses fromDateTime to fetch only recently updated bills. All operations are idempotent. """ import logging +import time from datetime import datetime, timedelta, timezone +from sqlalchemy import or_ + from app.database import get_sync_db from app.models import Bill, BillAction, Member, AppSetting from app.services import congress_api @@ -84,9 +87,10 @@ def poll_congress_bills(self): db.add(Bill(**parsed)) db.commit() new_count += 1 - # Enqueue document fetch + # Enqueue document and action fetches from app.workers.document_fetcher import fetch_bill_documents fetch_bill_documents.delay(bill_id) + fetch_bill_actions.delay(bill_id) else: _update_bill_if_changed(db, existing, parsed) updated_count += 1 @@ -198,6 +202,101 @@ def backfill_sponsor_ids(self): db.close() +@celery_app.task(bind=True, max_retries=3, name="app.workers.congress_poller.fetch_bill_actions") +def fetch_bill_actions(self, bill_id: str): + """Fetch and sync all actions for a bill from Congress.gov. Idempotent.""" + db = get_sync_db() + try: + bill = db.get(Bill, bill_id) + if not bill: + logger.warning(f"fetch_bill_actions: bill {bill_id} not found") + return + + offset = 0 + inserted = 0 + while True: + try: + response = congress_api.get_bill_actions( + bill.congress_number, bill.bill_type, bill.bill_number, offset=offset + ) + except Exception as exc: + raise self.retry(exc=exc, countdown=60) + + actions_data = response.get("actions", []) + if not actions_data: + break + + for action in actions_data: + action_date_str = action.get("actionDate") + action_text = action.get("text", "") + action_type = action.get("type") + chamber = action.get("chamber") + + # Idempotency check: skip if (bill_id, action_date, action_text) exists + exists = ( + db.query(BillAction) + .filter( + BillAction.bill_id == bill_id, + BillAction.action_date == action_date_str, + BillAction.action_text == action_text, + ) + .first() + ) + if not exists: + db.add(BillAction( + bill_id=bill_id, + action_date=action_date_str, + action_text=action_text, + action_type=action_type, + chamber=chamber, + )) + inserted += 1 + + db.commit() + offset += 250 + if len(actions_data) < 250: + break + + bill.actions_fetched_at = datetime.now(timezone.utc) + db.commit() + logger.info(f"fetch_bill_actions: {bill_id} — inserted {inserted} new actions") + return {"bill_id": bill_id, "inserted": inserted} + except Exception as exc: + db.rollback() + raise + finally: + db.close() + + +@celery_app.task(bind=True, name="app.workers.congress_poller.fetch_actions_for_active_bills") +def fetch_actions_for_active_bills(self): + """Nightly batch: enqueue action fetches for recently active bills missing action data.""" + db = get_sync_db() + try: + cutoff = datetime.now(timezone.utc).date() - timedelta(days=30) + bills = ( + db.query(Bill) + .filter( + Bill.latest_action_date >= cutoff, + or_( + Bill.actions_fetched_at.is_(None), + Bill.latest_action_date > Bill.actions_fetched_at, + ), + ) + .limit(200) + .all() + ) + queued = 0 + for bill in bills: + fetch_bill_actions.delay(bill.bill_id) + queued += 1 + time.sleep(0.2) # ~5 tasks/sec to avoid Redis burst + logger.info(f"fetch_actions_for_active_bills: queued {queued} bills") + return {"queued": queued} + finally: + db.close() + + def _update_bill_if_changed(db, existing: Bill, parsed: dict) -> bool: """Update bill fields if anything has changed. Returns True if updated.""" changed = False @@ -210,7 +309,8 @@ def _update_bill_if_changed(db, existing: Bill, parsed: dict) -> bool: if changed: existing.last_checked_at = datetime.now(timezone.utc) db.commit() - # Check for new text versions now that the bill has changed + # Check for new text versions and sync actions now that the bill has changed from app.workers.document_fetcher import fetch_bill_documents fetch_bill_documents.delay(existing.bill_id) + fetch_bill_actions.delay(existing.bill_id) return changed diff --git a/backend/app/workers/llm_processor.py b/backend/app/workers/llm_processor.py index ede2822..fb2dd3f 100644 --- a/backend/app/workers/llm_processor.py +++ b/backend/app/workers/llm_processor.py @@ -3,6 +3,9 @@ LLM processor — generates AI briefs for fetched bill documents. Triggered by document_fetcher after successful text retrieval. """ import logging +import time + +from sqlalchemy import text from app.database import get_sync_db from app.models import Bill, BillBrief, BillDocument, Member @@ -106,3 +109,55 @@ def process_document_with_llm(self, document_id: int): raise self.retry(exc=exc, countdown=300) # 5 min backoff for LLM failures finally: db.close() + + +@celery_app.task(bind=True, name="app.workers.llm_processor.backfill_brief_citations") +def backfill_brief_citations(self): + """ + Find briefs generated before citation support was added (key_points contains plain + strings instead of {text, citation, quote} objects), delete them, and re-queue + LLM processing against the already-stored document text. + + No Congress.gov or GovInfo calls — only LLM calls. + """ + db = get_sync_db() + try: + uncited = db.execute(text(""" + SELECT id, document_id, bill_id + FROM bill_briefs + WHERE key_points IS NOT NULL + AND jsonb_array_length(key_points) > 0 + AND jsonb_typeof(key_points->0) = 'string' + """)).fetchall() + + total = len(uncited) + queued = 0 + skipped = 0 + + for row in uncited: + if not row.document_id: + skipped += 1 + continue + + # Confirm the document still has text before deleting the brief + doc = db.get(BillDocument, row.document_id) + if not doc or not doc.raw_text: + skipped += 1 + continue + + brief = db.get(BillBrief, row.id) + if brief: + db.delete(brief) + db.commit() + + process_document_with_llm.delay(row.document_id) + queued += 1 + time.sleep(0.1) # Avoid burst-queuing all LLM tasks at once + + logger.info( + f"backfill_brief_citations: {total} uncited briefs found, " + f"{queued} re-queued, {skipped} skipped (no document text)" + ) + return {"total": total, "queued": queued, "skipped": skipped} + finally: + db.close() diff --git a/frontend/app/bills/[id]/page.tsx b/frontend/app/bills/[id]/page.tsx index af374a0..c4797bc 100644 --- a/frontend/app/bills/[id]/page.tsx +++ b/frontend/app/bills/[id]/page.tsx @@ -4,12 +4,12 @@ import { use } from "react"; import Link from "next/link"; import { ArrowLeft, ExternalLink, User } from "lucide-react"; import { useBill, useBillTrend } from "@/lib/hooks/useBills"; -import { AIBriefCard } from "@/components/bills/AIBriefCard"; +import { BriefPanel } from "@/components/bills/BriefPanel"; import { ActionTimeline } from "@/components/bills/ActionTimeline"; import { TrendChart } from "@/components/bills/TrendChart"; import { NewsPanel } from "@/components/bills/NewsPanel"; import { FollowButton } from "@/components/shared/FollowButton"; -import { billLabel, formatDate, partyBadgeColor, cn } from "@/lib/utils"; +import { billLabel, congressLabel, formatDate, partyBadgeColor, cn } from "@/lib/utils"; export default function BillDetailPage({ params }: { params: Promise<{ id: string }> }) { const { id } = use(params); @@ -46,7 +46,7 @@ export default function BillDetailPage({ params }: { params: Promise<{ id: strin {label} {bill.chamber} - 119th Congress + {congressLabel(bill.congress_number)}
+ ⚠ {stats.uncited_briefs.toLocaleString()} brief{stats.uncited_briefs !== 1 ? "s" : ""} missing citations — run Backfill Citations to fix +
+ )}{name}: task {id} queued
diff --git a/frontend/components/bills/BriefPanel.tsx b/frontend/components/bills/BriefPanel.tsx new file mode 100644 index 0000000..5c4888c --- /dev/null +++ b/frontend/components/bills/BriefPanel.tsx @@ -0,0 +1,116 @@ +"use client"; + +import { useState } from "react"; +import { ChevronDown, ChevronRight, RefreshCw } from "lucide-react"; +import { BriefSchema } from "@/lib/types"; +import { AIBriefCard } from "@/components/bills/AIBriefCard"; +import { formatDate } from "@/lib/utils"; + +interface BriefPanelProps { + briefs?: BriefSchema[] | null; +} + +const TYPE_LABEL: Record