From 1e37c99599161f6214c6da3486db8820913de7a6 Mon Sep 17 00:00:00 2001 From: Jack Levy Date: Sun, 1 Mar 2026 17:34:45 -0500 Subject: [PATCH] feat(phase2): fact/inference labeling, change-driven alerts, admin cleanup - Add label: cited_fact | inference to LLM brief schema (all 4 providers) - Inferred badge in AIBriefCard for inference-labeled points - backfill_brief_labels Celery task: classifies existing cited points in-place - POST /api/admin/backfill-labels + unlabeled_briefs stat counter - Expand milestone keywords: markup, conference - Add is_referral_action() for committee referrals (referred to) - Two-tier milestone notifications: progress tier (all follow modes) and referral tier (pocket_veto/boost only, neutral suppressed) - Topic followers now receive bill_updated milestone notifications via latest brief topic_tags lookup in _update_bill_if_changed() - Admin Manual Controls: collapsible Maintenance section for backfill tasks - Update ARCHITECTURE.md and roadmap for Phase 2 completion Co-Authored-By: Jack Levy --- ARCHITECTURE.md | 104 +++++++-- PocketVeto — Feature Roadmap.md | 63 ++++- backend/app/api/admin.py | 26 +++ backend/app/services/llm_service.py | 23 +- backend/app/workers/congress_poller.py | 25 +- backend/app/workers/llm_processor.py | 107 +++++++++ .../app/workers/notification_dispatcher.py | 7 + backend/app/workers/notification_utils.py | 36 ++- frontend/app/settings/page.tsx | 218 +++++++++++------- frontend/components/bills/AIBriefCard.tsx | 8 + frontend/lib/api.ts | 3 + frontend/lib/types.ts | 1 + 12 files changed, 500 insertions(+), 121 deletions(-) diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md index 961b5f3..c84d3f0 100644 --- a/ARCHITECTURE.md +++ b/ARCHITECTURE.md @@ -238,9 +238,11 @@ Indexes: `bill_id`, `topic_tags` (GIN for JSONB containment queries) { "text": "The bill allocates $50B for defense", "citation": "Section 301(a)(2)", - "quote": "There is hereby appropriated for fiscal year 2026, $50,000,000,000 for the Department of 
Defense..." + "quote": "There is hereby appropriated for fiscal year 2026, $50,000,000,000 for the Department of Defense...", + "label": "cited_fact" } ``` +`label` is `"cited_fact"` when the claim is explicitly stated in the quoted text, or `"inference"` when it is an analytical interpretation. Old briefs without this field render without a badge (backward compatible). --- @@ -324,6 +326,7 @@ News articles correlated to a specific member of Congress. | user_id | int (FK → users, CASCADE) | | | follow_type | varchar | `bill`, `member`, `topic` | | follow_value | varchar | bill_id, bioguide_id, or topic name | +| follow_mode | varchar | `neutral` \| `pocket_veto` \| `pocket_boost` (default `neutral`) | | created_at | timestamptz | | Unique constraint: `(user_id, follow_type, follow_value)` @@ -397,12 +400,13 @@ Stores notification events for dispatching to user channels (ntfy, RSS). | id | int (PK) | | | user_id | int (FK → users, CASCADE) | | | bill_id | varchar (FK → bills, SET NULL) | nullable | -| event_type | varchar | e.g. `new_brief`, `bill_updated`, `new_action` | -| headline | text | Short description for ntfy title | -| body | text | Longer description for ntfy message / RSS content | -| dispatched_at | timestamptz (nullable) | NULL = not yet sent | +| event_type | varchar | `new_document`, `new_amendment`, `bill_updated` | +| payload | jsonb | `{bill_title, bill_label, brief_summary, bill_url, milestone_tier}` | +| dispatched_at | timestamptz (nullable) | NULL = pending dispatch | | created_at | timestamptz | | +`milestone_tier` in payload: `"progress"` (passed, signed, markup, conference, etc.) or `"referral"` (committee referral). Neutral follows silently skip referral-tier events; pocket_veto and pocket_boost receive them as early warnings. + --- ## Alembic Migrations @@ -442,11 +446,12 @@ Auth header: `Authorization: Bearer ` | Method | Path | Auth | Description | |---|---|---|---| -| GET | `/` | — | Paginated bill list. 
Query: `chamber`, `topic`, `sponsor_id`, `q`, `page`, `per_page`, `sort`. | +| GET | `/` | — | Paginated bill list. Query: `chamber`, `topic`, `sponsor_id`, `q`, `page`, `per_page`, `sort`. Includes `has_document` flag per bill via a single batch query. | | GET | `/{bill_id}` | — | Full bill detail with sponsor, actions, briefs, news, trend scores. | | GET | `/{bill_id}/actions` | — | Action timeline, newest first. | | GET | `/{bill_id}/news` | — | Related news articles, limit 20. | | GET | `/{bill_id}/trend` | — | Trend score history. Query: `days` (7–365, default 30). | +| POST | `/{bill_id}/draft-letter` | — | Generate a constituent letter draft via the configured LLM. Body: `{stance, recipient, tone, selected_points, include_citations, zip_code?}`. Returns `{draft: string}`. ZIP code is used in the prompt only — never stored or logged. | ### `/api/members` @@ -503,7 +508,7 @@ Auth header: `Authorization: Bearer ` | GET | `/users` | Admin | All users with follow counts. | | DELETE | `/users/{id}` | Admin | Delete user (cannot delete self). Cascades follows. | | PATCH | `/users/{id}/toggle-admin` | Admin | Promote/demote admin status (cannot change self). | -| GET | `/stats` | Admin | Pipeline counters: total bills, docs fetched, briefs generated, pending LLM, missing metadata/sponsors/actions, uncited briefs. | +| GET | `/stats` | Admin | Pipeline counters: total bills, docs fetched, briefs generated, pending LLM, missing metadata/sponsors/actions, uncited briefs, unlabeled briefs (cited objects without a fact/inference label). | | GET | `/api-health` | Admin | Test each external API in parallel; returns status + latency for Congress.gov, GovInfo, NewsAPI, Google News. | | POST | `/trigger-poll` | Admin | Queue immediate Congress.gov poll. | | POST | `/trigger-member-sync` | Admin | Queue member sync. 
| @@ -513,6 +518,7 @@ Auth header: `Authorization: Bearer ` | POST | `/backfill-sponsors` | Admin | Queue one-off task to populate `sponsor_id` on bills where it is NULL. | | POST | `/backfill-metadata` | Admin | Fill null `introduced_date`, `chamber`, `congress_url` by re-fetching bill detail. | | POST | `/backfill-citations` | Admin | Delete pre-citation briefs and re-queue LLM using stored document text. | +| POST | `/backfill-labels` | Admin | Classify existing cited brief points as `cited_fact` or `inference` in-place — one compact LLM call per brief, no re-generation. | | POST | `/resume-analysis` | Admin | Re-queue LLM for docs with no brief; re-queue doc fetch for bills with no doc. | | POST | `/bills/{bill_id}/reprocess` | Admin | Queue document + action fetches for a specific bill (debugging). | | GET | `/task-status/{task_id}` | Admin | Celery task status and result. | @@ -570,6 +576,12 @@ Auth header: `Authorization: Bearer ` has no sponsor data), upserts Member, sets bill.sponsor_id ↳ New bills → fetch_bill_documents.delay(bill_id) ↳ Updated bills → fetch_bill_documents.delay(bill_id) if changed + ↳ Updated bills → emit bill_updated notification if action is a milestone: + - "progress" tier: passed/failed, signed/vetoed, enacted, markup, conference, + reported from committee, placed on calendar, cloture, roll call + → all follow types (bill, sponsor, topic) receive notification + - "referral" tier: referred to committee + → pocket_veto and pocket_boost only; neutral follows silently skip 2. 
document_fetcher.fetch_bill_documents(bill_id) ↳ Gets text versions from Congress.gov (XML preferred, falls back to HTML/PDF) @@ -641,6 +653,7 @@ All providers implement: ```python generate_brief(doc_text, bill_metadata) → ReverseBrief generate_amendment_brief(new_text, prev_text, bill_metadata) → ReverseBrief +generate_text(prompt) → str # free-form text, used by draft letter generator ``` ### ReverseBrief Dataclass @@ -649,8 +662,8 @@ generate_amendment_brief(new_text, prev_text, bill_metadata) → ReverseBrief @dataclass class ReverseBrief: summary: str - key_points: list[dict] # [{text, citation, quote}] - risks: list[dict] # [{text, citation, quote}] + key_points: list[dict] # [{text, citation, quote, label}] + risks: list[dict] # [{text, citation, quote, label}] deadlines: list[dict] # [{date, description}] topic_tags: list[str] llm_provider: str @@ -664,16 +677,28 @@ class ReverseBrief: { "summary": "2-4 paragraph plain-language explanation", "key_points": [ - {"text": "claim", "citation": "Section X(y)", "quote": "verbatim excerpt ≤80 words"} + { + "text": "claim", + "citation": "Section X(y)", + "quote": "verbatim excerpt ≤80 words", + "label": "cited_fact" + } ], "risks": [ - {"text": "concern", "citation": "Section X(y)", "quote": "verbatim excerpt ≤80 words"} + { + "text": "concern", + "citation": "Section X(y)", + "quote": "verbatim excerpt ≤80 words", + "label": "inference" + } ], "deadlines": [{"date": "YYYY-MM-DD or null", "description": "..."}], "topic_tags": ["healthcare", "taxation"] } ``` +`label` classification rules baked into the system prompt: `"cited_fact"` if the claim is explicitly stated in the quoted text; `"inference"` if it is an analytical interpretation, projection, or implication not literally stated. The UI shows a neutral "Inferred" badge on inference items only (cited_fact is the clean default). + **Amendment brief prompt** focuses on what changed between document versions. 
**Smart truncation:** Bills exceeding the token budget are trimmed — 75% of budget from the start (preamble/purpose), 25% from the end (enforcement/effective dates), with an omission notice in the middle. @@ -715,6 +740,7 @@ Renders the LLM brief. For cited items (new format), shows a `§ Section X(y)` c - Blockquoted verbatim excerpt from the bill - "View source →" link to GovInfo (opens in new tab) - One chip open at a time per card +- Inference items show a neutral "Inferred" badge (analytical interpretation, not a literal quote) - Old plain-string briefs render without chips (graceful backward compat) **`ActionTimeline.tsx`** @@ -729,8 +755,11 @@ Client component wrapping the entire app. Waits for Zustand hydration, then redi **`Sidebar.tsx`** Navigation with: Home, Bills, Members, Following, Topics, Settings (admin only). Shows current user email + logout button at the bottom. Accepts optional `onClose` prop — when provided (mobile drawer context), renders an X close button in the header and calls `onClose` on every nav link click. +**`DraftLetterPanel.tsx`** +Collapsible panel rendered below `BriefPanel` on the bill detail page (only when a brief exists). Lets users select up to 3 cited points from the brief, choose stance (YES/NO), tone (short/polite/firm), and optionally enter a ZIP code (not stored). Stance auto-populates from the user's follow mode (`pocket_boost` → YES, `pocket_veto` → NO); clears if they unfollow. Recipient (house/senate) is derived from the bill's chamber. Calls `POST /{bill_id}/draft-letter` and renders the plain-text draft in a readonly textarea with a copy-to-clipboard button. + **`BillCard.tsx`** -Compact bill preview showing bill ID, title, sponsor with party badge, latest action date, and status. 
+Compact bill preview showing bill ID, title, sponsor with party badge, latest action date, status, and a text availability indicator: `Brief` (green, analysis done) / `Pending` (amber, text retrieved but not yet analysed) / `No text` (muted, nothing published on Congress.gov). **`TrendChart.tsx`** Line chart of `composite_score` over time with tooltip breakdown of each data source. @@ -805,7 +834,7 @@ Separate queues prevent a flood of LLM tasks from blocking time-sensitive pollin All LLM providers implement the same interface. Switching providers is a single admin setting change — no code changes, no restart required (the factory reads from DB on each task invocation). ### JSONB for Flexible Brief Storage -`key_points`, `risks`, `deadlines`, `topic_tags` are stored as JSONB. This means the schema change from `list[str]` to `list[{text, citation, quote}]` required no migration — only the LLM prompt and application code changed. Old string-format briefs and new cited-object briefs coexist in the same column. +`key_points`, `risks`, `deadlines`, `topic_tags` are stored as JSONB. This means schema changes (adding `citation`/`quote` in v0.2.0, adding `label` in v0.6.0) required no migrations — only the LLM prompt and application code changed. Old string-format briefs, cited-object briefs without labels, and fully-labelled briefs all coexist in the same column and render correctly at each fidelity level. ### Redis-backed Beat Schedule (RedBeat) The Celery Beat schedule is stored in Redis rather than in memory. This means the beat scheduler can restart without losing schedule state or double-firing tasks. 
@@ -915,6 +944,55 @@ Nginx uses `resolver 127.0.0.11 valid=10s` (Docker's internal DNS) so upstream c - `introduced_date` shown conditionally (not rendered when null, preventing "Introduced: —") - Admin reprocess endpoint: `POST /api/admin/bills/{bill_id}/reprocess` +### v0.5.0 — Follow Modes, Public Browsing & Draft Letter Generator + +**Follow Modes:** +- `follow_mode` column on `follows` table: `neutral | pocket_veto | pocket_boost` +- `FollowButton` replaced with a mode-selector dropdown (shield/zap/heart icons, descriptions for each mode) +- `pocket_veto` — alert only on advancement milestones; `pocket_boost` — all changes + action prompts +- Mode stored per-follow; respected by notification dispatcher + +**Public Browsing:** +- Unauthenticated guests can browse bills, members, topics, and the trending dashboard +- `AuthModal` gates follow and other interactive actions +- Sidebar and nav adapt to guest state (no email/logout shown) +- All public endpoints already auth-free; guard refactored to allow guest reads + +**Draft Constituent Letter Generator (email_gen):** +- `DraftLetterPanel.tsx` — collapsible UI below `BriefPanel` for bills with a brief +- User selects up to 3 cited points from the brief, picks stance (YES/NO), tone, optional ZIP (not stored) +- Stance pre-fills from follow mode; clears on unfollow (ref-tracked, not effect-guarded) +- Recipient derived from bill chamber — no dropdown needed +- `POST /api/bills/{bill_id}/draft-letter` endpoint: reads LLM provider/model from `AppSetting` (respects Settings page), wraps LLM errors with human-readable messages (quota, rate limit, auth) +- `generate_text(prompt) → str` added to `LLMProvider` ABC and all four providers + +**Bill Text Status Indicators:** +- `has_document` field added to `BillSchema` (list endpoint) via a single batch `SELECT DISTINCT` — no per-card queries +- `BillCard` shows: `Brief` (green) / `Pending` (amber) / `No text` (muted) based on brief + document state + +### v0.6.0 — Phase 2: 
Change-driven Alerts & Fact/Inference Labeling + +**Change-driven Alerts:** +- `notification_utils.py` milestone keyword list expanded: added `"markup"` (markup sessions) and `"conference"` (conference committee) +- New `is_referral_action()` classifier for committee referrals (`"referred to"`) +- Two-tier notification system: `milestone_tier` field in `NotificationEvent.payload` + - `"progress"` — high-signal milestones (passed, signed, markup, etc.): all follow types notified + - `"referral"` — committee referral: pocket_veto and pocket_boost notified; neutral silently dropped +- **Topic followers now receive `bill_updated` milestone notifications** — previously they only received `new_document`/`new_amendment` events. Fixed by querying the bill's latest brief for `topic_tags` inside `_update_bill_if_changed()` +- All three follow types (bill, sponsor, topic) covered for both tiers + +**Fact vs Inference Labeling:** +- `label: "cited_fact" | "inference"` added to every cited key_point and risk in the LLM JSON schema +- System prompt updated for all four providers (OpenAI, Anthropic, Gemini, Ollama) +- UI: neutral "Inferred" badge shown next to inference items in `AIBriefCard`; cited_fact items render cleanly without a badge +- `backfill_brief_labels` Celery task: classifies existing cited points in-place — one compact LLM call per brief (all points batched), updates JSONB with `flag_modified`, no brief re-generation +- `POST /api/admin/backfill-labels` endpoint + "Backfill Fact/Inference Labels" button in Admin panel +- `unlabeled_briefs` counter added to `/api/admin/stats` and pipeline breakdown table + +**Admin Panel Cleanup:** +- Manual Controls split into two sections: always-visible recurring controls (Poll, Members, Trends, Actions, Resume) and a collapsible **Maintenance** section for one-time backfill tasks +- Maintenance section header shows "⚠ action needed" when any backfill has a non-zero count + ### v0.2.2 — Sponsor Linking & Search Fixes - **Root 
cause fixed:** Congress.gov list API does not return sponsor data — only the detail endpoint does. Poller now calls the detail endpoint for each new bill to get the sponsor and populate `bill.sponsor_id` - **Backfill task:** `backfill_sponsor_ids` Celery task + `/api/admin/backfill-sponsors` endpoint + "Backfill Sponsors" button in Admin UI — fixes existing bills with `NULL` sponsor_id (~10 req/sec, ~3 min for 1,600 bills) diff --git a/PocketVeto — Feature Roadmap.md b/PocketVeto — Feature Roadmap.md index 7f4e5ec..2425b5d 100644 --- a/PocketVeto — Feature Roadmap.md +++ b/PocketVeto — Feature Roadmap.md @@ -31,6 +31,10 @@ - [x] Backfill All Actions — admin task to fetch action history for all pre-existing bills - [x] Notifications (Phase 1) — ntfy dispatch, RSS feed, per-user settings UI, 5-min dispatcher beat task - [x] Brief Regeneration UI — admin button to delete existing briefs for a bill and re-queue LLM processing. Useful for improving citation/diff logic without a full re-poll. (Backend reprocess endpoint already exists.) 
+- [x] Follow Modes — `neutral | pocket_veto | pocket_boost` on the `follows` table; FollowButton mode selector with descriptions and tooltips +- [x] Public Browsing — unauthenticated guests browse bills, members, topics, and trending dashboard; AuthModal gates follow/interactive actions; sidebar and nav adapt to guest state +- [x] Draft Letter Generator — collapsible panel on bill detail pages; select up to 3 cited brief points, stance auto-fills from follow mode, recipient derived from chamber, ZIP optional and never stored; calls configured LLM provider +- [x] Bill Text Status Indicators — BillCard shows Brief / Pending / No text badge; backed by a single batch query on the list endpoint --- @@ -40,8 +44,8 @@ ### Phase 2 — High Impact *(can run in parallel after Phase 1)* -- [ ] **Change-driven Alerts** — emit `notification_event` from poller/document fetcher on material changes: new doc version, substitute text, committee report, vote scheduled/result. Filter out procedural-only action text. Fan out to ntfy + RSS. -- [ ] **Fact vs Inference Labeling** — add `label: "cited_fact" | "inference"` + optional `confidence` field to each `key_point` and `risk` in the LLM JSON schema. Prompt engineering change + BillBrief schema migration. UI: small badge on each bullet (no color politics — neutral labels only). +- [x] **Change-driven Alerts** — milestone keywords expanded (markup, conference, referral tier); topic followers now receive bill_updated milestone events; committee referral events delivered to pocket_veto/boost but suppressed for neutral; all three follow types covered for both tiers. +- [x] **Fact vs Inference Labeling** — `label: "cited_fact" | "inference"` on every cited key_point and risk; prompt engineering updated for all providers; "Inferred" badge in citation UI; backfill task for existing briefs. --- @@ -76,10 +80,9 @@ - [ ] **Source Viewer Option B** — in-app bill text viewer with cited passage highlighted and scroll-to-anchor. 
Deferred pending UX review of Option A (GovInfo link). - [ ] **Raw Diff Panel** — Python `difflib` diff between stored document versions, shown as collapsible "Raw Changes" below amendment brief. Zero API calls. Deferred — AI amendment brief is the primary "what changed" story. - [ ] **Shareable Collection Subscriptions** — "Follow this collection" mechanic so other users can subscribe to a public collection and get its bills added to their feed. -- [ ] Pocket Veto mode (follow stance) — toggle on a bill to treat it as “I don’t want this to pass”; adds to watchlist and triggers milestone alerts (committee report-out, calendared, vote scheduled, passed chamber, etc.) -- [ ] Pocket Veto notification rules — alert only on advancement milestones + failure outcomes (failed committee / failed floor / stalled) -- [ ] Follow modes — support Neutral (normal follow) + Pocket Veto now; optional Pocket Boost later -- [ ] UI: FollowButton becomes FollowMode selector (Neutral / Pocket Veto) with explanation tooltip +- [x] Pocket Veto mode (follow stance) — toggle on a bill to treat it as “I don’t want this to pass”; adds to watchlist and triggers milestone alerts +- [x] Follow modes — Neutral + Pocket Veto + Pocket Boost; FollowButton is a mode selector with explanation tooltips +- [ ] Pocket Veto notification rules — alert only on advancement milestones + failure outcomes (failed committee / failed floor / stalled) — notification dispatcher needs to filter by follow_mode ### PocketVeto function @@ -162,3 +165,51 @@ Then alert rules can be: - pocket_boost: “action points” + milestones +Yes — that’s a solid idea **if it’s done as a “welcome + how it works” nudge**, not an annoying pop-up that blocks the UI. 
+ +A toast can work, but for a first-time user you’ll usually get better results with a **dismissible banner** or a **one-time “welcome” card** on the dashboard, because: + +* toasts disappear (people miss them) +* first-run onboarding usually needs at least one click (“Got it” / “Start here”) + +### Best pattern (low effort, high impact) + +**First visit → show a dismissible Welcome card/banner** (top of dashboard) with: + +* 1 sentence purpose +* 3 bullets of key features +* 2 buttons: **“Add my first follow”** and **“See a demo bill”** +* “Don’t show again” checkbox (or implicit on dismiss) + +You can still use a toast, but make it: + +* sticky until dismissed +* or paired with a banner/card + +### What it should say (copy you can paste) + +**Title:** Welcome to PocketVeto +**Body (tight):** + +* Follow bills, members, or topics (low-noise) +* See *what changed* in plain English +* Verify every claim with **Back to Source** citations + **Buttons:** “Add a follow” | “Load demo” + +### Implementation detail (no creepy tracking) + +Store a simple flag: + +* `localStorage.setItem("pv_seen_welcome", "1")` + +Don’t store it server-side unless you already have user accounts and it’s part of preferences. + +### Backlog item (checkboxes) + +* [ ] First-visit welcome UI (banner/card + optional toast) +* [ ] Dismiss + “don’t show again” (localStorage) +* [ ] CTA: Add first follow +* [ ] CTA: Load demo data (optional) +* [ ] Link: “How it works” page/modal (optional) + +Next step: build a drop-in component for a clean welcome card + toast behavior, matched to the project's UI stack (Next.js + shadcn/ui, to be confirmed). 
diff --git a/backend/app/api/admin.py b/backend/app/api/admin.py index 2ce10ee..60c3608 100644 --- a/backend/app/api/admin.py +++ b/backend/app/api/admin.py @@ -134,6 +134,23 @@ async def get_stats( bills_missing_actions = (await db.execute( text("SELECT COUNT(*) FROM bills WHERE actions_fetched_at IS NULL") )).scalar() + # Cited brief points (objects) that have no label yet + unlabeled_briefs = (await db.execute( + text(""" + SELECT COUNT(*) FROM bill_briefs + WHERE ( + key_points IS NOT NULL AND EXISTS ( + SELECT 1 FROM jsonb_array_elements(key_points) AS p + WHERE jsonb_typeof(p) = 'object' AND (p->>'label') IS NULL + ) + ) OR ( + risks IS NOT NULL AND EXISTS ( + SELECT 1 FROM jsonb_array_elements(risks) AS r + WHERE jsonb_typeof(r) = 'object' AND (r->>'label') IS NULL + ) + ) + """) + )).scalar() return { "total_bills": total_bills, "docs_fetched": docs_fetched, @@ -146,6 +163,7 @@ async def get_stats( "bills_missing_sponsor": bills_missing_sponsor, "bills_missing_metadata": bills_missing_metadata, "bills_missing_actions": bills_missing_actions, + "unlabeled_briefs": unlabeled_briefs, "remaining": total_bills - total_briefs, } @@ -204,6 +222,14 @@ async def backfill_metadata(current_user: User = Depends(get_current_admin)): return {"task_id": task.id, "status": "queued"} +@router.post("/backfill-labels") +async def backfill_labels(current_user: User = Depends(get_current_admin)): + """Classify existing cited brief points as fact or inference without re-generating briefs.""" + from app.workers.llm_processor import backfill_brief_labels + task = backfill_brief_labels.delay() + return {"task_id": task.id, "status": "queued"} + + @router.post("/resume-analysis") async def resume_analysis(current_user: User = Depends(get_current_admin)): """Re-queue LLM processing for docs with no brief, and document fetching for bills with no doc.""" diff --git a/backend/app/services/llm_service.py b/backend/app/services/llm_service.py index de13e0f..005d22a 100644 --- 
a/backend/app/services/llm_service.py +++ b/backend/app/services/llm_service.py @@ -22,10 +22,10 @@ Always respond with valid JSON matching exactly this schema: { "summary": "2-4 paragraph plain-language summary of what this bill does", "key_points": [ - {"text": "specific concrete fact", "citation": "Section X(y)", "quote": "verbatim excerpt from bill ≤80 words"} + {"text": "specific concrete fact", "citation": "Section X(y)", "quote": "verbatim excerpt from bill ≤80 words", "label": "cited_fact"} ], "risks": [ - {"text": "legitimate concern or challenge", "citation": "Section X(y)", "quote": "verbatim excerpt from bill ≤80 words"} + {"text": "legitimate concern or challenge", "citation": "Section X(y)", "quote": "verbatim excerpt from bill ≤80 words", "label": "cited_fact"} ], "deadlines": [{"date": "YYYY-MM-DD or null", "description": "what happens on this date"}], "topic_tags": ["healthcare", "taxation"] @@ -35,10 +35,14 @@ Rules: - summary: Explain WHAT the bill does, not whether it is good or bad. Be factual and complete. - key_points: 5-10 specific, concrete things the bill changes, authorizes, or appropriates. \ Each item MUST include "text" (your claim), "citation" (the section number, e.g. "Section 301(a)(2)"), \ -and "quote" (a verbatim excerpt of ≤80 words from that section that supports your claim). +"quote" (a verbatim excerpt of ≤80 words from that section that supports your claim), and "label". - risks: Legitimate concerns from any perspective — costs, implementation challenges, \ constitutional questions, unintended consequences. Include at least 2 even for benign bills. \ -Each item MUST include "text", "citation", and "quote" just like key_points. +Each item MUST include "text", "citation", "quote", and "label" just like key_points. +- label: "cited_fact" if the claim is directly and explicitly stated in the quoted text. 
\ +"inference" if the claim is an analytical interpretation, projection, or implication that goes \ +beyond what the text literally says (e.g. projected costs, likely downstream effects, \ +constitutional questions). When in doubt, use "inference". - deadlines: Only include if explicitly stated in the text. Use null for date if a deadline \ is mentioned without a specific date. Empty list if none. - topic_tags: 3-8 lowercase tags. Prefer these standard tags: healthcare, taxation, defense, \ @@ -90,10 +94,10 @@ Always respond with valid JSON matching exactly this schema: { "summary": "2-3 paragraph plain-language description of what changed in this version", "key_points": [ - {"text": "specific change", "citation": "Section X(y)", "quote": "verbatim excerpt from new version ≤80 words"} + {"text": "specific change", "citation": "Section X(y)", "quote": "verbatim excerpt from new version ≤80 words", "label": "cited_fact"} ], "risks": [ - {"text": "new concern introduced by this change", "citation": "Section X(y)", "quote": "verbatim excerpt from new version ≤80 words"} + {"text": "new concern introduced by this change", "citation": "Section X(y)", "quote": "verbatim excerpt from new version ≤80 words", "label": "cited_fact"} ], "deadlines": [{"date": "YYYY-MM-DD or null", "description": "new deadline added"}], "topic_tags": ["healthcare", "taxation"] @@ -103,9 +107,12 @@ Rules: - summary: Focus ONLY on what is different from the previous version. Be specific. - key_points: List concrete additions, removals, or modifications in this version. \ Each item MUST include "text" (your claim), "citation" (the section number, e.g. "Section 301(a)(2)"), \ -and "quote" (a verbatim excerpt of ≤80 words from the NEW version that supports your claim). +"quote" (a verbatim excerpt of ≤80 words from the NEW version that supports your claim), and "label". - risks: Only include risks that are new or changed relative to the previous version. 
\ -Each item MUST include "text", "citation", and "quote" just like key_points. +Each item MUST include "text", "citation", "quote", and "label" just like key_points. +- label: "cited_fact" if the claim is directly and explicitly stated in the quoted text. \ +"inference" if the claim is an analytical interpretation, projection, or implication that goes \ +beyond what the text literally says. When in doubt, use "inference". - deadlines: Only new or changed deadlines. Empty list if none. - topic_tags: Same standard tags as before — include any new topics this version adds. diff --git a/backend/app/workers/congress_poller.py b/backend/app/workers/congress_poller.py index b18c68a..cf3be96 100644 --- a/backend/app/workers/congress_poller.py +++ b/backend/app/workers/congress_poller.py @@ -338,12 +338,29 @@ def _update_bill_if_changed(db, existing: Bill, parsed: dict) -> bool: from app.workers.notification_utils import ( emit_bill_notification, emit_member_follow_notifications, + emit_topic_follow_notifications, is_milestone_action, + is_referral_action, ) - if is_milestone_action(parsed.get("latest_action_text", "")): - action_text = parsed["latest_action_text"] - emit_bill_notification(db, existing, "bill_updated", action_text) - emit_member_follow_notifications(db, existing, "bill_updated", action_text) + action_text = parsed.get("latest_action_text", "") + is_milestone = is_milestone_action(action_text) + is_referral = not is_milestone and is_referral_action(action_text) + if is_milestone or is_referral: + tier = "progress" if is_milestone else "referral" + emit_bill_notification(db, existing, "bill_updated", action_text, milestone_tier=tier) + emit_member_follow_notifications(db, existing, "bill_updated", action_text, milestone_tier=tier) + # Topic followers — pull tags from the bill's latest brief + from app.models.brief import BillBrief + latest_brief = ( + db.query(BillBrief) + .filter_by(bill_id=existing.bill_id) + .order_by(BillBrief.created_at.desc()) + 
.first() + ) + topic_tags = latest_brief.topic_tags or [] if latest_brief else [] + emit_topic_follow_notifications( + db, existing, "bill_updated", action_text, topic_tags, milestone_tier=tier + ) return changed diff --git a/backend/app/workers/llm_processor.py b/backend/app/workers/llm_processor.py index 66b731c..4e0280e 100644 --- a/backend/app/workers/llm_processor.py +++ b/backend/app/workers/llm_processor.py @@ -181,6 +181,113 @@ def backfill_brief_citations(self): db.close() +@celery_app.task(bind=True, name="app.workers.llm_processor.backfill_brief_labels") +def backfill_brief_labels(self): + """ + Add fact/inference labels to existing cited brief points without re-generating them. + Sends one compact classification call per brief (all unlabeled points batched). + Skips briefs already fully labeled and plain-string points (no quote to classify). + """ + import json + from sqlalchemy.orm.attributes import flag_modified + from app.models.setting import AppSetting + + db = get_sync_db() + try: + unlabeled_ids = db.execute(text(""" + SELECT id FROM bill_briefs + WHERE ( + key_points IS NOT NULL AND EXISTS ( + SELECT 1 FROM jsonb_array_elements(key_points) AS p + WHERE jsonb_typeof(p) = 'object' AND (p->>'label') IS NULL + ) + ) OR ( + risks IS NOT NULL AND EXISTS ( + SELECT 1 FROM jsonb_array_elements(risks) AS r + WHERE jsonb_typeof(r) = 'object' AND (r->>'label') IS NULL + ) + ) + """)).fetchall() + + total = len(unlabeled_ids) + updated = 0 + skipped = 0 + + prov_row = db.get(AppSetting, "llm_provider") + model_row = db.get(AppSetting, "llm_model") + provider = get_llm_provider( + prov_row.value if prov_row else None, + model_row.value if model_row else None, + ) + + for row in unlabeled_ids: + brief = db.get(BillBrief, row.id) + if not brief: + skipped += 1 + continue + + # Collect all unlabeled cited points across both fields + to_classify: list[tuple[str, int, dict]] = [] + for field_name in ("key_points", "risks"): + for i, p in enumerate(getattr(brief, 
field_name) or []): + if isinstance(p, dict) and p.get("label") is None: + to_classify.append((field_name, i, p)) + + if not to_classify: + skipped += 1 + continue + + lines = [ + f'{i + 1}. TEXT: "{p["text"]}" | QUOTE: "{p.get("quote", "")}"' + for i, (_, __, p) in enumerate(to_classify) + ] + prompt = ( + "Classify each item as 'cited_fact' or 'inference'.\n" + "cited_fact = the claim is explicitly and directly stated in the quoted text.\n" + "inference = analytical interpretation, projection, or implication not literally stated.\n\n" + "Return ONLY a JSON array of strings, one per item, in order. No explanation.\n\n" + "Items:\n" + "\n".join(lines) + ) + + try: + raw = provider.generate_text(prompt).strip() + if raw.startswith("```"): + raw = raw.split("```")[1] + if raw.startswith("json"): + raw = raw[4:] + labels = json.loads(raw.strip()) + if not isinstance(labels, list) or len(labels) != len(to_classify): + logger.warning(f"Brief {brief.id}: label count mismatch, skipping") + skipped += 1 + continue + except Exception as exc: + logger.warning(f"Brief {brief.id}: classification failed: {exc}") + skipped += 1 + time.sleep(0.5) + continue + + fields_modified: set[str] = set() + for (field_name, point_idx, _), label in zip(to_classify, labels): + if label in ("cited_fact", "inference"): + getattr(brief, field_name)[point_idx]["label"] = label + fields_modified.add(field_name) + + for field_name in fields_modified: + flag_modified(brief, field_name) + + db.commit() + updated += 1 + time.sleep(0.2) + + logger.info( + f"backfill_brief_labels: {total} briefs found, " + f"{updated} updated, {skipped} skipped" + ) + return {"total": total, "updated": updated, "skipped": skipped} + finally: + db.close() + + @celery_app.task(bind=True, name="app.workers.llm_processor.resume_pending_analysis") def resume_pending_analysis(self): """ diff --git a/backend/app/workers/notification_dispatcher.py b/backend/app/workers/notification_dispatcher.py index 41c2c8c..62805e6 100644 
--- a/backend/app/workers/notification_dispatcher.py +++ b/backend/app/workers/notification_dispatcher.py @@ -94,6 +94,13 @@ def dispatch_notifications(self): db.commit() continue + # Referral-tier events (committee referrals) are noisy for neutral follows; + # pocket_veto and pocket_boost users want them as early warnings + if follow_mode == "neutral" and (event.payload or {}).get("milestone_tier") == "referral": + event.dispatched_at = now + db.commit() + continue + prefs = user.notification_prefs or {} ntfy_url = prefs.get("ntfy_topic_url", "").strip() ntfy_auth_method = prefs.get("ntfy_auth_method", "none") diff --git a/backend/app/workers/notification_utils.py b/backend/app/workers/notification_utils.py index b8c78cb..d49b53b 100644 --- a/backend/app/workers/notification_utils.py +++ b/backend/app/workers/notification_utils.py @@ -10,8 +10,15 @@ _MILESTONE_KEYWORDS = [ "presented to the president", "ordered to be reported", "ordered reported", "reported by", "discharged", - "placed on", # placed on calendar + "placed on", # placed on calendar "cloture", "roll call", + "markup", # markup session scheduled/completed + "conference", # conference committee activity +] + +# Committee referral — meaningful for pocket_veto/boost but noisy for neutral +_REFERRAL_KEYWORDS = [ + "referred to", ] # Events created within this window for the same (user, bill, event_type) are suppressed @@ -23,7 +30,12 @@ def is_milestone_action(action_text: str) -> bool: return any(kw in t for kw in _MILESTONE_KEYWORDS) -def _build_payload(bill, action_summary: str) -> dict: +def is_referral_action(action_text: str) -> bool: + t = (action_text or "").lower() + return any(kw in t for kw in _REFERRAL_KEYWORDS) + + +def _build_payload(bill, action_summary: str, milestone_tier: str = "progress") -> dict: from app.config import settings base_url = (settings.PUBLIC_URL or settings.LOCAL_URL).rstrip("/") return { @@ -31,6 +43,7 @@ def _build_payload(bill, action_summary: str) -> dict: 
"bill_label": f"{bill.bill_type.upper()} {bill.bill_number}", "brief_summary": (action_summary or "")[:300], "bill_url": f"{base_url}/bills/{bill.bill_id}", + "milestone_tier": milestone_tier, } @@ -45,7 +58,9 @@ def _is_duplicate(db, user_id: int, bill_id: str, event_type: str) -> bool: ).filter(NotificationEvent.created_at > cutoff).first() is not None -def emit_bill_notification(db, bill, event_type: str, action_summary: str) -> int: +def emit_bill_notification( + db, bill, event_type: str, action_summary: str, milestone_tier: str = "progress" +) -> int: """Create NotificationEvent rows for every user following this bill. Returns count.""" from app.models.follow import Follow from app.models.notification import NotificationEvent @@ -54,7 +69,7 @@ def emit_bill_notification(db, bill, event_type: str, action_summary: str) -> in if not followers: return 0 - payload = _build_payload(bill, action_summary) + payload = _build_payload(bill, action_summary, milestone_tier) count = 0 for follow in followers: if _is_duplicate(db, follow.user_id, bill.bill_id, event_type): @@ -71,7 +86,9 @@ def emit_bill_notification(db, bill, event_type: str, action_summary: str) -> in return count -def emit_member_follow_notifications(db, bill, event_type: str, action_summary: str) -> int: +def emit_member_follow_notifications( + db, bill, event_type: str, action_summary: str, milestone_tier: str = "progress" +) -> int: """Notify users following the bill's sponsor (dedup prevents double-alerts for bill+member followers).""" if not bill.sponsor_id: return 0 @@ -83,7 +100,7 @@ def emit_member_follow_notifications(db, bill, event_type: str, action_summary: if not followers: return 0 - payload = _build_payload(bill, action_summary) + payload = _build_payload(bill, action_summary, milestone_tier) count = 0 for follow in followers: if _is_duplicate(db, follow.user_id, bill.bill_id, event_type): @@ -100,7 +117,10 @@ def emit_member_follow_notifications(db, bill, event_type: str, action_summary: 
return count -def emit_topic_follow_notifications(db, bill, event_type: str, action_summary: str, topic_tags: list) -> int: +def emit_topic_follow_notifications( + db, bill, event_type: str, action_summary: str, topic_tags: list, + milestone_tier: str = "progress", +) -> int: """Notify users following any of the bill's topic tags.""" if not topic_tags: return 0 @@ -120,7 +140,7 @@ def emit_topic_follow_notifications(db, bill, event_type: str, action_summary: s if not followers: return 0 - payload = _build_payload(bill, action_summary) + payload = _build_payload(bill, action_summary, milestone_tier) count = 0 for follow in followers: if _is_duplicate(db, follow.user_id, bill.bill_id, event_type): diff --git a/frontend/app/settings/page.tsx b/frontend/app/settings/page.tsx index f211079..57776da 100644 --- a/frontend/app/settings/page.tsx +++ b/frontend/app/settings/page.tsx @@ -17,6 +17,9 @@ import { Bell, Shield, Zap, + ChevronDown, + ChevronRight, + Wrench, } from "lucide-react"; import Link from "next/link"; import { settingsAPI, adminAPI, notificationsAPI, type AdminUser, type LLMModel, type ApiHealthResult } from "@/lib/api"; @@ -134,6 +137,7 @@ export default function SettingsPage() { const [taskIds, setTaskIds] = useState>({}); const [taskStatuses, setTaskStatuses] = useState>({}); const [confirmDelete, setConfirmDelete] = useState(null); + const [showMaintenance, setShowMaintenance] = useState(false); const testLLM = async () => { setTesting(true); @@ -316,6 +320,7 @@ export default function SettingsPage() { { label: "AI briefs generated", value: stats.briefs_generated, color: "text-green-600 dark:text-green-400", icon: "✅" }, { label: "Pending LLM analysis", value: stats.pending_llm, color: stats.pending_llm > 0 ? "text-amber-600 dark:text-amber-400" : "text-muted-foreground", icon: "🔄", action: stats.pending_llm > 0 ? "Resume Analysis" : undefined }, { label: "Briefs missing citations", value: stats.uncited_briefs, color: stats.uncited_briefs > 0 ? 
"text-amber-600 dark:text-amber-400" : "text-muted-foreground", icon: "⚠️", action: stats.uncited_briefs > 0 ? "Backfill Citations" : undefined }, + { label: "Briefs with unlabeled points", value: stats.unlabeled_briefs, color: stats.unlabeled_briefs > 0 ? "text-amber-600 dark:text-amber-400" : "text-muted-foreground", icon: "🏷️", action: stats.unlabeled_briefs > 0 ? "Backfill Labels" : undefined }, ].map(({ label, value, color, icon, note, action }) => (
@@ -637,82 +642,9 @@ export default function SettingsPage() { {/* Manual Controls */}

Manual Controls

-
- {([ - { - key: "poll", - name: "Trigger Poll", - description: "Check Congress.gov for newly introduced or updated bills. Runs automatically on a schedule — use this to force an immediate sync.", - fn: adminAPI.triggerPoll, - status: "on-demand", - }, - { - key: "members", - name: "Sync Members", - description: "Refresh all member profiles from Congress.gov including biography, current term, leadership roles, and contact information.", - fn: adminAPI.triggerMemberSync, - status: "on-demand", - }, - { - key: "trends", - name: "Calculate Trends", - description: "Score bill and member newsworthiness by counting recent news headlines and Google search interest. Updates the trend charts.", - fn: adminAPI.triggerTrendScores, - status: "on-demand", - }, - { - key: "actions", - name: "Fetch Bill Actions", - description: "Download the full legislative history (votes, referrals, amendments) for recently active bills and populate the timeline view.", - fn: adminAPI.triggerFetchActions, - status: "on-demand", - }, - { - key: "backfill-actions", - name: "Backfill All Action Histories", - description: "One-time catch-up: fetch action histories for all bills that were imported before this feature existed. Run once to populate timelines across your full bill archive.", - fn: adminAPI.backfillAllActions, - status: stats ? (stats.bills_missing_actions > 0 ? "needed" : "ok") : "on-demand", - count: stats?.bills_missing_actions, - countLabel: "bills missing action history", - }, - { - key: "sponsors", - name: "Backfill Sponsors", - description: "Link bill sponsors that weren't captured during the initial import. Safe to re-run — skips bills that already have a sponsor.", - fn: adminAPI.backfillSponsors, - status: stats ? (stats.bills_missing_sponsor > 0 ? 
"needed" : "ok") : "on-demand", - count: stats?.bills_missing_sponsor, - countLabel: "bills missing sponsor", - }, - { - key: "metadata", - name: "Backfill Dates & Links", - description: "Fill in missing introduced dates, chamber assignments, and congress.gov links by re-fetching bill detail from Congress.gov.", - fn: adminAPI.backfillMetadata, - status: stats ? (stats.bills_missing_metadata > 0 ? "needed" : "ok") : "on-demand", - count: stats?.bills_missing_metadata, - countLabel: "bills missing metadata", - }, - { - key: "citations", - name: "Backfill Citations", - description: "Regenerate AI briefs that were created before inline source citations were added. Deletes the old brief and re-runs LLM analysis using the already-stored bill text — no new Congress.gov calls.", - fn: adminAPI.backfillCitations, - status: stats ? (stats.uncited_briefs > 0 ? "needed" : "ok") : "on-demand", - count: stats?.uncited_briefs, - countLabel: "briefs need regeneration", - }, - { - key: "resume", - name: "Resume Analysis", - description: "Restart AI brief generation for bills where processing stalled or failed (e.g. after an LLM quota outage). Also re-queues document fetching for bills that have no text yet.", - fn: adminAPI.resumeAnalysis, - status: stats ? (stats.pending_llm > 0 ? "needed" : "ok") : "on-demand", - count: stats?.pending_llm, - countLabel: "bills pending analysis", - }, - ] as Array<{ + + {(() => { + type ControlItem = { key: string; name: string; description: string; @@ -720,7 +652,9 @@ export default function SettingsPage() { status: "ok" | "needed" | "on-demand"; count?: number; countLabel?: string; - }>).map(({ key, name, description, fn, status, count, countLabel }) => ( + }; + + const renderRow = ({ key, name, description, fn, status, count, countLabel }: ControlItem) => (
- {taskStatuses[key] === "running" ? ( - - ) : "Run"} + {taskStatuses[key] === "running" ? : "Run"}
- ))} -
+ ); + + const recurring: ControlItem[] = [ + { + key: "poll", + name: "Trigger Poll", + description: "Check Congress.gov for newly introduced or updated bills. Runs automatically on a schedule — use this to force an immediate sync.", + fn: adminAPI.triggerPoll, + status: "on-demand", + }, + { + key: "members", + name: "Sync Members", + description: "Refresh all member profiles from Congress.gov including biography, current term, leadership roles, and contact information.", + fn: adminAPI.triggerMemberSync, + status: "on-demand", + }, + { + key: "trends", + name: "Calculate Trends", + description: "Score bill and member newsworthiness by counting recent news headlines and Google search interest. Updates the trend charts.", + fn: adminAPI.triggerTrendScores, + status: "on-demand", + }, + { + key: "actions", + name: "Fetch Bill Actions", + description: "Download the full legislative history (votes, referrals, amendments) for recently active bills and populate the timeline view.", + fn: adminAPI.triggerFetchActions, + status: "on-demand", + }, + { + key: "resume", + name: "Resume Analysis", + description: "Restart AI brief generation for bills where processing stalled or failed (e.g. after an LLM quota outage). Also re-queues document fetching for bills that have no text yet.", + fn: adminAPI.resumeAnalysis, + status: stats ? (stats.pending_llm > 0 ? "needed" : "on-demand") : "on-demand", + count: stats?.pending_llm, + countLabel: "bills pending analysis", + }, + ]; + + const maintenance: ControlItem[] = [ + { + key: "backfill-actions", + name: "Backfill All Action Histories", + description: "One-time catch-up: fetch action histories for all bills that were imported before this feature existed.", + fn: adminAPI.backfillAllActions, + status: stats ? (stats.bills_missing_actions > 0 ? 
"needed" : "ok") : "on-demand", + count: stats?.bills_missing_actions, + countLabel: "bills missing action history", + }, + { + key: "sponsors", + name: "Backfill Sponsors", + description: "Link bill sponsors that weren't captured during the initial import. Safe to re-run — skips bills that already have a sponsor.", + fn: adminAPI.backfillSponsors, + status: stats ? (stats.bills_missing_sponsor > 0 ? "needed" : "ok") : "on-demand", + count: stats?.bills_missing_sponsor, + countLabel: "bills missing sponsor", + }, + { + key: "metadata", + name: "Backfill Dates & Links", + description: "Fill in missing introduced dates, chamber assignments, and congress.gov links by re-fetching bill detail from Congress.gov.", + fn: adminAPI.backfillMetadata, + status: stats ? (stats.bills_missing_metadata > 0 ? "needed" : "ok") : "on-demand", + count: stats?.bills_missing_metadata, + countLabel: "bills missing metadata", + }, + { + key: "citations", + name: "Backfill Citations", + description: "Regenerate AI briefs created before inline source citations were added. Deletes the old brief and re-runs LLM analysis using already-stored bill text.", + fn: adminAPI.backfillCitations, + status: stats ? (stats.uncited_briefs > 0 ? "needed" : "ok") : "on-demand", + count: stats?.uncited_briefs, + countLabel: "briefs need regeneration", + }, + { + key: "labels", + name: "Backfill Fact/Inference Labels", + description: "Classify existing cited brief points as fact or inference. One compact LLM call per brief — no re-generation of summaries or citations.", + fn: adminAPI.backfillLabels, + status: stats ? (stats.unlabeled_briefs > 0 ? "needed" : "ok") : "on-demand", + count: stats?.unlabeled_briefs, + countLabel: "briefs with unlabeled points", + }, + ]; + + const maintenanceNeeded = maintenance.some((m) => m.status === "needed"); + + return ( + <> +
+ {recurring.map(renderRow)} +
+ + {/* Maintenance subsection */} +
+ + {showMaintenance && ( +
+ {maintenance.map(renderRow)} +
+ )} +
+ + ); + })()}
); diff --git a/frontend/components/bills/AIBriefCard.tsx b/frontend/components/bills/AIBriefCard.tsx index bcd73db..593b603 100644 --- a/frontend/components/bills/AIBriefCard.tsx +++ b/frontend/components/bills/AIBriefCard.tsx @@ -31,6 +31,14 @@ function CitedItem({ point, icon, govinfo_url, openKey, activeKey, setActiveKey
{icon} {cited ? point.text : point} + {cited && point.label === "inference" && ( + + Inferred + + )} {cited && (