feat(phase2): fact/inference labeling, change-driven alerts, admin cleanup
- Add label: cited_fact | inference to LLM brief schema (all 4 providers)
- Inferred badge in AIBriefCard for inference-labeled points
- backfill_brief_labels Celery task: classifies existing cited points in-place
- POST /api/admin/backfill-labels + unlabeled_briefs stat counter
- Expand milestone keywords: markup, conference
- Add is_referral_action() for committee referrals (referred to)
- Two-tier milestone notifications: progress tier (all follow modes) and referral tier (pocket_veto/boost only, neutral suppressed)
- Topic followers now receive bill_updated milestone notifications via latest brief topic_tags lookup in _update_bill_if_changed()
- Admin Manual Controls: collapsible Maintenance section for backfill tasks
- Update ARCHITECTURE.md and roadmap for Phase 2 completion

Co-Authored-By: Jack Levy
This commit is contained in:
@@ -134,6 +134,23 @@ async def get_stats(
|
||||
bills_missing_actions = (await db.execute(
|
||||
text("SELECT COUNT(*) FROM bills WHERE actions_fetched_at IS NULL")
|
||||
)).scalar()
|
||||
# Cited brief points (objects) that have no label yet
|
||||
unlabeled_briefs = (await db.execute(
|
||||
text("""
|
||||
SELECT COUNT(*) FROM bill_briefs
|
||||
WHERE (
|
||||
key_points IS NOT NULL AND EXISTS (
|
||||
SELECT 1 FROM jsonb_array_elements(key_points) AS p
|
||||
WHERE jsonb_typeof(p) = 'object' AND (p->>'label') IS NULL
|
||||
)
|
||||
) OR (
|
||||
risks IS NOT NULL AND EXISTS (
|
||||
SELECT 1 FROM jsonb_array_elements(risks) AS r
|
||||
WHERE jsonb_typeof(r) = 'object' AND (r->>'label') IS NULL
|
||||
)
|
||||
)
|
||||
""")
|
||||
)).scalar()
|
||||
return {
|
||||
"total_bills": total_bills,
|
||||
"docs_fetched": docs_fetched,
|
||||
@@ -146,6 +163,7 @@ async def get_stats(
|
||||
"bills_missing_sponsor": bills_missing_sponsor,
|
||||
"bills_missing_metadata": bills_missing_metadata,
|
||||
"bills_missing_actions": bills_missing_actions,
|
||||
"unlabeled_briefs": unlabeled_briefs,
|
||||
"remaining": total_bills - total_briefs,
|
||||
}
|
||||
|
||||
@@ -204,6 +222,14 @@ async def backfill_metadata(current_user: User = Depends(get_current_admin)):
|
||||
return {"task_id": task.id, "status": "queued"}
|
||||
|
||||
|
||||
@router.post("/backfill-labels")
async def backfill_labels(current_user: User = Depends(get_current_admin)):
    """Queue the job that labels existing cited brief points as fact or inference.

    Briefs are not re-generated; the work is handed to the
    ``backfill_brief_labels`` worker task. Access is gated by the
    ``get_current_admin`` dependency.

    Returns:
        A dict holding the queued task's id and the status ``"queued"``.
    """
    # The worker module is imported at call time, inside the handler.
    from app.workers.llm_processor import backfill_brief_labels as label_task

    queued = label_task.delay()
    response = {"task_id": queued.id, "status": "queued"}
    return response
|
||||
|
||||
|
||||
@router.post("/resume-analysis")
|
||||
async def resume_analysis(current_user: User = Depends(get_current_admin)):
|
||||
"""Re-queue LLM processing for docs with no brief, and document fetching for bills with no doc."""
|
||||
|
||||
@@ -22,10 +22,10 @@ Always respond with valid JSON matching exactly this schema:
|
||||
{
|
||||
"summary": "2-4 paragraph plain-language summary of what this bill does",
|
||||
"key_points": [
|
||||
{"text": "specific concrete fact", "citation": "Section X(y)", "quote": "verbatim excerpt from bill ≤80 words"}
|
||||
{"text": "specific concrete fact", "citation": "Section X(y)", "quote": "verbatim excerpt from bill ≤80 words", "label": "cited_fact"}
|
||||
],
|
||||
"risks": [
|
||||
{"text": "legitimate concern or challenge", "citation": "Section X(y)", "quote": "verbatim excerpt from bill ≤80 words"}
|
||||
{"text": "legitimate concern or challenge", "citation": "Section X(y)", "quote": "verbatim excerpt from bill ≤80 words", "label": "cited_fact"}
|
||||
],
|
||||
"deadlines": [{"date": "YYYY-MM-DD or null", "description": "what happens on this date"}],
|
||||
"topic_tags": ["healthcare", "taxation"]
|
||||
@@ -35,10 +35,14 @@ Rules:
|
||||
- summary: Explain WHAT the bill does, not whether it is good or bad. Be factual and complete.
|
||||
- key_points: 5-10 specific, concrete things the bill changes, authorizes, or appropriates. \
|
||||
Each item MUST include "text" (your claim), "citation" (the section number, e.g. "Section 301(a)(2)"), \
|
||||
and "quote" (a verbatim excerpt of ≤80 words from that section that supports your claim).
|
||||
"quote" (a verbatim excerpt of ≤80 words from that section that supports your claim), and "label".
|
||||
- risks: Legitimate concerns from any perspective — costs, implementation challenges, \
|
||||
constitutional questions, unintended consequences. Include at least 2 even for benign bills. \
|
||||
Each item MUST include "text", "citation", and "quote" just like key_points.
|
||||
Each item MUST include "text", "citation", "quote", and "label" just like key_points.
|
||||
- label: "cited_fact" if the claim is directly and explicitly stated in the quoted text. \
|
||||
"inference" if the claim is an analytical interpretation, projection, or implication that goes \
|
||||
beyond what the text literally says (e.g. projected costs, likely downstream effects, \
|
||||
constitutional questions). When in doubt, use "inference".
|
||||
- deadlines: Only include if explicitly stated in the text. Use null for date if a deadline \
|
||||
is mentioned without a specific date. Empty list if none.
|
||||
- topic_tags: 3-8 lowercase tags. Prefer these standard tags: healthcare, taxation, defense, \
|
||||
@@ -90,10 +94,10 @@ Always respond with valid JSON matching exactly this schema:
|
||||
{
|
||||
"summary": "2-3 paragraph plain-language description of what changed in this version",
|
||||
"key_points": [
|
||||
{"text": "specific change", "citation": "Section X(y)", "quote": "verbatim excerpt from new version ≤80 words"}
|
||||
{"text": "specific change", "citation": "Section X(y)", "quote": "verbatim excerpt from new version ≤80 words", "label": "cited_fact"}
|
||||
],
|
||||
"risks": [
|
||||
{"text": "new concern introduced by this change", "citation": "Section X(y)", "quote": "verbatim excerpt from new version ≤80 words"}
|
||||
{"text": "new concern introduced by this change", "citation": "Section X(y)", "quote": "verbatim excerpt from new version ≤80 words", "label": "cited_fact"}
|
||||
],
|
||||
"deadlines": [{"date": "YYYY-MM-DD or null", "description": "new deadline added"}],
|
||||
"topic_tags": ["healthcare", "taxation"]
|
||||
@@ -103,9 +107,12 @@ Rules:
|
||||
- summary: Focus ONLY on what is different from the previous version. Be specific.
|
||||
- key_points: List concrete additions, removals, or modifications in this version. \
|
||||
Each item MUST include "text" (your claim), "citation" (the section number, e.g. "Section 301(a)(2)"), \
|
||||
and "quote" (a verbatim excerpt of ≤80 words from the NEW version that supports your claim).
|
||||
"quote" (a verbatim excerpt of ≤80 words from the NEW version that supports your claim), and "label".
|
||||
- risks: Only include risks that are new or changed relative to the previous version. \
|
||||
Each item MUST include "text", "citation", and "quote" just like key_points.
|
||||
Each item MUST include "text", "citation", "quote", and "label" just like key_points.
|
||||
- label: "cited_fact" if the claim is directly and explicitly stated in the quoted text. \
|
||||
"inference" if the claim is an analytical interpretation, projection, or implication that goes \
|
||||
beyond what the text literally says. When in doubt, use "inference".
|
||||
- deadlines: Only new or changed deadlines. Empty list if none.
|
||||
- topic_tags: Same standard tags as before — include any new topics this version adds.
|
||||
|
||||
|
||||
@@ -338,12 +338,29 @@ def _update_bill_if_changed(db, existing: Bill, parsed: dict) -> bool:
|
||||
from app.workers.notification_utils import (
|
||||
emit_bill_notification,
|
||||
emit_member_follow_notifications,
|
||||
emit_topic_follow_notifications,
|
||||
is_milestone_action,
|
||||
is_referral_action,
|
||||
)
|
||||
if is_milestone_action(parsed.get("latest_action_text", "")):
|
||||
action_text = parsed["latest_action_text"]
|
||||
emit_bill_notification(db, existing, "bill_updated", action_text)
|
||||
emit_member_follow_notifications(db, existing, "bill_updated", action_text)
|
||||
action_text = parsed.get("latest_action_text", "")
|
||||
is_milestone = is_milestone_action(action_text)
|
||||
is_referral = not is_milestone and is_referral_action(action_text)
|
||||
if is_milestone or is_referral:
|
||||
tier = "progress" if is_milestone else "referral"
|
||||
emit_bill_notification(db, existing, "bill_updated", action_text, milestone_tier=tier)
|
||||
emit_member_follow_notifications(db, existing, "bill_updated", action_text, milestone_tier=tier)
|
||||
# Topic followers — pull tags from the bill's latest brief
|
||||
from app.models.brief import BillBrief
|
||||
latest_brief = (
|
||||
db.query(BillBrief)
|
||||
.filter_by(bill_id=existing.bill_id)
|
||||
.order_by(BillBrief.created_at.desc())
|
||||
.first()
|
||||
)
|
||||
topic_tags = latest_brief.topic_tags or [] if latest_brief else []
|
||||
emit_topic_follow_notifications(
|
||||
db, existing, "bill_updated", action_text, topic_tags, milestone_tier=tier
|
||||
)
|
||||
return changed
|
||||
|
||||
|
||||
|
||||
@@ -181,6 +181,113 @@ def backfill_brief_citations(self):
|
||||
db.close()
|
||||
|
||||
|
||||
def _build_label_prompt(points: list[dict]) -> str:
    """Render the classification prompt for one batch of cited brief points."""
    # Missing or null "text"/"quote" values are rendered as empty strings so a
    # single malformed point cannot raise or leak the literal "None" into the prompt.
    lines = [
        f'{n}. TEXT: "{p.get("text") or ""}" | QUOTE: "{p.get("quote") or ""}"'
        for n, p in enumerate(points, start=1)
    ]
    return (
        "Classify each item as 'cited_fact' or 'inference'.\n"
        "cited_fact = the claim is explicitly and directly stated in the quoted text.\n"
        "inference = analytical interpretation, projection, or implication not literally stated.\n\n"
        "Return ONLY a JSON array of strings, one per item, in order. No explanation.\n\n"
        "Items:\n" + "\n".join(lines)
    )


def _parse_label_response(raw: str):
    """Decode the model's reply as JSON, tolerating a surrounding Markdown code fence."""
    import json

    cleaned = raw.strip()
    if cleaned.startswith("```"):
        # Keep only the content between the first pair of fences.
        cleaned = cleaned.split("```")[1]
        if cleaned.startswith("json"):
            cleaned = cleaned[4:]
    return json.loads(cleaned.strip())


@celery_app.task(bind=True, name="app.workers.llm_processor.backfill_brief_labels")
def backfill_brief_labels(self):
    """
    Add fact/inference labels to existing cited brief points without re-generating them.

    Sends one compact classification call per brief (all unlabeled points batched).
    Skips briefs already fully labeled and plain-string points (no quote to classify).

    A brief is counted as "updated" only when at least one valid label was
    written and committed; every other outcome (brief missing, nothing to
    classify, provider/parse failure, count mismatch, no valid labels,
    commit failure) is counted as "skipped".

    Returns:
        dict with keys "total", "updated" and "skipped".
    """
    from sqlalchemy.orm.attributes import flag_modified
    from app.models.setting import AppSetting

    valid_labels = ("cited_fact", "inference")

    db = get_sync_db()
    try:
        unlabeled_ids = db.execute(text("""
            SELECT id FROM bill_briefs
            WHERE (
                key_points IS NOT NULL AND EXISTS (
                    SELECT 1 FROM jsonb_array_elements(key_points) AS p
                    WHERE jsonb_typeof(p) = 'object' AND (p->>'label') IS NULL
                )
            ) OR (
                risks IS NOT NULL AND EXISTS (
                    SELECT 1 FROM jsonb_array_elements(risks) AS r
                    WHERE jsonb_typeof(r) = 'object' AND (r->>'label') IS NULL
                )
            )
        """)).fetchall()

        total = len(unlabeled_ids)
        updated = 0
        skipped = 0

        prov_row = db.get(AppSetting, "llm_provider")
        model_row = db.get(AppSetting, "llm_model")
        provider = get_llm_provider(
            prov_row.value if prov_row else None,
            model_row.value if model_row else None,
        )

        for row in unlabeled_ids:
            brief = db.get(BillBrief, row.id)
            if not brief:
                skipped += 1
                continue

            # Collect all unlabeled cited points across both fields
            to_classify: list[tuple[str, int, dict]] = [
                (field_name, idx, point)
                for field_name in ("key_points", "risks")
                for idx, point in enumerate(getattr(brief, field_name) or [])
                if isinstance(point, dict) and point.get("label") is None
            ]

            if not to_classify:
                skipped += 1
                continue

            prompt = _build_label_prompt([point for _, _, point in to_classify])

            try:
                labels = _parse_label_response(provider.generate_text(prompt))
            except Exception as exc:
                # Best-effort: one failed call must not abort the whole backfill.
                logger.warning(f"Brief {brief.id}: classification failed: {exc}")
                skipped += 1
                time.sleep(0.5)
                continue

            if not isinstance(labels, list) or len(labels) != len(to_classify):
                logger.warning(f"Brief {brief.id}: label count mismatch, skipping")
                skipped += 1
                continue

            fields_modified: set[str] = set()
            for (field_name, point_idx, _), label in zip(to_classify, labels):
                if label in valid_labels:
                    getattr(brief, field_name)[point_idx]["label"] = label
                    fields_modified.add(field_name)

            if not fields_modified:
                # Nothing usable came back; do not report this brief as updated.
                logger.warning(f"Brief {brief.id}: no valid labels returned, skipping")
                skipped += 1
                time.sleep(0.2)
                continue

            # In-place edits of the JSON lists need an explicit dirty flag.
            for field_name in fields_modified:
                flag_modified(brief, field_name)

            try:
                db.commit()
            except Exception as exc:
                # Roll back so the session stays usable for the remaining briefs.
                db.rollback()
                logger.warning(f"Brief {row.id}: commit failed: {exc}")
                skipped += 1
                continue

            updated += 1
            time.sleep(0.2)

        logger.info(
            f"backfill_brief_labels: {total} briefs found, "
            f"{updated} updated, {skipped} skipped"
        )
        return {"total": total, "updated": updated, "skipped": skipped}
    finally:
        db.close()
|
||||
|
||||
|
||||
@celery_app.task(bind=True, name="app.workers.llm_processor.resume_pending_analysis")
|
||||
def resume_pending_analysis(self):
|
||||
"""
|
||||
|
||||
@@ -94,6 +94,13 @@ def dispatch_notifications(self):
|
||||
db.commit()
|
||||
continue
|
||||
|
||||
# Referral-tier events (committee referrals) are noisy for neutral follows;
|
||||
# pocket_veto and pocket_boost users want them as early warnings
|
||||
if follow_mode == "neutral" and (event.payload or {}).get("milestone_tier") == "referral":
|
||||
event.dispatched_at = now
|
||||
db.commit()
|
||||
continue
|
||||
|
||||
prefs = user.notification_prefs or {}
|
||||
ntfy_url = prefs.get("ntfy_topic_url", "").strip()
|
||||
ntfy_auth_method = prefs.get("ntfy_auth_method", "none")
|
||||
|
||||
@@ -10,8 +10,15 @@ _MILESTONE_KEYWORDS = [
|
||||
"presented to the president",
|
||||
"ordered to be reported", "ordered reported",
|
||||
"reported by", "discharged",
|
||||
"placed on", # placed on calendar
|
||||
"placed on", # placed on calendar
|
||||
"cloture", "roll call",
|
||||
"markup", # markup session scheduled/completed
|
||||
"conference", # conference committee activity
|
||||
]
|
||||
|
||||
# Committee referral — meaningful for pocket_veto/boost but noisy for neutral
|
||||
_REFERRAL_KEYWORDS = [
|
||||
"referred to",
|
||||
]
|
||||
|
||||
# Events created within this window for the same (user, bill, event_type) are suppressed
|
||||
@@ -23,7 +30,12 @@ def is_milestone_action(action_text: str) -> bool:
|
||||
return any(kw in t for kw in _MILESTONE_KEYWORDS)
|
||||
|
||||
|
||||
def _build_payload(bill, action_summary: str) -> dict:
|
||||
def is_referral_action(action_text: str) -> bool:
    """Return True when the action text contains any entry of ``_REFERRAL_KEYWORDS``.

    Matching is a case-insensitive substring test. ``None`` or empty
    text is treated as an empty string.
    """
    lowered = action_text.lower() if action_text else ""
    for keyword in _REFERRAL_KEYWORDS:
        if keyword in lowered:
            return True
    return False
|
||||
|
||||
|
||||
def _build_payload(bill, action_summary: str, milestone_tier: str = "progress") -> dict:
|
||||
from app.config import settings
|
||||
base_url = (settings.PUBLIC_URL or settings.LOCAL_URL).rstrip("/")
|
||||
return {
|
||||
@@ -31,6 +43,7 @@ def _build_payload(bill, action_summary: str) -> dict:
|
||||
"bill_label": f"{bill.bill_type.upper()} {bill.bill_number}",
|
||||
"brief_summary": (action_summary or "")[:300],
|
||||
"bill_url": f"{base_url}/bills/{bill.bill_id}",
|
||||
"milestone_tier": milestone_tier,
|
||||
}
|
||||
|
||||
|
||||
@@ -45,7 +58,9 @@ def _is_duplicate(db, user_id: int, bill_id: str, event_type: str) -> bool:
|
||||
).filter(NotificationEvent.created_at > cutoff).first() is not None
|
||||
|
||||
|
||||
def emit_bill_notification(db, bill, event_type: str, action_summary: str) -> int:
|
||||
def emit_bill_notification(
|
||||
db, bill, event_type: str, action_summary: str, milestone_tier: str = "progress"
|
||||
) -> int:
|
||||
"""Create NotificationEvent rows for every user following this bill. Returns count."""
|
||||
from app.models.follow import Follow
|
||||
from app.models.notification import NotificationEvent
|
||||
@@ -54,7 +69,7 @@ def emit_bill_notification(db, bill, event_type: str, action_summary: str) -> in
|
||||
if not followers:
|
||||
return 0
|
||||
|
||||
payload = _build_payload(bill, action_summary)
|
||||
payload = _build_payload(bill, action_summary, milestone_tier)
|
||||
count = 0
|
||||
for follow in followers:
|
||||
if _is_duplicate(db, follow.user_id, bill.bill_id, event_type):
|
||||
@@ -71,7 +86,9 @@ def emit_bill_notification(db, bill, event_type: str, action_summary: str) -> in
|
||||
return count
|
||||
|
||||
|
||||
def emit_member_follow_notifications(db, bill, event_type: str, action_summary: str) -> int:
|
||||
def emit_member_follow_notifications(
|
||||
db, bill, event_type: str, action_summary: str, milestone_tier: str = "progress"
|
||||
) -> int:
|
||||
"""Notify users following the bill's sponsor (dedup prevents double-alerts for bill+member followers)."""
|
||||
if not bill.sponsor_id:
|
||||
return 0
|
||||
@@ -83,7 +100,7 @@ def emit_member_follow_notifications(db, bill, event_type: str, action_summary:
|
||||
if not followers:
|
||||
return 0
|
||||
|
||||
payload = _build_payload(bill, action_summary)
|
||||
payload = _build_payload(bill, action_summary, milestone_tier)
|
||||
count = 0
|
||||
for follow in followers:
|
||||
if _is_duplicate(db, follow.user_id, bill.bill_id, event_type):
|
||||
@@ -100,7 +117,10 @@ def emit_member_follow_notifications(db, bill, event_type: str, action_summary:
|
||||
return count
|
||||
|
||||
|
||||
def emit_topic_follow_notifications(db, bill, event_type: str, action_summary: str, topic_tags: list) -> int:
|
||||
def emit_topic_follow_notifications(
|
||||
db, bill, event_type: str, action_summary: str, topic_tags: list,
|
||||
milestone_tier: str = "progress",
|
||||
) -> int:
|
||||
"""Notify users following any of the bill's topic tags."""
|
||||
if not topic_tags:
|
||||
return 0
|
||||
@@ -120,7 +140,7 @@ def emit_topic_follow_notifications(db, bill, event_type: str, action_summary: s
|
||||
if not followers:
|
||||
return 0
|
||||
|
||||
payload = _build_payload(bill, action_summary)
|
||||
payload = _build_payload(bill, action_summary, milestone_tier)
|
||||
count = 0
|
||||
for follow in followers:
|
||||
if _is_duplicate(db, follow.user_id, bill.bill_id, event_type):
|
||||
|
||||
Reference in New Issue
Block a user