feat(phase2): fact/inference labeling, change-driven alerts, admin cleanup
- Add label: cited_fact | inference to LLM brief schema (all 4 providers)
- Inferred badge in AIBriefCard for inference-labeled points
- backfill_brief_labels Celery task: classifies existing cited points in-place
- POST /api/admin/backfill-labels + unlabeled_briefs stat counter
- Expand milestone keywords: markup, conference
- Add is_referral_action() for committee referrals ("referred to")
- Two-tier milestone notifications: progress tier (all follow modes) and referral tier (pocket_veto/boost only, neutral suppressed)
- Topic followers now receive bill_updated milestone notifications via latest brief topic_tags lookup in _update_bill_if_changed()
- Admin Manual Controls: collapsible Maintenance section for backfill tasks
- Update ARCHITECTURE.md and roadmap for Phase 2 completion

Co-Authored-By: Jack Levy
This commit is contained in:
@@ -181,6 +181,113 @@ def backfill_brief_citations(self):
|
||||
db.close()
|
||||
|
||||
|
||||
@celery_app.task(bind=True, name="app.workers.llm_processor.backfill_brief_labels")
def backfill_brief_labels(self):
    """
    Add fact/inference labels to existing cited brief points without re-generating them.

    Sends one compact classification call per brief (all unlabeled points batched).
    Skips briefs already fully labeled and plain-string points (no quote to classify).

    Returns:
        dict: {"total": briefs matched by the SQL scan,
               "updated": briefs where at least one label was written,
               "skipped": briefs skipped (already labeled, vanished,
                          classification failure, or no usable labels)}
    """
    import json
    from sqlalchemy.orm.attributes import flag_modified
    from app.models.setting import AppSetting

    db = get_sync_db()
    try:
        # Find briefs with at least one object-shaped point missing a "label".
        # Plain-string points are excluded by the jsonb_typeof filter.
        unlabeled_ids = db.execute(text("""
            SELECT id FROM bill_briefs
            WHERE (
                key_points IS NOT NULL AND EXISTS (
                    SELECT 1 FROM jsonb_array_elements(key_points) AS p
                    WHERE jsonb_typeof(p) = 'object' AND (p->>'label') IS NULL
                )
            ) OR (
                risks IS NOT NULL AND EXISTS (
                    SELECT 1 FROM jsonb_array_elements(risks) AS r
                    WHERE jsonb_typeof(r) = 'object' AND (r->>'label') IS NULL
                )
            )
        """)).fetchall()

        total = len(unlabeled_ids)
        updated = 0
        skipped = 0

        # Resolve the configured provider/model; get_llm_provider handles
        # None by falling back to its defaults (settings rows may be absent).
        prov_row = db.get(AppSetting, "llm_provider")
        model_row = db.get(AppSetting, "llm_model")
        provider = get_llm_provider(
            prov_row.value if prov_row else None,
            model_row.value if model_row else None,
        )

        for row in unlabeled_ids:
            brief = db.get(BillBrief, row.id)
            if not brief:
                # Brief deleted between the scan and this fetch.
                skipped += 1
                continue

            # Collect all unlabeled cited points across both fields.
            to_classify: list[tuple[str, int, dict]] = []
            for field_name in ("key_points", "risks"):
                for i, p in enumerate(getattr(brief, field_name) or []):
                    if isinstance(p, dict) and p.get("label") is None:
                        to_classify.append((field_name, i, p))

            if not to_classify:
                skipped += 1
                continue

            # Use .get() for "text" too, so one malformed point (missing the
            # key) cannot raise KeyError and abort the entire backfill run.
            lines = [
                f'{i + 1}. TEXT: "{p.get("text", "")}" | QUOTE: "{p.get("quote", "")}"'
                for i, (_, __, p) in enumerate(to_classify)
            ]
            prompt = (
                "Classify each item as 'cited_fact' or 'inference'.\n"
                "cited_fact = the claim is explicitly and directly stated in the quoted text.\n"
                "inference = analytical interpretation, projection, or implication not literally stated.\n\n"
                "Return ONLY a JSON array of strings, one per item, in order. No explanation.\n\n"
                "Items:\n" + "\n".join(lines)
            )

            try:
                raw = provider.generate_text(prompt).strip()
                # Strip a markdown code fence (```json ... ```) if the model
                # wrapped its answer despite the instructions.
                if raw.startswith("```"):
                    raw = raw.split("```")[1]
                    if raw.startswith("json"):
                        raw = raw[4:]
                labels = json.loads(raw.strip())
                if not isinstance(labels, list) or len(labels) != len(to_classify):
                    logger.warning(f"Brief {brief.id}: label count mismatch, skipping")
                    skipped += 1
                    continue
            except Exception as exc:
                # Best-effort backfill: log and move on to the next brief.
                logger.warning(f"Brief {brief.id}: classification failed: {exc}")
                skipped += 1
                time.sleep(0.5)  # back off a little harder after a failure
                continue

            # Write back only recognized labels; silently drop anything else
            # the model invented so a bogus value is never stored.
            fields_modified: set[str] = set()
            for (field_name, point_idx, _), label in zip(to_classify, labels):
                if label in ("cited_fact", "inference"):
                    getattr(brief, field_name)[point_idx]["label"] = label
                    fields_modified.add(field_name)

            if not fields_modified:
                # Every returned label was unrecognized — nothing actually
                # changed, so don't commit or inflate the "updated" count.
                skipped += 1
                continue

            # In-place JSONB mutation is invisible to SQLAlchemy's change
            # tracking; flag each touched column so the UPDATE is emitted.
            for field_name in fields_modified:
                flag_modified(brief, field_name)

            db.commit()
            updated += 1
            time.sleep(0.2)  # gentle pacing between provider calls

        logger.info(
            f"backfill_brief_labels: {total} briefs found, "
            f"{updated} updated, {skipped} skipped"
        )
        return {"total": total, "updated": updated, "skipped": skipped}
    finally:
        db.close()
|
||||
|
||||
|
||||
@celery_app.task(bind=True, name="app.workers.llm_processor.resume_pending_analysis")
|
||||
def resume_pending_analysis(self):
|
||||
"""
|
||||
|
||||
Reference in New Issue
Block a user