Limit bills to last 2 months + filter out procedural resolutions

- Drop hres/sres/hconres/sconres (simple/concurrent resolutions) from poller;
  only track hr, s, hjres, sjres (legislation that can become law)
- On first run (no stored last-polled timestamp), seed from 60 days (~2 months)
  back instead of the full congress history, keeping the initial bill count to
  ~1,600 instead of 13,000+

Authored-By: Jack Levy
This commit is contained in:
Jack Levy
2026-02-28 22:21:34 -05:00
parent 5b73b60d9e
commit c48241fe2f

View File

@@ -6,7 +6,7 @@ Uses fromDateTime to fetch only recently updated bills.
All operations are idempotent. All operations are idempotent.
""" """
import logging import logging
from datetime import datetime, timezone from datetime import datetime, timedelta, timezone
from app.database import get_sync_db from app.database import get_sync_db
from app.models import Bill, BillAction, Member, AppSetting from app.models import Bill, BillAction, Member, AppSetting
@@ -30,12 +30,21 @@ def _set_setting(db, key: str, value: str) -> None:
db.commit() db.commit()
# Bill types the poller keeps. Only legislation that can become law is
# tracked: bills (hr, s) and joint resolutions (hjres, sjres). Simple and
# concurrent resolutions (hres, sres, hconres, sconres) are procedural and
# not worth analyzing, so they are skipped.
TRACKED_BILL_TYPES = {"hr", "s", "hjres", "sjres"}
@celery_app.task(bind=True, max_retries=3, name="app.workers.congress_poller.poll_congress_bills") @celery_app.task(bind=True, max_retries=3, name="app.workers.congress_poller.poll_congress_bills")
def poll_congress_bills(self): def poll_congress_bills(self):
"""Fetch recently updated bills from Congress.gov and enqueue document + LLM processing.""" """Fetch recently updated bills from Congress.gov and enqueue document + LLM processing."""
db = get_sync_db() db = get_sync_db()
try: try:
last_polled = _get_setting(db, "congress_last_polled_at") last_polled = _get_setting(db, "congress_last_polled_at")
# On first run, seed from 2 months back rather than the full congress history
if not last_polled:
two_months_ago = datetime.now(timezone.utc) - timedelta(days=60)
last_polled = two_months_ago.strftime("%Y-%m-%dT%H:%M:%SZ")
current_congress = congress_api.get_current_congress() current_congress = congress_api.get_current_congress()
logger.info(f"Polling Congress {current_congress} (since {last_polled})") logger.info(f"Polling Congress {current_congress} (since {last_polled})")
@@ -56,6 +65,8 @@ def poll_congress_bills(self):
for bill_data in bills_data: for bill_data in bills_data:
parsed = congress_api.parse_bill_from_api(bill_data, current_congress) parsed = congress_api.parse_bill_from_api(bill_data, current_congress)
if parsed.get("bill_type") not in TRACKED_BILL_TYPES:
continue
bill_id = parsed["bill_id"] bill_id = parsed["bill_id"]
existing = db.get(Bill, bill_id) existing = db.get(Bill, bill_id)