From 8625c850a0da0d654161026b932fc7bc2607c0f2 Mon Sep 17 00:00:00 2001 From: Jack Levy Date: Sat, 14 Mar 2026 18:33:12 -0400 Subject: [PATCH] fix: handle within-page cosponsor duplicates from Congress.gov API Congress.gov occasionally returns the same member twice on a single page with different sponsorship dates (observed: Sen. Warnock on 119-s-1383). The DB uniqueness check didn't catch this because the first insert hadn't been committed yet when processing the duplicate row, causing a UniqueViolation. Fix adds an `inserted_this_run` set to skip bioguide_ids already added in the current fetch loop. Co-Authored-By: Claude Sonnet 4.6 --- backend/app/workers/bill_classifier.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/backend/app/workers/bill_classifier.py b/backend/app/workers/bill_classifier.py index 0bb1a44..4f21294 100644 --- a/backend/app/workers/bill_classifier.py +++ b/backend/app/workers/bill_classifier.py @@ -140,6 +140,9 @@ def fetch_bill_cosponsors(self, bill_id: str): return {"status": "skipped"} known_bioguides = {row[0] for row in db.execute(text("SELECT bioguide_id FROM members")).fetchall()} + # Track bioguide_ids already inserted this run to handle within-page dupes + # (Congress.gov sometimes lists the same member twice with different dates) + inserted_this_run: set[str] = set() inserted = 0 offset = 0 @@ -157,12 +160,15 @@ def fetch_bill_cosponsors(self, bill_id: str): if bioguide_id and bioguide_id not in known_bioguides: bioguide_id = None - # Skip if we already have this (bioguide_id, bill_id) pair + # Skip dupes — both across runs (DB check) and within this page if bioguide_id: + if bioguide_id in inserted_this_run: + continue exists = db.query(BillCosponsor).filter_by( bill_id=bill_id, bioguide_id=bioguide_id ).first() if exists: + inserted_this_run.add(bioguide_id) continue date_str = cs.get("sponsorshipDate") @@ -179,6 +185,8 @@ def fetch_bill_cosponsors(self, bill_id: str): state=cs.get("state"), sponsored_date=sponsored_date, )) + if bioguide_id: + inserted_this_run.add(bioguide_id) inserted += 1 db.commit()