fix: handle within-page cosponsor duplicates from Congress.gov API

Congress.gov occasionally returns the same member twice on a single page
with different sponsorship dates (observed: Sen. Warnock on 119-s-1383).
The DB uniqueness check didn't catch this because the first insert had not
been committed yet when the duplicate row was processed, so the second
insert raised a UniqueViolation. The fix adds an `inserted_this_run` set to
skip bioguide_ids already added in the current fetch loop.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Jack Levy
2026-03-14 18:33:12 -04:00
parent d0da0b8dce
commit 8625c850a0

View File

@@ -140,6 +140,9 @@ def fetch_bill_cosponsors(self, bill_id: str):
return {"status": "skipped"} return {"status": "skipped"}
known_bioguides = {row[0] for row in db.execute(text("SELECT bioguide_id FROM members")).fetchall()} known_bioguides = {row[0] for row in db.execute(text("SELECT bioguide_id FROM members")).fetchall()}
# Track bioguide_ids already inserted this run to handle within-page dupes
# (Congress.gov sometimes lists the same member twice with different dates)
inserted_this_run: set[str] = set()
inserted = 0 inserted = 0
offset = 0 offset = 0
@@ -157,12 +160,15 @@ def fetch_bill_cosponsors(self, bill_id: str):
if bioguide_id and bioguide_id not in known_bioguides: if bioguide_id and bioguide_id not in known_bioguides:
bioguide_id = None bioguide_id = None
# Skip if we already have this (bioguide_id, bill_id) pair # Skip dupes — both across runs (DB check) and within this page
if bioguide_id: if bioguide_id:
if bioguide_id in inserted_this_run:
continue
exists = db.query(BillCosponsor).filter_by( exists = db.query(BillCosponsor).filter_by(
bill_id=bill_id, bioguide_id=bioguide_id bill_id=bill_id, bioguide_id=bioguide_id
).first() ).first()
if exists: if exists:
inserted_this_run.add(bioguide_id)
continue continue
date_str = cs.get("sponsorshipDate") date_str = cs.get("sponsorshipDate")
@@ -179,6 +185,8 @@ def fetch_bill_cosponsors(self, bill_id: str):
state=cs.get("state"), state=cs.get("state"),
sponsored_date=sponsored_date, sponsored_date=sponsored_date,
)) ))
if bioguide_id:
inserted_this_run.add(bioguide_id)
inserted += 1 inserted += 1
db.commit() db.commit()