fix(news): per-bill URL dedup + lazy re-fetch on bill detail load

- Drop global unique constraint on news_articles.url; replace with
  (bill_id, url) so the same article can appear for multiple bills
- news_fetcher dedup now scoped to bill_id instead of global URL
- Bill detail endpoint triggers a background news fetch when no
  articles are stored, so gnews articles surface on next load

Migration 0009.

Co-Authored-By: Jack Levy
This commit is contained in:
Jack Levy
2026-03-01 00:43:10 -05:00
parent a66b5b4bcb
commit 50f93468db
4 changed files with 43 additions and 4 deletions

View File

@@ -0,0 +1,29 @@
"""fix news_articles url uniqueness to per-bill scope
Previously url was globally unique, meaning the same article could only
be stored for one bill. This changes it to (bill_id, url) unique so the
same article can appear in multiple bills' news panels.
Revision ID: 0009
Revises: 0008
Create Date: 2026-03-01
"""
import sqlalchemy as sa
from alembic import op
# Revision identifiers used by Alembic to order this migration in the chain.
revision = "0009"
# The migration this one revises (see "Revises" in the module docstring).
down_revision = "0008"
branch_labels = None
depends_on = None
def upgrade():
    """Re-scope URL uniqueness on ``news_articles`` from global to per-bill.

    Removes the unique constraint ``news_articles_url_key`` and then adds
    ``uq_news_articles_bill_url`` covering ``(bill_id, url)``, so the same
    article URL can be stored once for each bill.
    """
    table = "news_articles"

    # Remove the table-wide uniqueness rule on url first.
    op.drop_constraint("news_articles_url_key", table, type_="unique")

    # Then enforce uniqueness only within a single bill.
    op.create_unique_constraint(
        "uq_news_articles_bill_url",
        table,
        ["bill_id", "url"],
    )
def downgrade():
    """Restore the global unique constraint on ``news_articles.url``.

    Drops the per-bill constraint ``uq_news_articles_bill_url`` and
    re-creates ``news_articles_url_key`` on ``url`` alone.

    NOTE(review): once the same URL has been stored for more than one bill,
    re-creating the single-column constraint will presumably be rejected by
    the database -- confirm, and deduplicate rows before downgrading if so.
    """
    table = "news_articles"

    # Undo the per-bill rule added by upgrade().
    op.drop_constraint("uq_news_articles_bill_url", table, type_="unique")

    # Put back the original table-wide uniqueness on url.
    op.create_unique_constraint(
        "news_articles_url_key",
        table,
        ["url"],
    )

View File

@@ -109,6 +109,15 @@ async def get_bill(bill_id: str, db: AsyncSession = Depends(get_db)):
detail.latest_brief = bill.briefs[0] detail.latest_brief = bill.briefs[0]
if bill.trend_scores: if bill.trend_scores:
detail.latest_trend = bill.trend_scores[0] detail.latest_trend = bill.trend_scores[0]
# Trigger a background news refresh if no articles are stored yet
if not bill.news_articles:
try:
from app.workers.news_fetcher import fetch_news_for_bill
fetch_news_for_bill.delay(bill_id)
except Exception:
pass
return detail return detail

View File

@@ -1,4 +1,4 @@
from sqlalchemy import Column, Integer, String, Text, Float, DateTime, ForeignKey, Index from sqlalchemy import Column, Integer, String, Text, Float, DateTime, ForeignKey, Index, UniqueConstraint
from sqlalchemy.orm import relationship from sqlalchemy.orm import relationship
from sqlalchemy.sql import func from sqlalchemy.sql import func
@@ -12,7 +12,7 @@ class NewsArticle(Base):
bill_id = Column(String, ForeignKey("bills.bill_id", ondelete="CASCADE"), nullable=False) bill_id = Column(String, ForeignKey("bills.bill_id", ondelete="CASCADE"), nullable=False)
source = Column(String(200)) source = Column(String(200))
headline = Column(Text) headline = Column(Text)
url = Column(String, unique=True) url = Column(String)
published_at = Column(DateTime(timezone=True)) published_at = Column(DateTime(timezone=True))
relevance_score = Column(Float, default=0.0) relevance_score = Column(Float, default=0.0)
created_at = Column(DateTime(timezone=True), server_default=func.now()) created_at = Column(DateTime(timezone=True), server_default=func.now())
@@ -20,6 +20,7 @@ class NewsArticle(Base):
bill = relationship("Bill", back_populates="news_articles") bill = relationship("Bill", back_populates="news_articles")
__table_args__ = ( __table_args__ = (
UniqueConstraint("bill_id", "url", name="uq_news_articles_bill_url"),
Index("ix_news_articles_bill_id", "bill_id"), Index("ix_news_articles_bill_id", "bill_id"),
Index("ix_news_articles_published_at", "published_at"), Index("ix_news_articles_published_at", "published_at"),
) )

View File

@@ -50,8 +50,8 @@ def fetch_news_for_bill(self, bill_id: str):
url = article.get("url") url = article.get("url")
if not url: if not url:
continue continue
# Idempotency: skip duplicate URLs # Idempotency: skip duplicates per bill (same article can appear for multiple bills)
existing = db.query(NewsArticle).filter_by(url=url).first() existing = db.query(NewsArticle).filter_by(bill_id=bill_id, url=url).first()
if existing: if existing:
continue continue
pub_at = None pub_at = None