fix(news): per-bill URL dedup + lazy re-fetch on bill detail load
- Drop the global unique constraint on news_articles.url and replace it with a (bill_id, url) constraint so the same article can appear for multiple bills
- news_fetcher dedup is now scoped to bill_id instead of the global URL
- The bill detail endpoint triggers a background news fetch when no articles are stored, so gnews articles surface on the next load

Migration 0009.

Co-Authored-By: Jack Levy
This commit is contained in:
@@ -0,0 +1,29 @@
|
|||||||
|
"""fix news_articles url uniqueness to per-bill scope
|
||||||
|
|
||||||
|
Previously url was globally unique, meaning the same article could only
|
||||||
|
be stored for one bill. This changes it to (bill_id, url) unique so the
|
||||||
|
same article can appear in multiple bills' news panels.
|
||||||
|
|
||||||
|
Revision ID: 0009
|
||||||
|
Revises: 0008
|
||||||
|
Create Date: 2026-03-01
|
||||||
|
"""
|
||||||
|
import sqlalchemy as sa
|
||||||
|
from alembic import op
|
||||||
|
|
||||||
|
revision = "0009"
|
||||||
|
down_revision = "0008"
|
||||||
|
branch_labels = None
|
||||||
|
depends_on = None
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade():
    """Swap the global unique constraint on ``url`` for a per-bill one."""
    table = "news_articles"

    # Remove the existing uniqueness rule that covered ``url`` alone.
    op.drop_constraint("news_articles_url_key", table, type_="unique")

    # Enforce uniqueness on the (bill_id, url) pair instead.
    op.create_unique_constraint(
        "uq_news_articles_bill_url",
        table,
        ["bill_id", "url"],
    )
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade():
    """Restore the global unique constraint on ``news_articles.url``.

    While the per-bill constraint is in place the same URL may be stored for
    several bills, and recreating the global constraint over such rows would
    fail. Duplicate URLs are therefore collapsed to a single row first. This
    discards the extra per-bill copies; re-running the upgrade does not bring
    them back.
    """
    op.drop_constraint("uq_news_articles_bill_url", "news_articles", type_="unique")

    # Keep the lowest-id row for each URL and delete the others so the global
    # constraint can be created.
    # NOTE(review): assumes the table's integer primary key is named ``id`` --
    # confirm against the NewsArticle model before running.
    # NOTE(review): rows with a NULL url are left untouched, on the assumption
    # that the target database permits multiple NULLs under a unique
    # constraint (PostgreSQL and SQLite do) -- confirm for other backends.
    op.execute(
        sa.text(
            """
            DELETE FROM news_articles
            WHERE url IS NOT NULL
              AND id NOT IN (
                  SELECT MIN(id)
                  FROM news_articles
                  WHERE url IS NOT NULL
                  GROUP BY url
              )
            """
        )
    )

    op.create_unique_constraint("news_articles_url_key", "news_articles", ["url"])
|
||||||
@@ -109,6 +109,15 @@ async def get_bill(bill_id: str, db: AsyncSession = Depends(get_db)):
|
|||||||
detail.latest_brief = bill.briefs[0]
|
detail.latest_brief = bill.briefs[0]
|
||||||
if bill.trend_scores:
|
if bill.trend_scores:
|
||||||
detail.latest_trend = bill.trend_scores[0]
|
detail.latest_trend = bill.trend_scores[0]
|
||||||
|
|
||||||
|
# Trigger a background news refresh if no articles are stored yet
|
||||||
|
if not bill.news_articles:
|
||||||
|
try:
|
||||||
|
from app.workers.news_fetcher import fetch_news_for_bill
|
||||||
|
fetch_news_for_bill.delay(bill_id)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
return detail
|
return detail
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
from sqlalchemy import Column, Integer, String, Text, Float, DateTime, ForeignKey, Index
|
from sqlalchemy import Column, Integer, String, Text, Float, DateTime, ForeignKey, Index, UniqueConstraint
|
||||||
from sqlalchemy.orm import relationship
|
from sqlalchemy.orm import relationship
|
||||||
from sqlalchemy.sql import func
|
from sqlalchemy.sql import func
|
||||||
|
|
||||||
@@ -12,7 +12,7 @@ class NewsArticle(Base):
|
|||||||
bill_id = Column(String, ForeignKey("bills.bill_id", ondelete="CASCADE"), nullable=False)
|
bill_id = Column(String, ForeignKey("bills.bill_id", ondelete="CASCADE"), nullable=False)
|
||||||
source = Column(String(200))
|
source = Column(String(200))
|
||||||
headline = Column(Text)
|
headline = Column(Text)
|
||||||
url = Column(String, unique=True)
|
url = Column(String)
|
||||||
published_at = Column(DateTime(timezone=True))
|
published_at = Column(DateTime(timezone=True))
|
||||||
relevance_score = Column(Float, default=0.0)
|
relevance_score = Column(Float, default=0.0)
|
||||||
created_at = Column(DateTime(timezone=True), server_default=func.now())
|
created_at = Column(DateTime(timezone=True), server_default=func.now())
|
||||||
@@ -20,6 +20,7 @@ class NewsArticle(Base):
|
|||||||
bill = relationship("Bill", back_populates="news_articles")
|
bill = relationship("Bill", back_populates="news_articles")
|
||||||
|
|
||||||
__table_args__ = (
|
__table_args__ = (
|
||||||
|
UniqueConstraint("bill_id", "url", name="uq_news_articles_bill_url"),
|
||||||
Index("ix_news_articles_bill_id", "bill_id"),
|
Index("ix_news_articles_bill_id", "bill_id"),
|
||||||
Index("ix_news_articles_published_at", "published_at"),
|
Index("ix_news_articles_published_at", "published_at"),
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -50,8 +50,8 @@ def fetch_news_for_bill(self, bill_id: str):
|
|||||||
url = article.get("url")
|
url = article.get("url")
|
||||||
if not url:
|
if not url:
|
||||||
continue
|
continue
|
||||||
# Idempotency: skip duplicate URLs
|
# Idempotency: skip duplicates per bill (same article can appear for multiple bills)
|
||||||
existing = db.query(NewsArticle).filter_by(url=url).first()
|
existing = db.query(NewsArticle).filter_by(bill_id=bill_id, url=url).first()
|
||||||
if existing:
|
if existing:
|
||||||
continue
|
continue
|
||||||
pub_at = None
|
pub_at = None
|
||||||
|
|||||||
Reference in New Issue
Block a user