fix(news): per-bill URL dedup + lazy re-fetch on bill detail load
- Drop the global unique constraint on news_articles.url and replace it with a (bill_id, url) constraint so the same article can appear for multiple bills.
- news_fetcher dedup is now scoped to bill_id instead of the global URL.
- The bill detail endpoint triggers a background news fetch when no articles are stored, so gnews articles surface on the next load.

Migration 0009.

Co-Authored-By: Jack Levy
This commit is contained in:
@@ -0,0 +1,29 @@
|
||||
"""fix news_articles url uniqueness to per-bill scope
|
||||
|
||||
Previously url was globally unique, meaning the same article could only
|
||||
be stored for one bill. This changes it to (bill_id, url) unique so the
|
||||
same article can appear in multiple bills' news panels.
|
||||
|
||||
Revision ID: 0009
|
||||
Revises: 0008
|
||||
Create Date: 2026-03-01
|
||||
"""
|
||||
import sqlalchemy as sa
|
||||
from alembic import op
|
||||
|
||||
revision = "0009"
|
||||
down_revision = "0008"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade():
    """Narrow URL uniqueness on ``news_articles`` from global to per-bill.

    Drops the unique constraint ``news_articles_url_key`` and creates
    ``uq_news_articles_bill_url`` over ``(bill_id, url)`` in its place.
    """
    table_name = "news_articles"

    # The table-wide uniqueness on url has to go first.
    op.drop_constraint("news_articles_url_key", table_name, type_="unique")

    # From here on a url only needs to be unique within a single bill.
    op.create_unique_constraint(
        "uq_news_articles_bill_url",
        table_name,
        ["bill_id", "url"],
    )
|
||||
|
||||
|
||||
def downgrade():
    """Restore the table-wide unique constraint on ``news_articles.url``.

    Reverses ``upgrade``: drops ``uq_news_articles_bill_url`` and re-creates
    ``news_articles_url_key`` over ``url`` alone.

    NOTE(review): re-creating the url-only constraint presumably fails if
    the same url has been stored for more than one bill since the upgrade;
    such rows would need de-duplicating first. Confirm before relying on
    this downgrade against populated data.
    """
    table_name = "news_articles"

    # Remove the per-bill constraint before reinstating the global one.
    op.drop_constraint("uq_news_articles_bill_url", table_name, type_="unique")
    op.create_unique_constraint("news_articles_url_key", table_name, ["url"])
|
||||
@@ -109,6 +109,15 @@ async def get_bill(bill_id: str, db: AsyncSession = Depends(get_db)):
|
||||
detail.latest_brief = bill.briefs[0]
|
||||
if bill.trend_scores:
|
||||
detail.latest_trend = bill.trend_scores[0]
|
||||
|
||||
# Trigger a background news refresh if no articles are stored yet
|
||||
if not bill.news_articles:
|
||||
try:
|
||||
from app.workers.news_fetcher import fetch_news_for_bill
|
||||
fetch_news_for_bill.delay(bill_id)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return detail
|
||||
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from sqlalchemy import Column, Integer, String, Text, Float, DateTime, ForeignKey, Index
|
||||
from sqlalchemy import Column, Integer, String, Text, Float, DateTime, ForeignKey, Index, UniqueConstraint
|
||||
from sqlalchemy.orm import relationship
|
||||
from sqlalchemy.sql import func
|
||||
|
||||
@@ -12,7 +12,7 @@ class NewsArticle(Base):
|
||||
bill_id = Column(String, ForeignKey("bills.bill_id", ondelete="CASCADE"), nullable=False)
|
||||
source = Column(String(200))
|
||||
headline = Column(Text)
|
||||
url = Column(String, unique=True)
|
||||
url = Column(String)
|
||||
published_at = Column(DateTime(timezone=True))
|
||||
relevance_score = Column(Float, default=0.0)
|
||||
created_at = Column(DateTime(timezone=True), server_default=func.now())
|
||||
@@ -20,6 +20,7 @@ class NewsArticle(Base):
|
||||
bill = relationship("Bill", back_populates="news_articles")
|
||||
|
||||
__table_args__ = (
|
||||
UniqueConstraint("bill_id", "url", name="uq_news_articles_bill_url"),
|
||||
Index("ix_news_articles_bill_id", "bill_id"),
|
||||
Index("ix_news_articles_published_at", "published_at"),
|
||||
)
|
||||
|
||||
@@ -50,8 +50,8 @@ def fetch_news_for_bill(self, bill_id: str):
|
||||
url = article.get("url")
|
||||
if not url:
|
||||
continue
|
||||
# Idempotency: skip duplicate URLs
|
||||
existing = db.query(NewsArticle).filter_by(url=url).first()
|
||||
# Idempotency: skip duplicates per bill (same article can appear for multiple bills)
|
||||
existing = db.query(NewsArticle).filter_by(bill_id=bill_id, url=url).first()
|
||||
if existing:
|
||||
continue
|
||||
pub_at = None
|
||||
|
||||
Reference in New Issue
Block a user