fix(news): robust gnews URL extraction + smarter lazy trigger

- Replace fragile entry.get("link") with a _gnews_entry_url() helper that checks the entry.link attribute, then falls back to entry.links[].href, fixing cases where feedparser puts the URL in a non-standard location.
- Lazy news re-fetch on bill detail now triggers only when the stored trend score confirms gnews_count > 0, preventing endless re-queuing for bills that genuinely have no news coverage.

Co-Authored-By: Jack Levy
2026-03-01 00:49:02 -05:00
parent 50f93468db
commit b57833d4b7
2 changed files with 34 additions and 14 deletions
--- a/backend/app/api/bills.py
+++ b/backend/app/api/bills.py
@@ -110,8 +110,11 @@ async def get_bill(bill_id: str, db: AsyncSession = Depends(get_db)):
    if bill.trend_scores:
        detail.latest_trend = bill.trend_scores[0]
-    # Trigger a background news refresh if no articles are stored yet
+    # Trigger a background news refresh if no articles are stored but trend
-    if not bill.news_articles:
+    # data shows there are gnews results out there waiting to be fetched.
    latest_trend = bill.trend_scores[0] if bill.trend_scores else None
    has_gnews = latest_trend and (latest_trend.gnews_count or 0) > 0
    if not bill.news_articles and has_gnews:
        try:
            from app.workers.news_fetcher import fetch_news_for_bill
            fetch_news_for_bill.delay(bill_id)
--- a/backend/app/services/news_service.py
+++ b/backend/app/services/news_service.py
@@ -89,6 +89,20 @@ def fetch_gnews_count(query: str, days: int = 30) -> int:
        return 0
 def _gnews_entry_url(entry) -> str:
     """Extract the article URL from a feedparser Google News RSS entry.

     Lookup order:

     1. ``link`` on the entry itself.
     2. The first item in ``links`` whose ``rel`` is ``"alternate"`` and
        which has a non-empty ``href``.
     3. The first item in ``links`` with any non-empty ``href``.

     Every field is read by attribute first and by mapping key second, so
     attribute-only objects and plain dicts are both accepted.

     Args:
         entry: A feed entry exposing ``link`` and/or ``links``.

     Returns:
         The article URL, or ``""`` when the entry carries none.
     """

     def _field(obj, name, default=None):
         # Attribute access first, then ``.get``; never raise for objects
         # that support only one of the two access styles.
         value = getattr(obj, name, None)
         if value is None and callable(getattr(obj, "get", None)):
             value = obj.get(name)
         return default if value is None else value

     # Primary: the entry's own link.
     link = _field(entry, "link", "")
     if link:
         return link

     # Fallback: prefer rel=alternate (the article itself); otherwise settle
     # for the first link that has any href at all.
     first_href = ""
     for lnk in _field(entry, "links", ()) or ():
         href = _field(lnk, "href", "")
         if not href:
             continue
         if _field(lnk, "rel", "") == "alternate":
             return href
         if not first_href:
             first_href = href
     return first_href
 def fetch_gnews_articles(query: str, days: int = 30) -> list[dict]:
    """Fetch articles from Google News RSS. No rate limit — unlimited source."""
    import time as time_mod
@@ -100,25 +114,28 @@ def fetch_gnews_articles(query: str, days: int = 30) -> list[dict]:
        articles = []
        for entry in feed.entries[:20]:
            pub_at = None
-            if entry.get("published_parsed"):
+            if getattr(entry, "published_parsed", None):
                try:
                    pub_at = datetime.fromtimestamp(
                        time_mod.mktime(entry.published_parsed), tz=timezone.utc
                    ).isoformat()
                except Exception:
                    pass
            # Source: feedparser puts it in entry.source.title for Google News
            source = ""
-            if hasattr(entry, "source") and isinstance(entry.source, dict):
+            src = getattr(entry, "source", None)
-                source = entry.source.get("title", "")
+            if src:
-            elif entry.get("tags"):
+                source = getattr(src, "title", "") or src.get("title", "")
-                source = entry.tags[0].get("term", "") if entry.tags else ""
+            headline = entry.get("title", "") or getattr(entry, "title", "")
-            articles.append({
+            article_url = _gnews_entry_url(entry)
-                "source": source or "Google News",
+            if article_url and headline:
-                "headline": entry.get("title", ""),
+                articles.append({
-                "url": entry.get("link", ""),
+                    "source": source or "Google News",
-                "published_at": pub_at,
+                    "headline": headline,
-            })
+                    "url": article_url,
-        return [a for a in articles if a["url"] and a["headline"]]
+                    "published_at": pub_at,
                })
        return articles
    except Exception as e:
        logger.error(f"Google News RSS article fetch failed: {e}")
        return []