fix(news): robust gnews URL extraction + smarter lazy trigger
- Replace fragile entry.get("link") with _gnews_entry_url() helper that
checks entry.link attribute then falls back to entry.links[].href,
fixing cases where feedparser puts the URL in a non-standard location
- Lazy news re-fetch on bill detail now only triggers when the stored
trend score confirms gnews_count > 0, preventing endless re-queuing
for bills with genuinely no news coverage
Co-Authored-By: Jack Levy
This commit is contained in:
@@ -110,8 +110,11 @@ async def get_bill(bill_id: str, db: AsyncSession = Depends(get_db)):
|
|||||||
if bill.trend_scores:
|
if bill.trend_scores:
|
||||||
detail.latest_trend = bill.trend_scores[0]
|
detail.latest_trend = bill.trend_scores[0]
|
||||||
|
|
||||||
# Trigger a background news refresh if no articles are stored yet
|
# Trigger a background news refresh if no articles are stored but trend
|
||||||
if not bill.news_articles:
|
# data shows there are gnews results out there waiting to be fetched.
|
||||||
|
latest_trend = bill.trend_scores[0] if bill.trend_scores else None
|
||||||
|
has_gnews = latest_trend and (latest_trend.gnews_count or 0) > 0
|
||||||
|
if not bill.news_articles and has_gnews:
|
||||||
try:
|
try:
|
||||||
from app.workers.news_fetcher import fetch_news_for_bill
|
from app.workers.news_fetcher import fetch_news_for_bill
|
||||||
fetch_news_for_bill.delay(bill_id)
|
fetch_news_for_bill.delay(bill_id)
|
||||||
|
|||||||
@@ -89,6 +89,20 @@ def fetch_gnews_count(query: str, days: int = 30) -> int:
|
|||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
def _gnews_entry_url(entry) -> str:
|
||||||
|
"""Extract the article URL from a feedparser Google News RSS entry."""
|
||||||
|
# Primary: entry.link attribute
|
||||||
|
link = getattr(entry, "link", None) or entry.get("link", "")
|
||||||
|
if link:
|
||||||
|
return link
|
||||||
|
# Fallback: scan entry.links list for rel=alternate
|
||||||
|
for lnk in getattr(entry, "links", []):
|
||||||
|
href = lnk.get("href", "")
|
||||||
|
if href:
|
||||||
|
return href
|
||||||
|
return ""
|
||||||
|
|
||||||
|
|
||||||
def fetch_gnews_articles(query: str, days: int = 30) -> list[dict]:
|
def fetch_gnews_articles(query: str, days: int = 30) -> list[dict]:
|
||||||
"""Fetch articles from Google News RSS. No rate limit — unlimited source."""
|
"""Fetch articles from Google News RSS. No rate limit — unlimited source."""
|
||||||
import time as time_mod
|
import time as time_mod
|
||||||
@@ -100,25 +114,28 @@ def fetch_gnews_articles(query: str, days: int = 30) -> list[dict]:
|
|||||||
articles = []
|
articles = []
|
||||||
for entry in feed.entries[:20]:
|
for entry in feed.entries[:20]:
|
||||||
pub_at = None
|
pub_at = None
|
||||||
if entry.get("published_parsed"):
|
if getattr(entry, "published_parsed", None):
|
||||||
try:
|
try:
|
||||||
pub_at = datetime.fromtimestamp(
|
pub_at = datetime.fromtimestamp(
|
||||||
time_mod.mktime(entry.published_parsed), tz=timezone.utc
|
time_mod.mktime(entry.published_parsed), tz=timezone.utc
|
||||||
).isoformat()
|
).isoformat()
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
# Source: feedparser puts it in entry.source.title for Google News
|
||||||
source = ""
|
source = ""
|
||||||
if hasattr(entry, "source") and isinstance(entry.source, dict):
|
src = getattr(entry, "source", None)
|
||||||
source = entry.source.get("title", "")
|
if src:
|
||||||
elif entry.get("tags"):
|
source = getattr(src, "title", "") or src.get("title", "")
|
||||||
source = entry.tags[0].get("term", "") if entry.tags else ""
|
headline = entry.get("title", "") or getattr(entry, "title", "")
|
||||||
articles.append({
|
article_url = _gnews_entry_url(entry)
|
||||||
"source": source or "Google News",
|
if article_url and headline:
|
||||||
"headline": entry.get("title", ""),
|
articles.append({
|
||||||
"url": entry.get("link", ""),
|
"source": source or "Google News",
|
||||||
"published_at": pub_at,
|
"headline": headline,
|
||||||
})
|
"url": article_url,
|
||||||
return [a for a in articles if a["url"] and a["headline"]]
|
"published_at": pub_at,
|
||||||
|
})
|
||||||
|
return articles
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Google News RSS article fetch failed: {e}")
|
logger.error(f"Google News RSS article fetch failed: {e}")
|
||||||
return []
|
return []
|
||||||
|
|||||||
Reference in New Issue
Block a user