Initial commit
This commit is contained in:
19
backend/Dockerfile
Normal file
19
backend/Dockerfile
Normal file
@@ -0,0 +1,19 @@
|
||||
FROM python:3.12-slim

WORKDIR /app

# Compiler and development headers needed to build psycopg2, pdfminer and lxml.
# The apt package index is deleted in the same layer so it is not kept in the image.
RUN apt-get update \
 && apt-get install --yes --no-install-recommends \
        gcc \
        libpq-dev \
        libxml2-dev \
        libxslt-dev \
 && rm -rf /var/lib/apt/lists/*

# Install Python dependencies before copying the application source.
COPY requirements.txt ./
RUN pip install --no-cache-dir --requirement requirements.txt

COPY . ./

# Default command (overridden per service in docker-compose.yml)
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
|
||||
41
backend/alembic.ini
Normal file
41
backend/alembic.ini
Normal file
@@ -0,0 +1,41 @@
|
||||
[alembic]
# Directory (relative to this file) that holds env.py, script.py.mako and versions/.
script_location = alembic
# Put the current directory on sys.path; env.py imports the `app` package.
prepend_sys_path = .
version_path_separator = os
# Fallback connection URL. alembic/env.py replaces it with the value of the
# SYNC_DATABASE_URL environment variable whenever that variable is set.
sqlalchemy.url = postgresql://congress:congress@postgres:5432/pocketveto

[post_write_hooks]

# Logging configuration; env.py passes this file to logging.config.fileConfig().
[loggers]
keys = root,sqlalchemy,alembic

[handlers]
keys = console

[formatters]
keys = generic

[logger_root]
level = WARN
handlers = console
qualname =

[logger_sqlalchemy]
level = WARN
handlers =
qualname = sqlalchemy.engine

[logger_alembic]
level = INFO
handlers =
qualname = alembic

[handler_console]
class = StreamHandler
args = (sys.stderr,)
level = NOTSET
formatter = generic

[formatter_generic]
format = %(levelname)-5.5s [%(name)s] %(message)s
datefmt = %H:%M:%S
|
||||
51
backend/alembic/env.py
Normal file
51
backend/alembic/env.py
Normal file
@@ -0,0 +1,51 @@
|
||||
import os
|
||||
from logging.config import fileConfig
|
||||
|
||||
from alembic import context
|
||||
from sqlalchemy import engine_from_config, pool
|
||||
|
||||
# Import all models so Alembic can detect them
|
||||
from app.database import Base
|
||||
import app.models # noqa: F401 — registers all models with Base.metadata
|
||||
|
||||
# Alembic Config object giving access to the values in alembic.ini.
config = context.config

# Prefer the connection URL from the environment over the one in alembic.ini.
sync_url = os.environ.get("SYNC_DATABASE_URL")
if sync_url:
    # set_main_option() stores the value through ConfigParser, which treats
    # "%" as interpolation syntax. A URL with percent-encoded credentials
    # (e.g. "%40" for "@") would fail when read back, so literal percent
    # signs are doubled; they are un-escaped again on retrieval.
    config.set_main_option("sqlalchemy.url", sync_url.replace("%", "%%"))

# Set up Python logging from the logging sections of the ini file, if any.
if config.config_file_name is not None:
    fileConfig(config.config_file_name)

# Metadata of all registered models, handed to context.configure() below.
target_metadata = Base.metadata
|
||||
|
||||
|
||||
def run_migrations_offline() -> None:
    """Run migrations with the context configured from a URL only.

    The context is given the ``sqlalchemy.url`` option rather than a live
    connection, with literal bind rendering and the "named" paramstyle.
    """
    offline_options = {
        "url": config.get_main_option("sqlalchemy.url"),
        "target_metadata": target_metadata,
        "literal_binds": True,
        "dialect_opts": {"paramstyle": "named"},
    }
    context.configure(**offline_options)

    with context.begin_transaction():
        context.run_migrations()
|
||||
|
||||
|
||||
def run_migrations_online() -> None:
    """Run migrations over a live database connection.

    An engine is built from the ``sqlalchemy.``-prefixed options of the main
    ini section, using ``NullPool``, and the migrations run inside a
    transaction on one connection from that engine.
    """
    ini_options = config.get_section(config.config_ini_section, {})
    engine = engine_from_config(
        ini_options,
        prefix="sqlalchemy.",
        poolclass=pool.NullPool,
    )

    with engine.connect() as connection:
        context.configure(connection=connection, target_metadata=target_metadata)
        with context.begin_transaction():
            context.run_migrations()
|
||||
|
||||
|
||||
# Pick the runner that matches how Alembic was invoked.
if not context.is_offline_mode():
    run_migrations_online()
else:
    run_migrations_offline()
|
||||
26
backend/alembic/script.py.mako
Normal file
26
backend/alembic/script.py.mako
Normal file
@@ -0,0 +1,26 @@
|
||||
"""${message}
|
||||
|
||||
Revision ID: ${up_revision}
|
||||
Revises: ${down_revision | comma,n}
|
||||
Create Date: ${create_date}
|
||||
|
||||
"""
|
||||
## Lines starting with "##" are Mako comments and are not written to the
## rendered revision file.
from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa
${imports if imports else ""}

# revision identifiers, used by Alembic.
revision: str = ${repr(up_revision)}
down_revision: Union[str, None] = ${repr(down_revision)}
branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}


## The body falls back to "pass" when no upgrade operations are supplied.
def upgrade() -> None:
    ${upgrades if upgrades else "pass"}


## The body falls back to "pass" when no downgrade operations are supplied.
def downgrade() -> None:
    ${downgrades if downgrades else "pass"}
|
||||
205
backend/alembic/versions/0001_initial_schema.py
Normal file
205
backend/alembic/versions/0001_initial_schema.py
Normal file
@@ -0,0 +1,205 @@
|
||||
"""initial schema
|
||||
|
||||
Revision ID: 0001
|
||||
Revises:
|
||||
Create Date: 2025-01-01 00:00:00.000000
|
||||
|
||||
"""
|
||||
from typing import Sequence, Union
|
||||
|
||||
import sqlalchemy as sa
|
||||
from alembic import op
|
||||
from sqlalchemy.dialects.postgresql import JSONB
|
||||
|
||||
revision: str = "0001"
|
||||
down_revision: Union[str, None] = None
|
||||
branch_labels: Union[str, Sequence[str], None] = None
|
||||
depends_on: Union[str, Sequence[str], None] = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Create the initial schema.

    Tables are created in dependency order: members, bills, bill_actions,
    bill_documents, bill_briefs, committees, committee_bills, news_articles,
    trend_scores, follows and app_settings, together with their indexes.
    """

    def auto_id() -> sa.Column:
        # Auto-incrementing integer surrogate key.
        return sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True)

    def bill_ref() -> sa.Column:
        # Required reference to bills.bill_id; rows are removed with their bill.
        return sa.Column(
            "bill_id",
            sa.String(),
            sa.ForeignKey("bills.bill_id", ondelete="CASCADE"),
            nullable=False,
        )

    def stamped(column_name: str) -> sa.Column:
        # Timezone-aware timestamp whose server default is now().
        return sa.Column(column_name, sa.DateTime(timezone=True), server_default=sa.func.now())

    # ── members ──────────────────────────────────────────────────────────────
    op.create_table(
        "members",
        sa.Column("bioguide_id", sa.String(), primary_key=True),
        sa.Column("name", sa.String(), nullable=False),
        sa.Column("first_name", sa.String()),
        sa.Column("last_name", sa.String()),
        sa.Column("party", sa.String(10)),
        sa.Column("state", sa.String(5)),
        sa.Column("chamber", sa.String(10)),
        sa.Column("district", sa.String(10)),
        sa.Column("photo_url", sa.String()),
        sa.Column("official_url", sa.String()),
        stamped("created_at"),
        stamped("updated_at"),
    )

    # ── bills ─────────────────────────────────────────────────────────────────
    op.create_table(
        "bills",
        sa.Column("bill_id", sa.String(), primary_key=True),
        sa.Column("congress_number", sa.Integer(), nullable=False),
        sa.Column("bill_type", sa.String(10), nullable=False),
        sa.Column("bill_number", sa.Integer(), nullable=False),
        sa.Column("title", sa.Text()),
        sa.Column("short_title", sa.Text()),
        sa.Column("sponsor_id", sa.String(), sa.ForeignKey("members.bioguide_id"), nullable=True),
        sa.Column("introduced_date", sa.Date()),
        sa.Column("latest_action_date", sa.Date()),
        sa.Column("latest_action_text", sa.Text()),
        sa.Column("status", sa.String(100)),
        sa.Column("chamber", sa.String(10)),
        sa.Column("congress_url", sa.String()),
        sa.Column("govtrack_url", sa.String()),
        sa.Column("last_checked_at", sa.DateTime(timezone=True)),
        sa.Column("actions_fetched_at", sa.DateTime(timezone=True)),
        stamped("created_at"),
        stamped("updated_at"),
    )
    for index_name, column_name in (
        ("ix_bills_congress_number", "congress_number"),
        ("ix_bills_latest_action_date", "latest_action_date"),
        ("ix_bills_introduced_date", "introduced_date"),
        ("ix_bills_chamber", "chamber"),
        ("ix_bills_sponsor_id", "sponsor_id"),
    ):
        op.create_index(index_name, "bills", [column_name])

    # Full-text search vector: a stored generated tsvector column, added with
    # raw SQL, plus a GIN index over it.
    op.execute("""
        ALTER TABLE bills ADD COLUMN search_vector tsvector
        GENERATED ALWAYS AS (
            setweight(to_tsvector('english', coalesce(title, '')), 'A') ||
            setweight(to_tsvector('english', coalesce(short_title, '')), 'A') ||
            setweight(to_tsvector('english', coalesce(latest_action_text, '')), 'C')
        ) STORED
    """)
    op.execute("CREATE INDEX ix_bills_search_vector ON bills USING GIN(search_vector)")

    # ── bill_actions ──────────────────────────────────────────────────────────
    op.create_table(
        "bill_actions",
        auto_id(),
        bill_ref(),
        sa.Column("action_date", sa.Date()),
        sa.Column("action_text", sa.Text()),
        sa.Column("action_type", sa.String(100)),
        sa.Column("chamber", sa.String(10)),
        stamped("created_at"),
    )
    op.create_index("ix_bill_actions_bill_id", "bill_actions", ["bill_id"])
    op.create_index("ix_bill_actions_action_date", "bill_actions", ["action_date"])

    # ── bill_documents ────────────────────────────────────────────────────────
    op.create_table(
        "bill_documents",
        auto_id(),
        bill_ref(),
        sa.Column("doc_type", sa.String(50)),
        sa.Column("doc_version", sa.String(50)),
        sa.Column("govinfo_url", sa.String()),
        sa.Column("raw_text", sa.Text()),
        sa.Column("fetched_at", sa.DateTime(timezone=True)),
        stamped("created_at"),
    )
    op.create_index("ix_bill_documents_bill_id", "bill_documents", ["bill_id"])

    # ── bill_briefs ───────────────────────────────────────────────────────────
    op.create_table(
        "bill_briefs",
        auto_id(),
        bill_ref(),
        sa.Column(
            "document_id",
            sa.Integer(),
            sa.ForeignKey("bill_documents.id", ondelete="SET NULL"),
            nullable=True,
        ),
        sa.Column("summary", sa.Text()),
        sa.Column("key_points", JSONB()),
        sa.Column("risks", JSONB()),
        sa.Column("deadlines", JSONB()),
        sa.Column("topic_tags", JSONB()),
        sa.Column("llm_provider", sa.String(50)),
        sa.Column("llm_model", sa.String(100)),
        stamped("created_at"),
    )
    op.create_index("ix_bill_briefs_bill_id", "bill_briefs", ["bill_id"])
    op.execute("CREATE INDEX ix_bill_briefs_topic_tags ON bill_briefs USING GIN(topic_tags)")

    # ── committees ────────────────────────────────────────────────────────────
    op.create_table(
        "committees",
        auto_id(),
        sa.Column("committee_code", sa.String(20), unique=True, nullable=False),
        sa.Column("name", sa.String(500)),
        sa.Column("chamber", sa.String(10)),
        sa.Column("committee_type", sa.String(50)),
    )

    # ── committee_bills ───────────────────────────────────────────────────────
    op.create_table(
        "committee_bills",
        auto_id(),
        sa.Column(
            "committee_id",
            sa.Integer(),
            sa.ForeignKey("committees.id", ondelete="CASCADE"),
            nullable=False,
        ),
        bill_ref(),
        sa.Column("referral_date", sa.Date()),
    )
    op.create_index("ix_committee_bills_bill_id", "committee_bills", ["bill_id"])
    op.create_index("ix_committee_bills_committee_id", "committee_bills", ["committee_id"])

    # ── news_articles ─────────────────────────────────────────────────────────
    # NOTE(review): `default=` is a client-side default and adds no DEFAULT
    # clause to the table; confirm that is intended here and in trend_scores.
    op.create_table(
        "news_articles",
        auto_id(),
        bill_ref(),
        sa.Column("source", sa.String(200)),
        sa.Column("headline", sa.Text()),
        sa.Column("url", sa.String(), unique=True),
        sa.Column("published_at", sa.DateTime(timezone=True)),
        sa.Column("relevance_score", sa.Float(), default=0.0),
        stamped("created_at"),
    )
    op.create_index("ix_news_articles_bill_id", "news_articles", ["bill_id"])
    op.create_index("ix_news_articles_published_at", "news_articles", ["published_at"])

    # ── trend_scores ──────────────────────────────────────────────────────────
    op.create_table(
        "trend_scores",
        auto_id(),
        bill_ref(),
        sa.Column("score_date", sa.Date(), nullable=False),
        sa.Column("newsapi_count", sa.Integer(), default=0),
        sa.Column("gnews_count", sa.Integer(), default=0),
        sa.Column("gtrends_score", sa.Float(), default=0.0),
        sa.Column("composite_score", sa.Float(), default=0.0),
        sa.UniqueConstraint("bill_id", "score_date", name="uq_trend_scores_bill_date"),
    )
    for index_name, column_name in (
        ("ix_trend_scores_bill_id", "bill_id"),
        ("ix_trend_scores_score_date", "score_date"),
        ("ix_trend_scores_composite", "composite_score"),
    ):
        op.create_index(index_name, "trend_scores", [column_name])

    # ── follows ───────────────────────────────────────────────────────────────
    op.create_table(
        "follows",
        auto_id(),
        sa.Column("follow_type", sa.String(20), nullable=False),
        sa.Column("follow_value", sa.String(), nullable=False),
        stamped("created_at"),
        sa.UniqueConstraint("follow_type", "follow_value", name="uq_follows_type_value"),
    )

    # ── app_settings ──────────────────────────────────────────────────────────
    op.create_table(
        "app_settings",
        sa.Column("key", sa.String(), primary_key=True),
        sa.Column("value", sa.String()),
        stamped("updated_at"),
    )
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Drop every table of the initial schema, referencing tables first."""
    drop_order = (
        "app_settings",
        "follows",
        "trend_scores",
        "news_articles",
        "committee_bills",
        "committees",
        "bill_briefs",
        "bill_documents",
        "bill_actions",
        "bills",
        "members",
    )
    for table_name in drop_order:
        op.drop_table(table_name)
|
||||
30
backend/alembic/versions/0002_widen_chamber_party_columns.py
Normal file
30
backend/alembic/versions/0002_widen_chamber_party_columns.py
Normal file
@@ -0,0 +1,30 @@
|
||||
"""widen chamber and party columns
|
||||
|
||||
Revision ID: 0002
|
||||
Revises: 0001
|
||||
Create Date: 2026-02-28 00:00:00.000000
|
||||
|
||||
"""
|
||||
from typing import Sequence, Union
|
||||
|
||||
import sqlalchemy as sa
|
||||
from alembic import op
|
||||
|
||||
revision: str = "0002"
|
||||
down_revision: Union[str, None] = "0001"
|
||||
branch_labels: Union[str, Sequence[str], None] = None
|
||||
depends_on: Union[str, Sequence[str], None] = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Change the chamber and party columns to ``String(50)``."""
    widened = (
        ("members", "chamber"),
        ("members", "party"),
        ("bills", "chamber"),
        ("bill_actions", "chamber"),
    )
    for table_name, column_name in widened:
        op.alter_column(table_name, column_name, type_=sa.String(50))
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Change the chamber and party columns back to ``String(10)``."""
    narrowed = (
        ("bill_actions", "chamber"),
        ("bills", "chamber"),
        ("members", "party"),
        ("members", "chamber"),
    )
    for table_name, column_name in narrowed:
        op.alter_column(table_name, column_name, type_=sa.String(10))
|
||||
26
backend/alembic/versions/0003_widen_member_state_district.py
Normal file
26
backend/alembic/versions/0003_widen_member_state_district.py
Normal file
@@ -0,0 +1,26 @@
|
||||
"""widen member state and district columns
|
||||
|
||||
Revision ID: 0003
|
||||
Revises: 0002
|
||||
Create Date: 2026-03-01 00:00:00.000000
|
||||
|
||||
"""
|
||||
from typing import Sequence, Union
|
||||
|
||||
import sqlalchemy as sa
|
||||
from alembic import op
|
||||
|
||||
revision: str = "0003"
|
||||
down_revision: Union[str, None] = "0002"
|
||||
branch_labels: Union[str, Sequence[str], None] = None
|
||||
depends_on: Union[str, Sequence[str], None] = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Change ``members.state`` and ``members.district`` to ``String(50)``."""
    for column_name in ("state", "district"):
        op.alter_column("members", column_name, type_=sa.String(50))
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Change ``members.district`` back to ``String(10)`` and ``members.state`` to ``String(5)``."""
    for column_name, length in (("district", 10), ("state", 5)):
        op.alter_column("members", column_name, type_=sa.String(length))
|
||||
27
backend/alembic/versions/0004_add_brief_type.py
Normal file
27
backend/alembic/versions/0004_add_brief_type.py
Normal file
@@ -0,0 +1,27 @@
|
||||
"""add brief_type to bill_briefs
|
||||
|
||||
Revision ID: 0004
|
||||
Revises: 0003
|
||||
Create Date: 2026-03-01 00:00:00.000000
|
||||
|
||||
"""
|
||||
from typing import Sequence, Union
|
||||
|
||||
import sqlalchemy as sa
|
||||
from alembic import op
|
||||
|
||||
revision: str = "0004"
|
||||
down_revision: Union[str, None] = "0003"
|
||||
branch_labels: Union[str, Sequence[str], None] = None
|
||||
depends_on: Union[str, Sequence[str], None] = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Add the non-nullable ``brief_type`` column to ``bill_briefs`` with server default "full"."""
    brief_type = sa.Column("brief_type", sa.String(20), nullable=False, server_default="full")
    op.add_column("bill_briefs", brief_type)
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Remove the ``brief_type`` column from ``bill_briefs``."""
    table_name, column_name = "bill_briefs", "brief_type"
    op.drop_column(table_name, column_name)
|
||||
0
backend/app/api/__init__.py
Normal file
0
backend/app/api/__init__.py
Normal file
39
backend/app/api/admin.py
Normal file
39
backend/app/api/admin.py
Normal file
@@ -0,0 +1,39 @@
|
||||
from fastapi import APIRouter
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@router.post("/trigger-poll")
async def trigger_poll():
    """Queue a Congress.gov poll immediately instead of waiting for the Beat schedule.

    Returns the id of the queued task and the fixed status "queued".
    """
    # Imported at call time, as in the other admin endpoints.
    from app.workers.congress_poller import poll_congress_bills

    queued = poll_congress_bills.delay()
    return dict(task_id=queued.id, status="queued")
|
||||
|
||||
|
||||
@router.post("/trigger-member-sync")
async def trigger_member_sync():
    """Queue a member sync task.

    Returns the id of the queued task and the fixed status "queued".
    """
    from app.workers.congress_poller import sync_members

    queued = sync_members.delay()
    return dict(task_id=queued.id, status="queued")
|
||||
|
||||
|
||||
@router.post("/trigger-trend-scores")
async def trigger_trend_scores():
    """Queue the trend score calculation task.

    Returns the id of the queued task and the fixed status "queued".
    """
    from app.workers.trend_scorer import calculate_all_trend_scores

    queued = calculate_all_trend_scores.delay()
    return dict(task_id=queued.id, status="queued")
|
||||
|
||||
|
||||
@router.get("/task-status/{task_id}")
async def get_task_status(task_id: str):
    """Check the status of an async task.

    Args:
        task_id: Id returned by one of the trigger endpoints.

    Returns:
        A dict with ``task_id``, ``status`` (the task state reported by
        Celery) and ``result``. ``result`` is ``None`` until the task is
        ready. Once ready it is the task's return value, or a
        ``"ExceptionType: message"`` string when the stored result is an
        exception.
    """
    from app.workers.celery_app import celery_app

    result = celery_app.AsyncResult(task_id)

    outcome = result.result if result.ready() else None
    if isinstance(outcome, BaseException):
        # A failed task stores the raised exception as its result. Exception
        # objects do not serialize to meaningful JSON, so report them as text.
        outcome = f"{type(outcome).__name__}: {outcome}"

    return {
        "task_id": task_id,
        "status": result.status,
        "result": outcome,
    }
|
||||
145
backend/app/api/bills.py
Normal file
145
backend/app/api/bills.py
Normal file
@@ -0,0 +1,145 @@
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import APIRouter, Depends, Query
|
||||
from sqlalchemy import desc, func, or_, select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy.orm import selectinload
|
||||
|
||||
from app.database import get_db
|
||||
from app.models import Bill, BillAction, BillBrief, NewsArticle, TrendScore
|
||||
from app.schemas.schemas import (
|
||||
BillDetailSchema,
|
||||
BillSchema,
|
||||
BillActionSchema,
|
||||
NewsArticleSchema,
|
||||
PaginatedResponse,
|
||||
TrendScoreSchema,
|
||||
)
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@router.get("", response_model=PaginatedResponse[BillSchema])
async def list_bills(
    chamber: Optional[str] = Query(None),
    topic: Optional[str] = Query(None),
    sponsor_id: Optional[str] = Query(None),
    q: Optional[str] = Query(None),
    page: int = Query(1, ge=1),
    per_page: int = Query(20, ge=1, le=100),
    sort: str = Query("latest_action_date"),
    db: AsyncSession = Depends(get_db),
):
    """Return one page of bills, filtered as requested and sorted descending.

    Args:
        chamber: Exact match on ``Bill.chamber``.
        topic: Keep bills that have a brief whose ``topic_tags`` contains this tag.
        sponsor_id: Exact match on ``Bill.sponsor_id``.
        q: Case-insensitive substring matched against bill id, title and
            short title. ``%`` and ``_`` are not escaped, so they act as
            LIKE wildcards.
        page: 1-based page number.
        per_page: Page size, 1 to 100.
        sort: Name of a column of the bills table. Any other value falls
            back to ``latest_action_date``.
        db: Async database session.

    Returns:
        A ``PaginatedResponse`` holding the page items, the total number of
        matching bills and the page count (at least 1).
    """
    query = select(Bill).options(
        selectinload(Bill.sponsor),
        selectinload(Bill.briefs),
        selectinload(Bill.trend_scores),
    )

    if chamber:
        query = query.where(Bill.chamber == chamber)
    if sponsor_id:
        query = query.where(Bill.sponsor_id == sponsor_id)
    if topic:
        # Filter with a subquery rather than a JOIN: a JOIN yields one row per
        # matching brief, which inflates the total and shortens pages for
        # bills that have several briefs with the same tag.
        tagged_bill_ids = select(BillBrief.bill_id).where(
            BillBrief.topic_tags.contains([topic])
        )
        query = query.where(Bill.bill_id.in_(tagged_bill_ids))
    if q:
        pattern = f"%{q}%"
        query = query.where(
            or_(
                Bill.bill_id.ilike(pattern),
                Bill.title.ilike(pattern),
                Bill.short_title.ilike(pattern),
            )
        )

    # Count total
    count_query = select(func.count()).select_from(query.subquery())
    total = await db.scalar(count_query) or 0

    # Sort. Only real table columns are accepted: a bare getattr() on the
    # model would also return relationships and other class attributes that
    # cannot be used in ORDER BY.
    sort_col = Bill.__table__.c.get(sort)
    if sort_col is None:
        sort_col = Bill.latest_action_date
    # bill_id breaks ties so rows do not move between pages.
    query = (
        query.order_by(desc(sort_col), Bill.bill_id)
        .offset((page - 1) * per_page)
        .limit(per_page)
    )

    result = await db.execute(query)
    bills = result.scalars().unique().all()

    # Attach a brief and a trend score to each bill.
    # NOTE(review): taking element [0] assumes the relationships are ordered
    # newest-first — confirm against the model definitions.
    items = []
    for bill in bills:
        bill_dict = BillSchema.model_validate(bill)
        if bill.briefs:
            bill_dict.latest_brief = bill.briefs[0]
        if bill.trend_scores:
            bill_dict.latest_trend = bill.trend_scores[0]
        items.append(bill_dict)

    return PaginatedResponse(
        items=items,
        total=total,
        page=page,
        per_page=per_page,
        pages=max(1, (total + per_page - 1) // per_page),
    )
|
||||
|
||||
|
||||
@router.get("/{bill_id}", response_model=BillDetailSchema)
async def get_bill(bill_id: str, db: AsyncSession = Depends(get_db)):
    """Return one bill with its sponsor, actions, briefs, news and trend scores loaded.

    Raises:
        HTTPException: 404 when no bill has the given id.
    """
    eager_loads = (
        selectinload(Bill.sponsor),
        selectinload(Bill.actions),
        selectinload(Bill.briefs),
        selectinload(Bill.news_articles),
        selectinload(Bill.trend_scores),
    )
    stmt = select(Bill).options(*eager_loads).where(Bill.bill_id == bill_id)
    rows = await db.execute(stmt)
    bill = rows.scalar_one_or_none()

    if not bill:
        from fastapi import HTTPException

        raise HTTPException(status_code=404, detail="Bill not found")

    detail = BillDetailSchema.model_validate(bill)
    # The first element of each collection is exposed as the "latest" entry.
    if bill.briefs:
        detail.latest_brief = bill.briefs[0]
    if bill.trend_scores:
        detail.latest_trend = bill.trend_scores[0]
    return detail
|
||||
|
||||
|
||||
@router.get("/{bill_id}/actions", response_model=list[BillActionSchema])
async def get_bill_actions(bill_id: str, db: AsyncSession = Depends(get_db)):
    """Return every action recorded for a bill, most recent action date first."""
    stmt = select(BillAction).where(BillAction.bill_id == bill_id)
    stmt = stmt.order_by(desc(BillAction.action_date))
    rows = await db.execute(stmt)
    return rows.scalars().all()
|
||||
|
||||
|
||||
@router.get("/{bill_id}/news", response_model=list[NewsArticleSchema])
async def get_bill_news(bill_id: str, db: AsyncSession = Depends(get_db)):
    """Return up to 20 news articles linked to a bill, most recently published first."""
    stmt = select(NewsArticle).where(NewsArticle.bill_id == bill_id)
    stmt = stmt.order_by(desc(NewsArticle.published_at)).limit(20)
    rows = await db.execute(stmt)
    return rows.scalars().all()
|
||||
|
||||
|
||||
@router.get("/{bill_id}/trend", response_model=list[TrendScoreSchema])
async def get_bill_trend(bill_id: str, days: int = Query(30, ge=7, le=365), db: AsyncSession = Depends(get_db)):
    """Return a bill's trend scores from the last ``days`` days, oldest first."""
    from datetime import date, timedelta

    earliest = date.today() - timedelta(days=days)
    stmt = (
        select(TrendScore)
        .where(TrendScore.bill_id == bill_id)
        .where(TrendScore.score_date >= earliest)
        .order_by(TrendScore.score_date)
    )
    rows = await db.execute(stmt)
    return rows.scalars().all()
|
||||
102
backend/app/api/dashboard.py
Normal file
102
backend/app/api/dashboard.py
Normal file
@@ -0,0 +1,102 @@
|
||||
from datetime import date, timedelta
|
||||
|
||||
from fastapi import Depends
|
||||
from fastapi import APIRouter
|
||||
from sqlalchemy import desc, select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy.orm import selectinload
|
||||
|
||||
from app.database import get_db
|
||||
from app.models import Bill, BillBrief, Follow, TrendScore
|
||||
from app.schemas.schemas import BillSchema
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
def _dashboard_bill_select():
    """Build the base SELECT for dashboard bills with sponsor, briefs and trend scores eagerly loaded."""
    return select(Bill).options(
        selectinload(Bill.sponsor),
        selectinload(Bill.briefs),
        selectinload(Bill.trend_scores),
    )


def _serialize_bill(bill: Bill) -> dict:
    """Convert a loaded Bill into a plain dict via ``BillSchema``."""
    b = BillSchema.model_validate(bill)
    # NOTE(review): element [0] is treated as the latest entry, which assumes
    # the relationships are ordered newest-first — confirm in the models.
    if bill.briefs:
        b.latest_brief = bill.briefs[0]
    if bill.trend_scores:
        b.latest_trend = bill.trend_scores[0]
    return b.model_dump()


@router.get("")
async def get_dashboard(db: AsyncSession = Depends(get_db)):
    """Build the dashboard payload.

    Returns:
        A dict with three keys:
        - ``feed``: up to 50 bills that are followed directly, sponsored by a
          followed member, or tagged with a followed topic, newest action first.
        - ``trending``: up to 10 bills with the highest composite trend score
          dated yesterday or later.
        - ``follows``: the number of followed bills, members and topics.
    """
    from sqlalchemy import func

    # Load all follows
    follows_result = await db.execute(select(Follow))
    follows = follows_result.scalars().all()

    followed_bill_ids = [f.follow_value for f in follows if f.follow_type == "bill"]
    followed_member_ids = [f.follow_value for f in follows if f.follow_type == "member"]
    followed_topics = [f.follow_value for f in follows if f.follow_type == "topic"]

    feed_bills: list[Bill] = []
    seen_ids: set[str] = set()

    async def add_to_feed(criterion, limit: int) -> None:
        # Run one feed query and append the bills not already in the feed.
        stmt = (
            _dashboard_bill_select()
            .where(criterion)
            .order_by(desc(Bill.latest_action_date))
            .limit(limit)
        )
        result = await db.execute(stmt)
        for bill in result.scalars().all():
            if bill.bill_id not in seen_ids:
                feed_bills.append(bill)
                seen_ids.add(bill.bill_id)

    # 1. Directly followed bills
    if followed_bill_ids:
        await add_to_feed(Bill.bill_id.in_(followed_bill_ids), 20)

    # 2. Bills from followed members
    if followed_member_ids:
        await add_to_feed(Bill.sponsor_id.in_(followed_member_ids), 20)

    # 3. Bills matching followed topics. A subquery is used instead of a JOIN
    # so a bill with several matching briefs takes only one of the 10 slots.
    for topic in followed_topics:
        tagged_bill_ids = select(BillBrief.bill_id).where(
            BillBrief.topic_tags.contains([topic])
        )
        await add_to_feed(Bill.bill_id.in_(tagged_bill_ids), 10)

    # Sort feed by latest action date
    feed_bills.sort(key=lambda b: b.latest_action_date or date.min, reverse=True)

    # 4. Trending bills: top 10 by composite score dated yesterday or later.
    # Scores are reduced to one row per bill before the LIMIT; joining the raw
    # score rows lets a bill scored on both days take two of the ten slots.
    window_start = date.today() - timedelta(days=1)
    best_scores = (
        select(
            TrendScore.bill_id.label("bill_id"),
            func.max(TrendScore.composite_score).label("best_score"),
        )
        .where(TrendScore.score_date >= window_start)
        .group_by(TrendScore.bill_id)
        .subquery()
    )
    trending_result = await db.execute(
        _dashboard_bill_select()
        .join(best_scores, Bill.bill_id == best_scores.c.bill_id)
        .order_by(desc(best_scores.c.best_score))
        .limit(10)
    )
    trending_bills = trending_result.scalars().unique().all()

    return {
        "feed": [_serialize_bill(b) for b in feed_bills[:50]],
        "trending": [_serialize_bill(b) for b in trending_bills],
        "follows": {
            "bills": len(followed_bill_ids),
            "members": len(followed_member_ids),
            "topics": len(followed_topics),
        },
    }
|
||||
49
backend/app/api/follows.py
Normal file
49
backend/app/api/follows.py
Normal file
@@ -0,0 +1,49 @@
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.exc import IntegrityError
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.database import get_db
|
||||
from app.models import Follow
|
||||
from app.schemas.schemas import FollowCreate, FollowSchema
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
VALID_FOLLOW_TYPES = {"bill", "member", "topic"}
|
||||
|
||||
|
||||
@router.get("", response_model=list[FollowSchema])
async def list_follows(db: AsyncSession = Depends(get_db)):
    """Return all follows, most recently created first."""
    newest_first = select(Follow).order_by(Follow.created_at.desc())
    rows = await db.execute(newest_first)
    return rows.scalars().all()
|
||||
|
||||
|
||||
@router.post("", response_model=FollowSchema, status_code=201)
async def add_follow(body: FollowCreate, db: AsyncSession = Depends(get_db)):
    """Create a follow, or return the existing one when the insert hits an integrity error.

    Raises:
        HTTPException: 400 when ``body.follow_type`` is not in ``VALID_FOLLOW_TYPES``.
    """
    if body.follow_type not in VALID_FOLLOW_TYPES:
        raise HTTPException(status_code=400, detail=f"follow_type must be one of {VALID_FOLLOW_TYPES}")

    new_follow = Follow(follow_type=body.follow_type, follow_value=body.follow_value)
    db.add(new_follow)
    try:
        await db.commit()
        await db.refresh(new_follow)
    except IntegrityError:
        await db.rollback()
    else:
        return new_follow

    # Already following — look up and return the stored row instead.
    same_follow = select(Follow).where(
        Follow.follow_type == body.follow_type,
        Follow.follow_value == body.follow_value,
    )
    existing = await db.execute(same_follow)
    return existing.scalar_one()
|
||||
|
||||
|
||||
@router.delete("/{follow_id}", status_code=204)
async def remove_follow(follow_id: int, db: AsyncSession = Depends(get_db)):
    """Delete the follow with the given id.

    Raises:
        HTTPException: 404 when no such follow exists.
    """
    target = await db.get(Follow, follow_id)
    if target:
        await db.delete(target)
        await db.commit()
        return
    raise HTTPException(status_code=404, detail="Follow not found")
|
||||
43
backend/app/api/health.py
Normal file
43
backend/app/api/health.py
Normal file
@@ -0,0 +1,43 @@
|
||||
from datetime import datetime, timezone
|
||||
|
||||
import redis as redis_lib
|
||||
from fastapi import APIRouter, Depends
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.config import settings
|
||||
from app.database import get_db
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@router.get("")
async def health():
    """Return a fixed "ok" status with the current UTC time; no dependencies are checked."""
    now_utc = datetime.now(timezone.utc)
    return {"status": "ok", "timestamp": now_utc.isoformat()}
|
||||
|
||||
|
||||
def _ping_redis() -> bool:
    """Return True when the Redis server at ``settings.REDIS_URL`` answers PING."""
    client = redis_lib.from_url(settings.REDIS_URL)
    try:
        return bool(client.ping())
    finally:
        # A new client is created on every call; close it so its connections
        # are released instead of accumulating across requests.
        client.close()


@router.get("/detailed")
async def health_detailed(db: AsyncSession = Depends(get_db)):
    """Report whether the database and Redis are reachable.

    Returns:
        A dict with ``status`` ("ok" when both checks pass, otherwise
        "degraded"), ``database`` and ``redis`` ("ok" or "error") and a UTC
        ``timestamp``. A failed check never raises; it is logged and
        reported in the payload.
    """
    import asyncio
    import logging

    log = logging.getLogger(__name__)

    # Check DB
    db_ok = False
    try:
        await db.execute(text("SELECT 1"))
        db_ok = True
    except Exception:
        # Best-effort probe: keep the endpoint responding, but record why.
        log.warning("Health check: database probe failed", exc_info=True)

    # Check Redis. The client is synchronous, so the ping runs in a worker
    # thread to avoid blocking the event loop while it waits on the network.
    redis_ok = False
    try:
        redis_ok = await asyncio.to_thread(_ping_redis)
    except Exception:
        log.warning("Health check: Redis probe failed", exc_info=True)

    status = "ok" if (db_ok and redis_ok) else "degraded"
    return {
        "status": status,
        "database": "ok" if db_ok else "error",
        "redis": "ok" if redis_ok else "error",
        "timestamp": datetime.now(timezone.utc).isoformat(),
    }
|
||||
85
backend/app/api/members.py
Normal file
85
backend/app/api/members.py
Normal file
@@ -0,0 +1,85 @@
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query
|
||||
from sqlalchemy import desc, func, select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy.orm import selectinload
|
||||
|
||||
from app.database import get_db
|
||||
from app.models import Bill, Member
|
||||
from app.schemas.schemas import BillSchema, MemberSchema, PaginatedResponse
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@router.get("", response_model=PaginatedResponse[MemberSchema])
async def list_members(
    chamber: Optional[str] = Query(None),
    party: Optional[str] = Query(None),
    state: Optional[str] = Query(None),
    q: Optional[str] = Query(None),
    page: int = Query(1, ge=1),
    per_page: int = Query(50, ge=1, le=250),
    db: AsyncSession = Depends(get_db),
):
    """Return one page of members, optionally filtered.

    ``chamber``, ``party`` and ``state`` are exact-match filters; ``q`` is a
    case-insensitive substring match on the member name. Results are ordered
    by last name, then first name. ``pages`` is never less than 1.
    """
    query = select(Member)
    if chamber:
        query = query.where(Member.chamber == chamber)
    if party:
        query = query.where(Member.party == party)
    if state:
        query = query.where(Member.state == state)
    if q:
        # Escape LIKE metacharacters so "%" and "_" typed by the user match
        # literally instead of acting as wildcards.
        literal = q.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
        query = query.where(Member.name.ilike(f"%{literal}%", escape="\\"))

    # Count the filtered set before ordering/pagination is applied.
    total = await db.scalar(select(func.count()).select_from(query.subquery())) or 0
    query = (
        query.order_by(Member.last_name, Member.first_name)
        .offset((page - 1) * per_page)
        .limit(per_page)
    )

    result = await db.execute(query)
    members = result.scalars().all()

    return PaginatedResponse(
        items=members,
        total=total,
        page=page,
        per_page=per_page,
        pages=max(1, (total + per_page - 1) // per_page),
    )
|
||||
|
||||
|
||||
@router.get("/{bioguide_id}", response_model=MemberSchema)
async def get_member(bioguide_id: str, db: AsyncSession = Depends(get_db)):
    """Fetch a single member by primary key.

    Raises:
        HTTPException: 404 when no member has this ``bioguide_id``.
    """
    found = await db.get(Member, bioguide_id)
    if found:
        return found
    raise HTTPException(status_code=404, detail="Member not found")
|
||||
|
||||
|
||||
@router.get("/{bioguide_id}/bills", response_model=PaginatedResponse[BillSchema])
async def get_member_bills(
    bioguide_id: str,
    page: int = Query(1, ge=1),
    per_page: int = Query(20, ge=1, le=100),
    db: AsyncSession = Depends(get_db),
):
    """Return one page of bills sponsored by ``bioguide_id``, newest first.

    Each item carries the first entry of the bill's ``briefs`` relationship
    as ``latest_brief`` when the bill has any briefs.
    """
    query = (
        select(Bill)
        .options(
            selectinload(Bill.briefs),
            # BillSchema reads ``bill.sponsor``; load it eagerly so validation
            # does not trigger an implicit lazy load on the async session.
            selectinload(Bill.sponsor),
        )
        .where(Bill.sponsor_id == bioguide_id)
    )
    total = await db.scalar(select(func.count()).select_from(query.subquery())) or 0
    query = (
        query.order_by(desc(Bill.introduced_date))
        .offset((page - 1) * per_page)
        .limit(per_page)
    )

    result = await db.execute(query)
    bills = result.scalars().all()

    items = []
    for bill in bills:
        b = BillSchema.model_validate(bill)
        if bill.briefs:
            b.latest_brief = bill.briefs[0]
        items.append(b)

    return PaginatedResponse(
        items=items,
        total=total,
        page=page,
        per_page=per_page,
        pages=max(1, (total + per_page - 1) // per_page),
    )
|
||||
53
backend/app/api/search.py
Normal file
53
backend/app/api/search.py
Normal file
@@ -0,0 +1,53 @@
|
||||
from fastapi import APIRouter, Depends, Query
|
||||
from sqlalchemy import select, text
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.database import get_db
|
||||
from app.models import Bill, Member
|
||||
from app.schemas.schemas import BillSchema, MemberSchema
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@router.get("")
async def search(
    q: str = Query(..., min_length=2),
    db: AsyncSession = Depends(get_db),
):
    """Search bills (by id substring and full-text) and members (by name).

    Returns a dict with ``bills`` (id matches first, then full-text matches,
    de-duplicated by ``bill_id``) and ``members`` (up to 10, ordered by last
    name).
    """
    from sqlalchemy.orm import selectinload

    # Escape LIKE metacharacters so "%" and "_" in the query match literally.
    literal = q.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
    like_pattern = f"%{literal}%"

    # BillSchema reads ``bill.sponsor``; load it eagerly so validation does not
    # trigger an implicit lazy load on the async session.
    bill_select = select(Bill).options(selectinload(Bill.sponsor))

    # Bill ID direct match
    id_results = await db.execute(
        bill_select.where(Bill.bill_id.ilike(like_pattern, escape="\\")).limit(20)
    )
    id_bills = id_results.scalars().all()

    # Full-text search on title/content via tsvector
    fts_results = await db.execute(
        bill_select
        .where(text("search_vector @@ plainto_tsquery('english', :q)"))
        .order_by(text("ts_rank(search_vector, plainto_tsquery('english', :q)) DESC"))
        .limit(20)
        .params(q=q)
    )
    fts_bills = fts_results.scalars().all()

    # Merge, dedup, preserve order (ID matches first)
    seen = set()
    bills = []
    for b in [*id_bills, *fts_bills]:
        if b.bill_id not in seen:
            seen.add(b.bill_id)
            bills.append(b)

    # Fuzzy member search
    member_results = await db.execute(
        select(Member)
        .where(Member.name.ilike(like_pattern, escape="\\"))
        .order_by(Member.last_name)
        .limit(10)
    )
    members = member_results.scalars().all()

    return {
        "bills": [BillSchema.model_validate(b) for b in bills],
        "members": [MemberSchema.model_validate(m) for m in members],
    }
|
||||
86
backend/app/api/settings.py
Normal file
86
backend/app/api/settings.py
Normal file
@@ -0,0 +1,86 @@
|
||||
from fastapi import APIRouter, Depends
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.config import settings
|
||||
from app.database import get_db
|
||||
from app.models import AppSetting
|
||||
from app.schemas.schemas import SettingUpdate, SettingsResponse
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@router.get("", response_model=SettingsResponse)
async def get_settings(db: AsyncSession = Depends(get_db)):
    """Return current effective settings (env + DB overrides).

    Rows in ``app_settings`` take precedence over values from the environment.
    A stored poll interval that cannot be parsed as an integer is ignored in
    favour of the environment value rather than failing the request.
    """
    # DB overrides take precedence over env vars
    result = await db.execute(select(AppSetting))
    overrides: dict[str, str] = {row.key: row.value for row in result.scalars().all()}

    provider = overrides.get("llm_provider", settings.LLM_PROVIDER)

    try:
        poll_minutes = int(
            overrides.get("congress_poll_interval_minutes", settings.CONGRESS_POLL_INTERVAL_MINUTES)
        )
    except (TypeError, ValueError):
        # The override column is free-form text; fall back instead of a 500.
        poll_minutes = settings.CONGRESS_POLL_INTERVAL_MINUTES

    return SettingsResponse(
        llm_provider=provider,
        llm_model=overrides.get("llm_model", _current_model(provider)),
        congress_poll_interval_minutes=poll_minutes,
        newsapi_enabled=bool(settings.NEWSAPI_KEY),
        pytrends_enabled=settings.PYTRENDS_ENABLED,
    )
|
||||
|
||||
|
||||
@router.put("")
async def update_setting(body: SettingUpdate, db: AsyncSession = Depends(get_db)):
    """Update a runtime setting.

    Only keys in ``ALLOWED_KEYS`` may be written. The value is stored as text
    in ``app_settings`` (inserted, or updated when the key already exists).

    Raises:
        HTTPException: 400 for an unknown key, or when
            ``congress_poll_interval_minutes`` is not a positive integer.
    """
    from fastapi import HTTPException

    ALLOWED_KEYS = {"llm_provider", "llm_model", "congress_poll_interval_minutes"}
    if body.key not in ALLOWED_KEYS:
        # sorted() keeps the message stable; a set's repr order is not.
        raise HTTPException(status_code=400, detail=f"Allowed setting keys: {sorted(ALLOWED_KEYS)}")

    if body.key == "congress_poll_interval_minutes":
        # The settings read path converts this value with int(); reject
        # anything that would make that conversion fail or be meaningless.
        try:
            minutes = int(body.value)
        except ValueError:
            minutes = 0
        if minutes < 1:
            raise HTTPException(
                status_code=400,
                detail="congress_poll_interval_minutes must be a positive integer",
            )

    existing = await db.get(AppSetting, body.key)
    if existing:
        existing.value = body.value
    else:
        db.add(AppSetting(key=body.key, value=body.value))
    await db.commit()
    return {"key": body.key, "value": body.value}
|
||||
|
||||
|
||||
@router.post("/test-llm")
async def test_llm_connection():
    """Test that the configured LLM provider responds correctly.

    Generates a brief for a fixed dummy bill. Always answers with a JSON body:
    ``status`` is ``"ok"`` with provider, model and a summary preview (first
    100 characters plus ``"..."`` when longer), or ``"error"`` with the
    exception text in ``detail``.
    """
    import asyncio

    from app.services.llm_service import get_llm_provider

    try:
        provider = get_llm_provider()
        # generate_brief is a synchronous call; run it in a worker thread so
        # the event loop keeps serving other requests while the LLM responds.
        brief = await asyncio.to_thread(
            provider.generate_brief,
            doc_text="This is a test bill for connection verification purposes.",
            bill_metadata={
                "title": "Test Connection Bill",
                "sponsor_name": "Test Sponsor",
                "party": "Test",
                "state": "DC",
                "chamber": "House",
                "introduced_date": "2025-01-01",
                "latest_action_text": "Test action",
                "latest_action_date": "2025-01-01",
            },
        )
        summary = brief.summary
        preview = summary[:100] + "..." if len(summary) > 100 else summary
        return {
            "status": "ok",
            "provider": brief.llm_provider,
            "model": brief.llm_model,
            "summary_preview": preview,
        }
    except Exception as e:
        # Deliberate catch-all: this endpoint reports failures to the caller
        # as data instead of raising.
        return {"status": "error", "detail": str(e)}
|
||||
|
||||
|
||||
def _current_model(provider: str) -> str:
    """Return the env-configured model name for ``provider``.

    Recognised providers are ``openai``, ``anthropic``, ``gemini`` and
    ``ollama``; anything else yields ``"unknown"``.
    """
    setting_names = {
        "openai": "OPENAI_MODEL",
        "anthropic": "ANTHROPIC_MODEL",
        "gemini": "GEMINI_MODEL",
        "ollama": "OLLAMA_MODEL",
    }
    attr = setting_names.get(provider)
    if attr is None:
        return "unknown"
    return getattr(settings, attr)
|
||||
50
backend/app/config.py
Normal file
50
backend/app/config.py
Normal file
@@ -0,0 +1,50 @@
|
||||
from functools import lru_cache
|
||||
from pydantic_settings import BaseSettings, SettingsConfigDict
|
||||
|
||||
|
||||
class Settings(BaseSettings):
    # Application configuration. Values are read from the environment and the
    # ".env" file; variables that match no field are ignored (extra="ignore").
    model_config = SettingsConfigDict(env_file=".env", extra="ignore")

    # --- URLs ---------------------------------------------------------------
    LOCAL_URL: str = "http://localhost"
    PUBLIC_URL: str = ""

    # --- Database (async URL and sync URL) ----------------------------------
    DATABASE_URL: str = "postgresql+asyncpg://congress:congress@postgres:5432/pocketveto"
    SYNC_DATABASE_URL: str = "postgresql://congress:congress@postgres:5432/pocketveto"

    # --- Redis --------------------------------------------------------------
    REDIS_URL: str = "redis://redis:6379/0"

    # --- api.data.gov (shared key for Congress.gov and GovInfo) -------------
    DATA_GOV_API_KEY: str = ""
    CONGRESS_POLL_INTERVAL_MINUTES: int = 30

    # --- LLM ----------------------------------------------------------------
    LLM_PROVIDER: str = "openai"  # openai | anthropic | gemini | ollama

    OPENAI_API_KEY: str = ""
    OPENAI_MODEL: str = "gpt-4o"

    ANTHROPIC_API_KEY: str = ""
    ANTHROPIC_MODEL: str = "claude-opus-4-6"

    GEMINI_API_KEY: str = ""
    GEMINI_MODEL: str = "gemini-1.5-pro"

    OLLAMA_BASE_URL: str = "http://host.docker.internal:11434"
    OLLAMA_MODEL: str = "llama3.1"

    # --- News ---------------------------------------------------------------
    NEWSAPI_KEY: str = ""

    # --- pytrends -----------------------------------------------------------
    PYTRENDS_ENABLED: bool = True
|
||||
|
||||
|
||||
@lru_cache
def get_settings() -> Settings:
    """Build the ``Settings`` object once; later calls return the cached instance."""
    return Settings()


# Module-level singleton imported by the rest of the application.
settings = get_settings()
|
||||
53
backend/app/database.py
Normal file
53
backend/app/database.py
Normal file
@@ -0,0 +1,53 @@
|
||||
from contextlib import asynccontextmanager
|
||||
from typing import AsyncGenerator
|
||||
|
||||
from sqlalchemy import create_engine
|
||||
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
|
||||
from sqlalchemy.orm import DeclarativeBase, Session, sessionmaker
|
||||
|
||||
from app.config import settings
|
||||
|
||||
|
||||
class Base(DeclarativeBase):
    """Declarative base class shared by every ORM model in the application."""
|
||||
|
||||
|
||||
# ─── Async engine (FastAPI) ───────────────────────────────────────────────────
|
||||
|
||||
# Engine bound to the async database URL.
async_engine = create_async_engine(
    settings.DATABASE_URL,
    pool_size=10,
    max_overflow=20,
    echo=False,
)

# Session factory for the async engine; objects are not expired on commit.
AsyncSessionLocal = async_sessionmaker(
    async_engine,
    class_=AsyncSession,
    expire_on_commit=False,
)
|
||||
|
||||
|
||||
async def get_db() -> AsyncGenerator[AsyncSession, None]:
    """Yield an ``AsyncSession`` and close it once the consumer is done."""
    async with AsyncSessionLocal() as db_session:
        yield db_session
|
||||
|
||||
|
||||
# ─── Sync engine (Celery workers) ────────────────────────────────────────────
|
||||
|
||||
# Engine bound to the sync database URL; connections are pinged before use.
sync_engine = create_engine(
    settings.SYNC_DATABASE_URL,
    pool_pre_ping=True,
    pool_size=5,
    max_overflow=10,
)

# Session factory for the sync engine, with autoflush and autocommit disabled.
SyncSessionLocal = sessionmaker(
    autocommit=False,
    autoflush=False,
    bind=sync_engine,
)
|
||||
|
||||
|
||||
def get_sync_db() -> Session:
    """Return a new synchronous session; the caller is responsible for closing it."""
    new_session = SyncSessionLocal()
    return new_session
|
||||
28
backend/app/main.py
Normal file
28
backend/app/main.py
Normal file
@@ -0,0 +1,28 @@
|
||||
from fastapi import FastAPI
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
|
||||
from app.api import bills, members, follows, dashboard, search, settings, admin, health
|
||||
from app.config import settings as config
|
||||
|
||||
app = FastAPI(
    title="PocketVeto",
    description="Monitor US Congressional activity with AI-powered bill summaries.",
    version="1.0.0",
)

# Only non-empty configured URLs are allowed as CORS origins.
_allowed_origins = [origin for origin in (config.LOCAL_URL, config.PUBLIC_URL) if origin]

app.add_middleware(
    CORSMiddleware,
    allow_origins=_allowed_origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Each API module is mounted under /api/<name> and tagged with the same name.
_ROUTERS = (
    ("bills", bills),
    ("members", members),
    ("follows", follows),
    ("dashboard", dashboard),
    ("search", search),
    ("settings", settings),
    ("admin", admin),
    ("health", health),
)
for _name, _module in _ROUTERS:
    app.include_router(_module.router, prefix=f"/api/{_name}", tags=[_name])
|
||||
0
backend/app/management/__init__.py
Normal file
0
backend/app/management/__init__.py
Normal file
117
backend/app/management/backfill.py
Normal file
117
backend/app/management/backfill.py
Normal file
@@ -0,0 +1,117 @@
|
||||
"""
|
||||
Historical data backfill script.
|
||||
|
||||
Usage (run inside the api or worker container):
|
||||
python -m app.management.backfill --congress 118 119
|
||||
python -m app.management.backfill --congress 119 --skip-llm
|
||||
|
||||
This script fetches all bills from the specified Congress numbers,
|
||||
stores them in the database, and (optionally) enqueues document fetch
|
||||
and LLM processing tasks for each bill.
|
||||
|
||||
Cost note: LLM processing 15,000+ bills can be expensive.
|
||||
Consider using --skip-llm for initial backfill and processing
|
||||
manually / in batches.
|
||||
"""
|
||||
import argparse
|
||||
import logging
|
||||
import sys
|
||||
import time
|
||||
|
||||
# Root logging is configured at import time: INFO level, timestamped lines.
logging.basicConfig(
    format="%(asctime)s %(levelname)s %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def backfill_congress(congress_number: int, skip_llm: bool = False, dry_run: bool = False):
    """Page through all bills of one Congress and insert the ones not yet stored.

    Args:
        congress_number: Congress to fetch (passed to ``congress_api.get_bills``).
        skip_llm: When true, no ``fetch_bill_documents`` task is enqueued.
        dry_run: When true, bills are only logged and counted; nothing is
            written or enqueued.

    Returns:
        The number of newly inserted bills.
    """
    from app.database import get_sync_db
    from app.models import Bill
    from app.services import congress_api
    from app.workers.congress_poller import _sync_sponsor

    page_size = 250
    db = get_sync_db()
    offset = 0
    total_processed = 0
    total_new = 0
    # Bills added to the session but not yet committed / enqueued.
    pending_bill_ids: list[str] = []

    def _commit_and_enqueue() -> None:
        """Commit staged rows, then enqueue document fetches for them."""
        db.commit()
        # Enqueue only after the commit so the task is never published for a
        # bill row that is not yet committed.
        if pending_bill_ids and not skip_llm:
            from app.workers.document_fetcher import fetch_bill_documents

            for pending_id in pending_bill_ids:
                # Enqueue document + LLM at low priority
                fetch_bill_documents.apply_async(args=[pending_id], priority=3)
        pending_bill_ids.clear()

    logger.info(f"Starting backfill for Congress {congress_number} (skip_llm={skip_llm}, dry_run={dry_run})")

    try:
        while True:
            response = congress_api.get_bills(congress=congress_number, offset=offset, limit=page_size)
            bills_data = response.get("bills", [])

            if not bills_data:
                break

            for bill_data in bills_data:
                parsed = congress_api.parse_bill_from_api(bill_data, congress_number)
                bill_id = parsed["bill_id"]

                if dry_run:
                    logger.info(f"[DRY RUN] Would process: {bill_id}")
                    total_processed += 1
                    continue

                if db.get(Bill, bill_id):
                    # Already stored; count it and move on.
                    total_processed += 1
                    continue

                # Sync sponsor
                parsed["sponsor_id"] = _sync_sponsor(db, bill_data)

                db.add(Bill(**parsed))
                pending_bill_ids.append(bill_id)
                total_new += 1
                total_processed += 1

                if total_new % 50 == 0:
                    _commit_and_enqueue()
                    logger.info(f"Progress: {total_processed} processed, {total_new} new")

                # Stay well under Congress.gov rate limit (5,000/hr = ~1.4/sec)
                time.sleep(0.25)

            _commit_and_enqueue()
            offset += page_size

            if len(bills_data) < page_size:
                break  # Last page

            logger.info(f"Fetched page ending at offset {offset}, total processed: {total_processed}")
            time.sleep(1)  # Polite pause between pages

    except KeyboardInterrupt:
        logger.info("Interrupted by user")
        # Keep what was staged so far, as the original behaviour did.
        _commit_and_enqueue()
    except Exception:
        # Discard the partially staged batch before propagating the error.
        db.rollback()
        raise
    finally:
        db.close()

    logger.info(f"Backfill complete: {total_new} new bills added ({total_processed} total processed)")
    return total_new
|
||||
|
||||
|
||||
def main():
    """Parse command-line options and run the backfill for each requested Congress."""
    cli = argparse.ArgumentParser(description="Backfill Congressional bill data")
    cli.add_argument(
        "--congress",
        type=int,
        nargs="+",
        default=[119],
        help="Congress numbers to backfill (default: 119)",
    )
    cli.add_argument(
        "--skip-llm",
        action="store_true",
        help="Skip LLM processing (fetch documents only, don't enqueue briefs)",
    )
    cli.add_argument(
        "--dry-run",
        action="store_true",
        help="Count bills without actually inserting them",
    )
    options = cli.parse_args()

    # Congresses are processed one after another, in the order given.
    grand_total = 0
    for number in options.congress:
        added = backfill_congress(number, skip_llm=options.skip_llm, dry_run=options.dry_run)
        grand_total = grand_total + added

    logger.info(f"All done. Total new bills: {grand_total}")


if __name__ == "__main__":
    main()
|
||||
22
backend/app/models/__init__.py
Normal file
22
backend/app/models/__init__.py
Normal file
@@ -0,0 +1,22 @@
|
||||
from app.models.bill import Bill, BillAction, BillDocument
|
||||
from app.models.brief import BillBrief
|
||||
from app.models.follow import Follow
|
||||
from app.models.member import Member
|
||||
from app.models.news import NewsArticle
|
||||
from app.models.setting import AppSetting
|
||||
from app.models.trend import TrendScore
|
||||
from app.models.committee import Committee, CommitteeBill
|
||||
|
||||
# Public names re-exported by the models package.
__all__ = [
    "Bill", "BillAction", "BillDocument",
    "BillBrief",
    "Follow",
    "Member",
    "NewsArticle",
    "AppSetting",
    "TrendScore",
    "Committee", "CommitteeBill",
]
|
||||
88
backend/app/models/bill.py
Normal file
88
backend/app/models/bill.py
Normal file
@@ -0,0 +1,88 @@
|
||||
from sqlalchemy import (
|
||||
Column, String, Integer, Date, DateTime, Text, ForeignKey, Index
|
||||
)
|
||||
from sqlalchemy.orm import relationship
|
||||
from sqlalchemy.sql import func
|
||||
|
||||
from app.database import Base
|
||||
|
||||
|
||||
class Bill(Base):
    """ORM model for the ``bills`` table."""

    __tablename__ = "bills"

    # Natural key: "{congress}-{bill_type_lower}-{bill_number}" e.g. "119-hr-1234"
    bill_id = Column(String, primary_key=True)
    congress_number = Column(Integer, nullable=False)
    # hr, s, hjres, sjres, hconres, sconres, hres, sres
    bill_type = Column(String(10), nullable=False)
    bill_number = Column(Integer, nullable=False)
    title = Column(Text)
    short_title = Column(Text)
    sponsor_id = Column(String, ForeignKey("members.bioguide_id"), nullable=True)
    introduced_date = Column(Date)
    latest_action_date = Column(Date)
    latest_action_text = Column(Text)
    status = Column(String(100))
    chamber = Column(String(50))
    congress_url = Column(String)
    govtrack_url = Column(String)

    # Ingestion tracking
    last_checked_at = Column(DateTime(timezone=True))
    actions_fetched_at = Column(DateTime(timezone=True))
    created_at = Column(DateTime(timezone=True), server_default=func.now())
    updated_at = Column(DateTime(timezone=True), server_default=func.now(), onupdate=func.now())

    # Relationships; the ordered collections come back newest first.
    sponsor = relationship("Member", back_populates="bills", foreign_keys=[sponsor_id])
    actions = relationship(
        "BillAction", back_populates="bill", order_by="desc(BillAction.action_date)"
    )
    documents = relationship("BillDocument", back_populates="bill")
    briefs = relationship(
        "BillBrief", back_populates="bill", order_by="desc(BillBrief.created_at)"
    )
    news_articles = relationship(
        "NewsArticle", back_populates="bill", order_by="desc(NewsArticle.published_at)"
    )
    trend_scores = relationship(
        "TrendScore", back_populates="bill", order_by="desc(TrendScore.score_date)"
    )
    committee_bills = relationship("CommitteeBill", back_populates="bill")

    __table_args__ = (
        Index("ix_bills_congress_number", "congress_number"),
        Index("ix_bills_latest_action_date", "latest_action_date"),
        Index("ix_bills_introduced_date", "introduced_date"),
        Index("ix_bills_chamber", "chamber"),
        Index("ix_bills_sponsor_id", "sponsor_id"),
    )
|
||||
|
||||
|
||||
class BillAction(Base):
    """ORM model for the ``bill_actions`` table; rows are deleted with their bill."""

    __tablename__ = "bill_actions"

    id = Column(Integer, primary_key=True, autoincrement=True)
    bill_id = Column(String, ForeignKey("bills.bill_id", ondelete="CASCADE"), nullable=False)
    action_date = Column(Date)
    action_text = Column(Text)
    action_type = Column(String(100))
    chamber = Column(String(50))
    created_at = Column(DateTime(timezone=True), server_default=func.now())

    bill = relationship("Bill", back_populates="actions")

    __table_args__ = (
        Index("ix_bill_actions_bill_id", "bill_id"),
        Index("ix_bill_actions_action_date", "action_date"),
    )
|
||||
|
||||
|
||||
class BillDocument(Base):
    """ORM model for the ``bill_documents`` table; rows are deleted with their bill."""

    __tablename__ = "bill_documents"

    id = Column(Integer, primary_key=True, autoincrement=True)
    bill_id = Column(String, ForeignKey("bills.bill_id", ondelete="CASCADE"), nullable=False)
    # bill_text | committee_report | amendment
    doc_type = Column(String(50))
    # Introduced, Enrolled, etc.
    doc_version = Column(String(50))
    govinfo_url = Column(String)
    raw_text = Column(Text)
    fetched_at = Column(DateTime(timezone=True))
    created_at = Column(DateTime(timezone=True), server_default=func.now())

    bill = relationship("Bill", back_populates="documents")
    briefs = relationship("BillBrief", back_populates="document")

    __table_args__ = (
        Index("ix_bill_documents_bill_id", "bill_id"),
    )
|
||||
31
backend/app/models/brief.py
Normal file
31
backend/app/models/brief.py
Normal file
@@ -0,0 +1,31 @@
|
||||
from sqlalchemy import Column, Integer, String, Text, ForeignKey, DateTime, Index
|
||||
from sqlalchemy.dialects.postgresql import JSONB
|
||||
from sqlalchemy.orm import relationship
|
||||
from sqlalchemy.sql import func
|
||||
|
||||
from app.database import Base
|
||||
|
||||
|
||||
class BillBrief(Base):
    """ORM model for the ``bill_briefs`` table.

    A brief is deleted with its bill; when its source document is deleted the
    ``document_id`` reference is set to NULL instead.
    """

    __tablename__ = "bill_briefs"

    id = Column(Integer, primary_key=True, autoincrement=True)
    bill_id = Column(String, ForeignKey("bills.bill_id", ondelete="CASCADE"), nullable=False)
    document_id = Column(Integer, ForeignKey("bill_documents.id", ondelete="SET NULL"), nullable=True)
    # full | amendment
    brief_type = Column(String(20), nullable=False, server_default="full")
    summary = Column(Text)
    key_points = Column(JSONB)  # list[str]
    risks = Column(JSONB)  # list[str]
    deadlines = Column(JSONB)  # list[{date: str, description: str}]
    topic_tags = Column(JSONB)  # list[str]
    llm_provider = Column(String(50))
    llm_model = Column(String(100))
    created_at = Column(DateTime(timezone=True), server_default=func.now())

    bill = relationship("Bill", back_populates="briefs")
    document = relationship("BillDocument", back_populates="briefs")

    __table_args__ = (
        Index("ix_bill_briefs_bill_id", "bill_id"),
        # GIN index on the JSONB tag list.
        Index("ix_bill_briefs_topic_tags", "topic_tags", postgresql_using="gin"),
    )
|
||||
33
backend/app/models/committee.py
Normal file
33
backend/app/models/committee.py
Normal file
@@ -0,0 +1,33 @@
|
||||
from sqlalchemy import Column, Integer, String, Date, ForeignKey, Index
|
||||
from sqlalchemy.orm import relationship
|
||||
|
||||
from app.database import Base
|
||||
|
||||
|
||||
class Committee(Base):
    """ORM model for the ``committees`` table, unique per ``committee_code``."""

    __tablename__ = "committees"

    id = Column(Integer, primary_key=True, autoincrement=True)
    committee_code = Column(String(20), unique=True, nullable=False)
    name = Column(String(500))
    chamber = Column(String(10))
    # Standing, Select, Joint, etc.
    committee_type = Column(String(50))

    committee_bills = relationship("CommitteeBill", back_populates="committee")
|
||||
|
||||
|
||||
class CommitteeBill(Base):
    """ORM model for ``committee_bills``: links a committee to a bill.

    Rows are removed when either the committee or the bill is deleted.
    """

    __tablename__ = "committee_bills"

    id = Column(Integer, primary_key=True, autoincrement=True)
    committee_id = Column(Integer, ForeignKey("committees.id", ondelete="CASCADE"), nullable=False)
    bill_id = Column(String, ForeignKey("bills.bill_id", ondelete="CASCADE"), nullable=False)
    referral_date = Column(Date)

    committee = relationship("Committee", back_populates="committee_bills")
    bill = relationship("Bill", back_populates="committee_bills")

    __table_args__ = (
        Index("ix_committee_bills_bill_id", "bill_id"),
        Index("ix_committee_bills_committee_id", "committee_id"),
    )
|
||||
17
backend/app/models/follow.py
Normal file
17
backend/app/models/follow.py
Normal file
@@ -0,0 +1,17 @@
|
||||
from sqlalchemy import Column, Integer, String, DateTime, UniqueConstraint
|
||||
from sqlalchemy.sql import func
|
||||
|
||||
from app.database import Base
|
||||
|
||||
|
||||
class Follow(Base):
    """ORM model for the ``follows`` table; each (type, value) pair is unique."""

    __tablename__ = "follows"

    id = Column(Integer, primary_key=True, autoincrement=True)
    # bill | member | topic
    follow_type = Column(String(20), nullable=False)
    # bill_id | bioguide_id | tag string
    follow_value = Column(String, nullable=False)
    created_at = Column(DateTime(timezone=True), server_default=func.now())

    __table_args__ = (
        UniqueConstraint("follow_type", "follow_value", name="uq_follows_type_value"),
    )
|
||||
24
backend/app/models/member.py
Normal file
24
backend/app/models/member.py
Normal file
@@ -0,0 +1,24 @@
|
||||
from sqlalchemy import Column, String, DateTime
|
||||
from sqlalchemy.orm import relationship
|
||||
from sqlalchemy.sql import func
|
||||
|
||||
from app.database import Base
|
||||
|
||||
|
||||
class Member(Base):
    """ORM model for the ``members`` table, keyed by ``bioguide_id``."""

    __tablename__ = "members"

    bioguide_id = Column(String, primary_key=True)
    name = Column(String, nullable=False)
    first_name = Column(String)
    last_name = Column(String)
    party = Column(String(50))
    state = Column(String(50))
    chamber = Column(String(50))
    district = Column(String(50))
    photo_url = Column(String)
    official_url = Column(String)
    created_at = Column(DateTime(timezone=True), server_default=func.now())
    updated_at = Column(DateTime(timezone=True), server_default=func.now(), onupdate=func.now())

    # Bills whose ``sponsor_id`` points at this member.
    bills = relationship("Bill", back_populates="sponsor", foreign_keys="Bill.sponsor_id")
|
||||
25
backend/app/models/news.py
Normal file
25
backend/app/models/news.py
Normal file
@@ -0,0 +1,25 @@
|
||||
from sqlalchemy import Column, Integer, String, Text, Float, DateTime, ForeignKey, Index
|
||||
from sqlalchemy.orm import relationship
|
||||
from sqlalchemy.sql import func
|
||||
|
||||
from app.database import Base
|
||||
|
||||
|
||||
class NewsArticle(Base):
    """ORM model for the ``news_articles`` table.

    Each article belongs to one bill and is deleted with it; ``url`` is unique.
    """

    __tablename__ = "news_articles"

    id = Column(Integer, primary_key=True, autoincrement=True)
    bill_id = Column(String, ForeignKey("bills.bill_id", ondelete="CASCADE"), nullable=False)
    source = Column(String(200))
    headline = Column(Text)
    url = Column(String, unique=True)
    published_at = Column(DateTime(timezone=True))
    relevance_score = Column(Float, default=0.0)
    created_at = Column(DateTime(timezone=True), server_default=func.now())

    bill = relationship("Bill", back_populates="news_articles")

    __table_args__ = (
        Index("ix_news_articles_bill_id", "bill_id"),
        Index("ix_news_articles_published_at", "published_at"),
    )
|
||||
12
backend/app/models/setting.py
Normal file
12
backend/app/models/setting.py
Normal file
@@ -0,0 +1,12 @@
|
||||
from sqlalchemy import Column, String, DateTime
|
||||
from sqlalchemy.sql import func
|
||||
|
||||
from app.database import Base
|
||||
|
||||
|
||||
class AppSetting(Base):
    """ORM model for the ``app_settings`` table: one text value per key."""

    __tablename__ = "app_settings"

    key = Column(String, primary_key=True)
    value = Column(String)
    updated_at = Column(DateTime(timezone=True), server_default=func.now(), onupdate=func.now())
|
||||
25
backend/app/models/trend.py
Normal file
25
backend/app/models/trend.py
Normal file
@@ -0,0 +1,25 @@
|
||||
from sqlalchemy import Column, Integer, String, Date, Float, ForeignKey, Index, UniqueConstraint
|
||||
from sqlalchemy.orm import relationship
|
||||
|
||||
from app.database import Base
|
||||
|
||||
|
||||
class TrendScore(Base):
    """ORM model for the ``trend_scores`` table.

    At most one row exists per (bill, score_date); rows are deleted with
    their bill.
    """

    __tablename__ = "trend_scores"

    id = Column(Integer, primary_key=True, autoincrement=True)
    bill_id = Column(String, ForeignKey("bills.bill_id", ondelete="CASCADE"), nullable=False)
    score_date = Column(Date, nullable=False)
    newsapi_count = Column(Integer, default=0)
    gnews_count = Column(Integer, default=0)
    gtrends_score = Column(Float, default=0.0)
    composite_score = Column(Float, default=0.0)

    bill = relationship("Bill", back_populates="trend_scores")

    __table_args__ = (
        UniqueConstraint("bill_id", "score_date", name="uq_trend_scores_bill_date"),
        Index("ix_trend_scores_bill_id", "bill_id"),
        Index("ix_trend_scores_score_date", "score_date"),
        Index("ix_trend_scores_composite", "composite_score"),
    )
|
||||
0
backend/app/schemas/__init__.py
Normal file
0
backend/app/schemas/__init__.py
Normal file
145
backend/app/schemas/schemas.py
Normal file
145
backend/app/schemas/schemas.py
Normal file
@@ -0,0 +1,145 @@
|
||||
from datetime import date, datetime
|
||||
from typing import Any, Generic, Optional, TypeVar
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
T = TypeVar("T")
|
||||
|
||||
|
||||
class PaginatedResponse(BaseModel, Generic[T]):
    # Generic envelope for paged list endpoints.
    items: list[T]  # the rows on this page
    total: int  # number of rows matching the query across all pages
    page: int  # page number of this response
    per_page: int  # requested page size
    pages: int  # total number of pages
|
||||
|
||||
|
||||
# ── Member ────────────────────────────────────────────────────────────────────
|
||||
|
||||
class MemberSchema(BaseModel):
    # API representation of a member; can be built from ORM attributes.
    bioguide_id: str
    name: str
    first_name: Optional[str] = None
    last_name: Optional[str] = None
    party: Optional[str] = None
    state: Optional[str] = None
    chamber: Optional[str] = None
    district: Optional[str] = None
    photo_url: Optional[str] = None

    model_config = {"from_attributes": True}
|
||||
|
||||
|
||||
# ── Bill Brief ────────────────────────────────────────────────────────────────
|
||||
|
||||
class BriefSchema(BaseModel):
    # API representation of a bill brief; can be built from ORM attributes.
    id: int
    brief_type: str = "full"
    summary: Optional[str] = None
    key_points: Optional[list[str]] = None
    risks: Optional[list[str]] = None
    deadlines: Optional[list[dict[str, Any]]] = None
    topic_tags: Optional[list[str]] = None
    llm_provider: Optional[str] = None
    llm_model: Optional[str] = None
    created_at: Optional[datetime] = None

    model_config = {"from_attributes": True}
|
||||
|
||||
|
||||
# ── Bill Action ───────────────────────────────────────────────────────────────
|
||||
|
||||
class BillActionSchema(BaseModel):
    # API representation of one bill action; can be built from ORM attributes.
    id: int
    action_date: Optional[date] = None
    action_text: Optional[str] = None
    action_type: Optional[str] = None
    chamber: Optional[str] = None

    model_config = {"from_attributes": True}
|
||||
|
||||
|
||||
# ── News Article ──────────────────────────────────────────────────────────────
|
||||
|
||||
class NewsArticleSchema(BaseModel):
    # API representation of a news article; can be built from ORM attributes.
    id: int
    source: Optional[str] = None
    headline: Optional[str] = None
    url: Optional[str] = None
    published_at: Optional[datetime] = None
    relevance_score: Optional[float] = None

    model_config = {"from_attributes": True}
|
||||
|
||||
|
||||
# ── Trend Score ───────────────────────────────────────────────────────────────
|
||||
|
||||
class TrendScoreSchema(BaseModel):
    """Serialized trend score for one date; every field is required."""

    score_date: date

    # Per-source inputs.
    newsapi_count: int
    gnews_count: int
    gtrends_score: float

    # Combined score (how it is derived is not defined in this schema).
    composite_score: float

    # Lets the schema be populated from attribute-bearing objects, not only dicts.
    model_config = {"from_attributes": True}
|
||||
|
||||
|
||||
# ── Bill ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
class BillSchema(BaseModel):
    """Serialized summary view of a bill, with its sponsor and latest brief/trend."""

    # Required identity fields.
    bill_id: str
    congress_number: int
    bill_type: str
    bill_number: int

    # Titles and dates.
    title: str | None = None
    short_title: str | None = None
    introduced_date: date | None = None

    # Most recent legislative action and the status derived from it.
    latest_action_date: date | None = None
    latest_action_text: str | None = None
    status: str | None = None

    chamber: str | None = None
    congress_url: str | None = None

    # Nested objects; None when not available on the source object.
    sponsor: MemberSchema | None = None
    latest_brief: BriefSchema | None = None
    latest_trend: TrendScoreSchema | None = None

    updated_at: datetime | None = None

    # Lets the schema be populated from attribute-bearing objects, not only dicts.
    model_config = {"from_attributes": True}
|
||||
|
||||
|
||||
class BillDetailSchema(BillSchema):
    """Detail view of a bill: the summary fields plus its related collections."""

    # Each collection defaults to an empty list when the source provides none.
    actions: list[BillActionSchema] = []
    news_articles: list[NewsArticleSchema] = []
    trend_scores: list[TrendScoreSchema] = []
    briefs: list[BriefSchema] = []
|
||||
|
||||
|
||||
# ── Follow ────────────────────────────────────────────────────────────────────
|
||||
|
||||
class FollowCreate(BaseModel):
    """Request payload for creating a follow."""

    # One of: bill | member | topic (not validated by this schema).
    follow_type: str
    # What is being followed; its meaning depends on follow_type.
    follow_value: str
|
||||
|
||||
|
||||
class FollowSchema(BaseModel):
    """Serialized representation of a stored follow; every field is required."""

    id: int
    follow_type: str
    follow_value: str
    created_at: datetime

    # Lets the schema be populated from attribute-bearing objects, not only dicts.
    model_config = {"from_attributes": True}
|
||||
|
||||
|
||||
# ── Settings ──────────────────────────────────────────────────────────────────
|
||||
|
||||
class SettingUpdate(BaseModel):
    """Request payload for changing one setting, addressed by its key."""

    key: str
    # The new value is always transported as a string.
    value: str
|
||||
|
||||
|
||||
class SettingsResponse(BaseModel):
    """Response payload exposing the application's current settings."""

    # LLM selection.
    llm_provider: str
    llm_model: str

    # Polling cadence for Congress data, in minutes.
    congress_poll_interval_minutes: int

    # Feature toggles for the optional data sources.
    newsapi_enabled: bool
    pytrends_enabled: bool
|
||||
0
backend/app/services/__init__.py
Normal file
0
backend/app/services/__init__.py
Normal file
120
backend/app/services/congress_api.py
Normal file
120
backend/app/services/congress_api.py
Normal file
@@ -0,0 +1,120 @@
|
||||
"""
|
||||
Congress.gov API client.
|
||||
|
||||
Rate limit: 5,000 requests/hour (enforced server-side by Congress.gov).
|
||||
We track usage in Redis to stay well under the limit.
|
||||
"""
|
||||
import time
|
||||
from datetime import datetime
|
||||
from typing import Optional
|
||||
|
||||
import requests
|
||||
from tenacity import retry, stop_after_attempt, wait_exponential
|
||||
|
||||
from app.config import settings
|
||||
|
||||
BASE_URL = "https://api.congress.gov/v3"
|
||||
|
||||
|
||||
def _get_current_congress() -> int:
|
||||
"""Calculate the current Congress number. 119th started Jan 3, 2025."""
|
||||
year = datetime.utcnow().year
|
||||
# Congress changes on odd years (Jan 3)
|
||||
if datetime.utcnow().month == 1 and datetime.utcnow().day < 3:
|
||||
year -= 1
|
||||
return 118 + ((year - 2023) // 2 + (1 if year % 2 == 1 else 0))
|
||||
|
||||
|
||||
@retry(stop=stop_after_attempt(3), wait=wait_exponential(min=1, max=10))
def _get(endpoint: str, params: dict) -> dict:
    """GET a Congress.gov API endpoint and return the decoded JSON body.

    The call is attempted up to 3 times with exponential backoff (see the
    ``@retry`` decorator); any exception, including an HTTP error status
    raised by ``raise_for_status()``, triggers another attempt.

    Args:
        endpoint: Path appended to ``BASE_URL`` (e.g. ``"/bill/119"``).
        params: Query parameters. Not modified by this function.

    Returns:
        The parsed JSON response.
    """
    # Build a fresh dict so the API key never leaks into the caller's params.
    query = {**params, "api_key": settings.DATA_GOV_API_KEY, "format": "json"}
    response = requests.get(f"{BASE_URL}{endpoint}", params=query, timeout=30)
    response.raise_for_status()
    return response.json()
|
||||
|
||||
|
||||
def get_current_congress() -> int:
    """Public accessor for the current Congress number."""
    congress = _get_current_congress()
    return congress
|
||||
|
||||
|
||||
def build_bill_id(congress: int, bill_type: str, bill_number: int) -> str:
    """Compose the bill identifier ``<congress>-<type>-<number>``; the type is lower-cased."""
    parts = (congress, bill_type.lower(), bill_number)
    return "-".join(str(part) for part in parts)
|
||||
|
||||
|
||||
def get_bills(
    congress: int,
    offset: int = 0,
    limit: int = 250,
    from_date_time: Optional[str] = None,
) -> dict:
    """Fetch one page of bills for a Congress, sorted by update date (descending).

    ``from_date_time`` is forwarded as the ``fromDateTime`` query parameter
    only when it is truthy.
    """
    query: dict = dict(offset=offset, limit=limit, sort="updateDate+desc")
    if from_date_time:
        query.update(fromDateTime=from_date_time)
    endpoint = f"/bill/{congress}"
    return _get(endpoint, query)
|
||||
|
||||
|
||||
def get_bill_detail(congress: int, bill_type: str, bill_number: int) -> dict:
    """Fetch the detail record for a single bill (bill type is lower-cased in the path)."""
    endpoint = f"/bill/{congress}/{bill_type.lower()}/{bill_number}"
    return _get(endpoint, {})
|
||||
|
||||
|
||||
def get_bill_actions(congress: int, bill_type: str, bill_number: int, offset: int = 0) -> dict:
    """Fetch a page of up to 250 actions for a bill, starting at ``offset``."""
    endpoint = f"/bill/{congress}/{bill_type.lower()}/{bill_number}/actions"
    paging = {"offset": offset, "limit": 250}
    return _get(endpoint, paging)
|
||||
|
||||
|
||||
def get_bill_text_versions(congress: int, bill_type: str, bill_number: int) -> dict:
    """Fetch the ``/text`` resource (available text versions) for a bill."""
    endpoint = f"/bill/{congress}/{bill_type.lower()}/{bill_number}/text"
    return _get(endpoint, {})
|
||||
|
||||
|
||||
def get_members(offset: int = 0, limit: int = 250, current_member: bool = True) -> dict:
    """Fetch one page of members.

    When ``current_member`` is truthy the request adds ``currentMember=true``;
    otherwise that parameter is omitted.
    """
    current_only = {"currentMember": "true"} if current_member else {}
    query: dict = {"offset": offset, "limit": limit, **current_only}
    return _get("/member", query)
|
||||
|
||||
|
||||
def get_member_detail(bioguide_id: str) -> dict:
    """Fetch the detail record for one member, addressed by Bioguide ID."""
    endpoint = f"/member/{bioguide_id}"
    return _get(endpoint, {})
|
||||
|
||||
|
||||
def get_committees(offset: int = 0, limit: int = 250) -> dict:
    """Fetch one page of committees."""
    paging = {"offset": offset, "limit": limit}
    return _get("/committee", paging)
|
||||
|
||||
|
||||
def parse_bill_from_api(data: dict, congress: int) -> dict:
    """Map a raw API bill record onto the field names used by our bill model.

    Args:
        data: One bill object as returned by the API.
        congress: Congress number the bill belongs to.

    Returns:
        A dict of normalized bill fields.
    """
    kind = data.get("type", "").lower()
    number = data.get("number", 0)
    action = data.get("latestAction") or {}
    action_text = action.get("text")

    # Status is the first 100 characters of the latest action text, if any.
    status = action_text[:100] if action_text else None
    # Any bill type starting with "h" is treated as House; everything else as Senate.
    chamber = "House" if kind.startswith("h") else "Senate"

    parsed = {
        "bill_id": build_bill_id(congress, kind, number),
        "congress_number": congress,
        "bill_type": kind,
        "bill_number": number,
        "title": data.get("title"),
        "short_title": data.get("shortTitle"),
        "introduced_date": data.get("introducedDate"),
        "latest_action_date": action.get("actionDate"),
        "latest_action_text": action_text,
        "status": status,
        "chamber": chamber,
        "congress_url": data.get("url"),
    }
    return parsed
|
||||
|
||||
|
||||
def parse_member_from_api(data: dict) -> dict:
    """Map a raw API member record onto the field names used by our member model.

    ``terms`` is accepted both as ``{"item": [...]}`` and as a plain list, and
    null values for ``terms`` / ``depiction`` are treated as missing instead of
    raising.

    Args:
        data: One member object as returned by the API.

    Returns:
        A dict of normalized member fields. Chamber and district are taken from
        the last entry of ``terms``.
    """
    terms = data.get("terms") or []
    if isinstance(terms, dict):
        terms = terms.get("item") or []

    # The last listed term is treated as the current one.
    current_term = terms[-1] if terms else {}
    if not isinstance(current_term, dict):
        current_term = {}

    # NOTE(review): a falsy district (missing or 0) is stored as None — confirm
    # that this is the intended handling for at-large seats.
    district = current_term.get("district")
    depiction = data.get("depiction") or {}

    return {
        "bioguide_id": data.get("bioguideId"),
        "name": data.get("name", ""),
        "first_name": data.get("firstName"),
        "last_name": data.get("lastName"),
        "party": data.get("partyName") or None,
        "state": data.get("state"),
        "chamber": current_term.get("chamber"),
        "district": str(district) if district else None,
        "photo_url": depiction.get("imageUrl"),
        "official_url": data.get("officialWebsiteUrl"),
    }
|
||||
95
backend/app/services/govinfo_api.py
Normal file
95
backend/app/services/govinfo_api.py
Normal file
@@ -0,0 +1,95 @@
|
||||
"""
|
||||
GovInfo API client for fetching actual bill text.
|
||||
|
||||
Priority order for text formats: htm > txt > pdf
|
||||
"""
|
||||
import logging
|
||||
import re
|
||||
from typing import Optional
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from tenacity import retry, stop_after_attempt, wait_exponential
|
||||
|
||||
from app.config import settings
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
GOVINFO_BASE = "https://api.govinfo.gov"
|
||||
FORMAT_PRIORITY = ["htm", "html", "txt", "pdf"]
|
||||
|
||||
|
||||
@retry(stop=stop_after_attempt(3), wait=wait_exponential(min=2, max=15))
def _get(url: str, params: dict = None) -> requests.Response:
    """GET ``url`` with the API key attached and return the raw response.

    Caller-supplied ``params`` are layered on top of the API key. The call is
    attempted up to 3 times with exponential backoff (see ``@retry``); an HTTP
    error status raises via ``raise_for_status()``.
    """
    query = {"api_key": settings.DATA_GOV_API_KEY}
    query.update(params or {})
    resp = requests.get(url, params=query, timeout=60)
    resp.raise_for_status()
    return resp
|
||||
|
||||
|
||||
def get_package_summary(package_id: str) -> dict:
    """Fetch and decode the ``/summary`` resource of a GovInfo package."""
    summary_url = f"{GOVINFO_BASE}/packages/{package_id}/summary"
    return _get(summary_url).json()
|
||||
|
||||
|
||||
def get_package_content_detail(package_id: str) -> dict:
    """Fetch and decode the ``/content-detail`` resource of a GovInfo package."""
    detail_url = f"{GOVINFO_BASE}/packages/{package_id}/content-detail"
    return _get(detail_url).json()
|
||||
|
||||
|
||||
def find_best_text_url(text_versions: list[dict]) -> tuple[Optional[str], Optional[str]]:
    """Pick the best available text format from a list of text version objects.

    Formats are tried in ``FORMAT_PRIORITY`` order; within one format the
    versions are scanned in the order given and the first match wins.
    Matching is done on the URL extension since Congress.gov type strings are
    "Formatted Text", "PDF", etc.

    Args:
        text_versions: Text version objects (from the Congress.gov API), each
            expected to carry a ``formats`` list of ``{"url": ...}`` dicts.

    Returns:
        ``(url, format)`` for the best match, or ``(None, None)`` when nothing
        matches. The result is always a 2-tuple, so it is safe to unpack.
    """
    for fmt in FORMAT_PRIORITY:
        suffix = f".{fmt}"
        for version in text_versions or []:
            if not isinstance(version, dict):
                continue
            # ``formats`` may be missing or null; treat both as "no formats".
            for fmt_info in version.get("formats") or []:
                if not isinstance(fmt_info, dict):
                    continue
                url = fmt_info.get("url")
                # Skip entries whose url is missing, null, or not a string.
                if isinstance(url, str) and url.lower().endswith(suffix):
                    return url, fmt
    return None, None
|
||||
|
||||
|
||||
def fetch_text_from_url(url: str, fmt: str) -> Optional[str]:
    """Download a document and return its plain text.

    Args:
        url: Document URL to download.
        fmt: One of "htm", "html", "txt" or "pdf"; selects the extraction path.

    Returns:
        The extracted text, or None when the download/extraction fails or
        ``fmt`` is not one of the supported values.
    """
    try:
        resp = requests.get(url, timeout=120)
        resp.raise_for_status()

        if fmt == "pdf":
            return _extract_from_pdf(resp.content)
        if fmt == "txt":
            return resp.text
        if fmt in ("htm", "html"):
            return _extract_from_html(resp.text)
    except Exception as e:
        # Best effort: any failure is logged and reported as "no text".
        logger.error(f"Failed to fetch text from {url}: {e}")
        return None
    # Unsupported format.
    return None
|
||||
|
||||
|
||||
def _extract_from_html(html: str) -> str:
    """Return the visible text of an HTML document with whitespace tidied up."""
    soup = BeautifulSoup(html, "lxml")

    # Drop elements that carry no document text (scripts, styles, page chrome).
    for element in soup.find_all(["script", "style", "nav", "header", "footer"]):
        element.decompose()

    extracted = soup.get_text(separator="\n")
    # Cap blank-line runs at one empty line, then squeeze runs of spaces.
    without_gaps = re.sub(r"\n{3,}", "\n\n", extracted)
    squeezed = re.sub(r" {2,}", " ", without_gaps)
    return squeezed.strip()
|
||||
|
||||
|
||||
def _extract_from_pdf(content: bytes) -> Optional[str]:
    """Extract text from in-memory PDF bytes with pdfminer.

    Returns None (after logging) on any failure, including pdfminer not being
    importable, because the imports happen inside the guarded section.
    """
    try:
        import io

        from pdfminer.high_level import extract_text as pdf_extract

        buffer = io.BytesIO(content)
        return pdf_extract(buffer)
    except Exception as e:
        logger.error(f"PDF extraction failed: {e}")
        return None
|
||||
327
backend/app/services/llm_service.py
Normal file
327
backend/app/services/llm_service.py
Normal file
@@ -0,0 +1,327 @@
|
||||
"""
|
||||
LLM provider abstraction.
|
||||
|
||||
All providers implement generate_brief(doc_text, bill_metadata) -> ReverseBrief.
|
||||
Select provider via LLM_PROVIDER env var.
|
||||
"""
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
from app.config import settings
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
SYSTEM_PROMPT = """You are a nonpartisan legislative analyst specializing in translating complex \
|
||||
legislation into clear, accurate summaries for informed citizens. You analyze bills objectively \
|
||||
without political bias.
|
||||
|
||||
Always respond with valid JSON matching exactly this schema:
|
||||
{
|
||||
"summary": "2-4 paragraph plain-language summary of what this bill does",
|
||||
"key_points": ["specific concrete fact 1", "specific concrete fact 2"],
|
||||
"risks": ["legitimate concern or challenge 1", "legitimate concern 2"],
|
||||
"deadlines": [{"date": "YYYY-MM-DD or null", "description": "what happens on this date"}],
|
||||
"topic_tags": ["healthcare", "taxation"]
|
||||
}
|
||||
|
||||
Rules:
|
||||
- summary: Explain WHAT the bill does, not whether it is good or bad. Be factual and complete.
|
||||
- key_points: 5-10 specific, concrete things the bill changes, authorizes, or appropriates.
|
||||
- risks: Legitimate concerns from any perspective — costs, implementation challenges, \
|
||||
constitutional questions, unintended consequences. Include at least 2 even for benign bills.
|
||||
- deadlines: Only include if explicitly stated in the text. Use null for date if a deadline \
|
||||
is mentioned without a specific date. Empty list if none.
|
||||
- topic_tags: 3-8 lowercase tags. Prefer these standard tags: healthcare, taxation, defense, \
|
||||
education, immigration, environment, housing, infrastructure, technology, agriculture, judiciary, \
|
||||
foreign-policy, veterans, social-security, trade, budget, energy, banking, transportation, \
|
||||
public-lands, labor, civil-rights, science.
|
||||
|
||||
Respond with ONLY valid JSON. No preamble, no explanation, no markdown code blocks."""
|
||||
|
||||
MAX_TOKENS_DEFAULT = 6000
|
||||
MAX_TOKENS_OLLAMA = 3000
|
||||
TOKENS_PER_CHAR = 0.25 # rough approximation: 4 chars ≈ 1 token
|
||||
|
||||
|
||||
@dataclass
class ReverseBrief:
    """Structured brief produced by an LLM provider for one bill."""

    # Content sections, mirroring the JSON schema requested in the prompts.
    summary: str
    key_points: list[str]
    risks: list[str]
    deadlines: list[dict]
    topic_tags: list[str]

    # Provenance: which provider and model generated this brief.
    llm_provider: str
    llm_model: str
|
||||
|
||||
|
||||
def smart_truncate(text: str, max_tokens: int) -> str:
    """Truncate bill text to roughly ``max_tokens`` tokens, keeping head and tail.

    Token count is approximated as ``len(text) * TOKENS_PER_CHAR``. Text within
    budget is returned unchanged. Otherwise the first 75% of the budget is kept
    from the start of the text (preamble / purpose sections) and the last 25%
    from the end (effective dates / enforcement sections), joined by a marker
    stating how many characters were dropped.

    Args:
        text: Full document text.
        max_tokens: Approximate token budget for the kept text (the marker is
            not counted against it).

    Returns:
        The original text, or the head + marker + tail excerpt.
    """
    approx_tokens = len(text) * TOKENS_PER_CHAR
    if approx_tokens <= max_tokens:
        return text

    preamble_chars = int(max_tokens * 0.75 / TOKENS_PER_CHAR)
    tail_chars = int(max_tokens * 0.25 / TOKENS_PER_CHAR)
    omitted_chars = len(text) - preamble_chars - tail_chars

    # Slice from an explicit start index: ``text[-tail_chars:]`` would return
    # the WHOLE text when tail_chars is 0, because -0 == 0.
    tail_start = len(text) - tail_chars

    return (
        text[:preamble_chars]
        + f"\n\n[... {omitted_chars:,} characters omitted for length ...]\n\n"
        + text[tail_start:]
    )
|
||||
|
||||
|
||||
AMENDMENT_SYSTEM_PROMPT = """You are a nonpartisan legislative analyst. A bill has been updated \
|
||||
and you must summarize what changed between the previous and new version.
|
||||
|
||||
Always respond with valid JSON matching exactly this schema:
|
||||
{
|
||||
"summary": "2-3 paragraph plain-language description of what changed in this version",
|
||||
"key_points": ["specific change 1", "specific change 2"],
|
||||
"risks": ["new concern introduced by this change 1", "concern 2"],
|
||||
"deadlines": [{"date": "YYYY-MM-DD or null", "description": "new deadline added"}],
|
||||
"topic_tags": ["healthcare", "taxation"]
|
||||
}
|
||||
|
||||
Rules:
|
||||
- summary: Focus ONLY on what is different from the previous version. Be specific.
|
||||
- key_points: List concrete additions, removals, or modifications in this version.
|
||||
- risks: Only include risks that are new or changed relative to the previous version.
|
||||
- deadlines: Only new or changed deadlines. Empty list if none.
|
||||
- topic_tags: Same standard tags as before — include any new topics this version adds.
|
||||
|
||||
Respond with ONLY valid JSON. No preamble, no explanation, no markdown code blocks."""
|
||||
|
||||
|
||||
def build_amendment_prompt(new_text: str, previous_text: str, bill_metadata: dict, max_tokens: int) -> str:
    """Build the user prompt asking the model to describe what changed in a bill.

    The token budget is split evenly: each version is truncated to
    ``max_tokens // 2``. Metadata missing from ``bill_metadata`` is shown with
    placeholder values.
    """
    per_version_budget = max_tokens // 2
    truncated_new = smart_truncate(new_text, per_version_budget)
    truncated_prev = smart_truncate(previous_text, per_version_budget)

    title = bill_metadata.get('title', 'Unknown')
    sponsor = bill_metadata.get('sponsor_name', 'Unknown')
    party = bill_metadata.get('party', '?')
    state = bill_metadata.get('state', '?')
    action_text = bill_metadata.get('latest_action_text', 'None')
    action_date = bill_metadata.get('latest_action_date', 'Unknown')

    return f"""A bill has been updated. Summarize what changed between the previous and new version.

BILL METADATA:
- Title: {title}
- Sponsor: {sponsor} \
({party}-{state})
- Latest Action: {action_text} \
({action_date})

PREVIOUS VERSION:
{truncated_prev}

NEW VERSION:
{truncated_new}

Produce the JSON amendment summary now:"""
|
||||
|
||||
|
||||
def build_prompt(doc_text: str, bill_metadata: dict, max_tokens: int) -> str:
    """Build the user prompt asking the model for a structured brief of a bill.

    The bill text is truncated to ``max_tokens`` first. Metadata missing from
    ``bill_metadata`` is shown with placeholder values.
    """
    truncated = smart_truncate(doc_text, max_tokens)

    title = bill_metadata.get('title', 'Unknown')
    sponsor = bill_metadata.get('sponsor_name', 'Unknown')
    party = bill_metadata.get('party', '?')
    state = bill_metadata.get('state', '?')
    introduced = bill_metadata.get('introduced_date', 'Unknown')
    chamber = bill_metadata.get('chamber', 'Unknown')
    action_text = bill_metadata.get('latest_action_text', 'None')
    action_date = bill_metadata.get('latest_action_date', 'Unknown')

    return f"""Analyze this legislation and produce a structured brief.

BILL METADATA:
- Title: {title}
- Sponsor: {sponsor} \
({party}-{state})
- Introduced: {introduced}
- Chamber: {chamber}
- Latest Action: {action_text} \
({action_date})

BILL TEXT:
{truncated}

Produce the JSON brief now:"""
|
||||
|
||||
|
||||
def _coerce_list(value) -> list:
    """Normalize an LLM-supplied field to a list: None/"" -> [], scalar -> [scalar]."""
    if value is None or value == "":
        return []
    if isinstance(value, (list, tuple)):
        return list(value)
    # A bare string or object where a list was requested: keep it as one item
    # rather than letting list() split a string into single characters.
    return [value]


def parse_brief_json(raw: str | dict, provider: str, model: str) -> ReverseBrief:
    """Parse an LLM response into a ReverseBrief.

    Args:
        raw: The model output, either as JSON text (optionally wrapped in
            markdown code fences) or as an already-decoded dict.
        provider: Provider name recorded on the brief.
        model: Model name recorded on the brief.

    Returns:
        A ReverseBrief. Missing or null fields become an empty string / empty
        lists; a scalar given where a list was requested becomes a one-item list.

    Raises:
        json.JSONDecodeError: If ``raw`` is a string that is not valid JSON.
    """
    if isinstance(raw, str):
        # Strip markdown code fences if present
        text = re.sub(r"^```(?:json)?\s*", "", raw.strip())
        text = re.sub(r"\s*```$", "", text.strip())
        data = json.loads(text)
    else:
        data = raw

    summary = data.get("summary")

    return ReverseBrief(
        # A JSON null must not turn into the literal text "None".
        summary="" if summary is None else str(summary),
        key_points=_coerce_list(data.get("key_points")),
        risks=_coerce_list(data.get("risks")),
        deadlines=_coerce_list(data.get("deadlines")),
        topic_tags=_coerce_list(data.get("topic_tags")),
        llm_provider=provider,
        llm_model=model,
    )
|
||||
|
||||
|
||||
class LLMProvider(ABC):
    """Interface every LLM backend implements."""

    @abstractmethod
    def generate_brief(self, doc_text: str, bill_metadata: dict) -> ReverseBrief:
        """Produce a brief for a bill from its text and metadata."""

    @abstractmethod
    def generate_amendment_brief(self, new_text: str, previous_text: str, bill_metadata: dict) -> ReverseBrief:
        """Produce a brief describing what changed between two versions of a bill."""
|
||||
|
||||
|
||||
class OpenAIProvider(LLMProvider):
    """LLM provider backed by the OpenAI chat completions API."""

    def __init__(self):
        # Imported lazily so the SDK is only needed when this provider is used.
        from openai import OpenAI

        self.client = OpenAI(api_key=settings.OPENAI_API_KEY)
        self.model = settings.OPENAI_MODEL

    def _complete(self, system_prompt: str, user_prompt: str) -> ReverseBrief:
        """Run one JSON-mode chat completion and parse the reply into a brief."""
        completion = self.client.chat.completions.create(
            model=self.model,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            response_format={"type": "json_object"},
            temperature=0.1,
        )
        reply = completion.choices[0].message.content
        return parse_brief_json(reply, "openai", self.model)

    def generate_brief(self, doc_text: str, bill_metadata: dict) -> ReverseBrief:
        """Produce a brief for a bill from its text and metadata."""
        user_prompt = build_prompt(doc_text, bill_metadata, MAX_TOKENS_DEFAULT)
        return self._complete(SYSTEM_PROMPT, user_prompt)

    def generate_amendment_brief(self, new_text: str, previous_text: str, bill_metadata: dict) -> ReverseBrief:
        """Produce a brief describing what changed between two versions of a bill."""
        user_prompt = build_amendment_prompt(new_text, previous_text, bill_metadata, MAX_TOKENS_DEFAULT)
        return self._complete(AMENDMENT_SYSTEM_PROMPT, user_prompt)
|
||||
|
||||
|
||||
class AnthropicProvider(LLMProvider):
    """LLM provider backed by the Anthropic messages API."""

    def __init__(self):
        # Imported lazily so the SDK is only needed when this provider is used.
        import anthropic

        self.client = anthropic.Anthropic(api_key=settings.ANTHROPIC_API_KEY)
        self.model = settings.ANTHROPIC_MODEL

    def _complete(self, system_prompt: str, user_prompt: str) -> ReverseBrief:
        """Send one message (4096-token reply cap) and parse the reply into a brief."""
        message = self.client.messages.create(
            model=self.model,
            max_tokens=4096,
            # The JSON-only instruction is appended to whichever system prompt is used.
            system=system_prompt + "\n\nIMPORTANT: Respond with ONLY valid JSON. No other text.",
            messages=[{"role": "user", "content": user_prompt}],
        )
        reply = message.content[0].text
        return parse_brief_json(reply, "anthropic", self.model)

    def generate_brief(self, doc_text: str, bill_metadata: dict) -> ReverseBrief:
        """Produce a brief for a bill from its text and metadata."""
        user_prompt = build_prompt(doc_text, bill_metadata, MAX_TOKENS_DEFAULT)
        return self._complete(SYSTEM_PROMPT, user_prompt)

    def generate_amendment_brief(self, new_text: str, previous_text: str, bill_metadata: dict) -> ReverseBrief:
        """Produce a brief describing what changed between two versions of a bill."""
        user_prompt = build_amendment_prompt(new_text, previous_text, bill_metadata, MAX_TOKENS_DEFAULT)
        return self._complete(AMENDMENT_SYSTEM_PROMPT, user_prompt)
|
||||
|
||||
|
||||
class GeminiProvider(LLMProvider):
    """LLM provider backed by Google's generative AI SDK."""

    def __init__(self):
        # Imported lazily so the SDK is only needed when this provider is used.
        import google.generativeai as genai

        genai.configure(api_key=settings.GEMINI_API_KEY)
        self._genai = genai
        self.model_name = settings.GEMINI_MODEL

    def _make_model(self, system_prompt: str):
        """Create a model handle configured for JSON output with the given system prompt."""
        generation_config = {"response_mime_type": "application/json", "temperature": 0.1}
        return self._genai.GenerativeModel(
            model_name=self.model_name,
            generation_config=generation_config,
            system_instruction=system_prompt,
        )

    def _run(self, system_prompt: str, user_prompt: str) -> ReverseBrief:
        """Generate content for one prompt and parse the reply into a brief."""
        reply = self._make_model(system_prompt).generate_content(user_prompt)
        return parse_brief_json(reply.text, "gemini", self.model_name)

    def generate_brief(self, doc_text: str, bill_metadata: dict) -> ReverseBrief:
        """Produce a brief for a bill from its text and metadata."""
        user_prompt = build_prompt(doc_text, bill_metadata, MAX_TOKENS_DEFAULT)
        return self._run(SYSTEM_PROMPT, user_prompt)

    def generate_amendment_brief(self, new_text: str, previous_text: str, bill_metadata: dict) -> ReverseBrief:
        """Produce a brief describing what changed between two versions of a bill."""
        user_prompt = build_amendment_prompt(new_text, previous_text, bill_metadata, MAX_TOKENS_DEFAULT)
        return self._run(AMENDMENT_SYSTEM_PROMPT, user_prompt)
|
||||
|
||||
|
||||
class OllamaProvider(LLMProvider):
    """LLM provider backed by an Ollama server's ``/api/generate`` endpoint.

    Uses the smaller ``MAX_TOKENS_OLLAMA`` budget for prompts, and retries
    once with a stricter instruction when the reply cannot be parsed.
    """

    # Appended to the user prompt for the single retry after a parse failure.
    _STRICT_JSON_SUFFIX = (
        "\n\nCRITICAL: Your response MUST be valid JSON only. No text before or after the JSON object."
    )

    def __init__(self):
        self.base_url = settings.OLLAMA_BASE_URL.rstrip("/")
        self.model = settings.OLLAMA_MODEL

    def _generate(self, system_prompt: str, user_prompt: str) -> str:
        """Send one non-streaming, JSON-format generate request; return the raw reply text.

        The system and user prompts are concatenated into a single prompt.
        Returns "" when the response body has no ``response`` field. HTTP
        errors propagate via ``raise_for_status()``.
        """
        import requests as req

        full_prompt = f"{system_prompt}\n\n{user_prompt}"
        response = req.post(
            f"{self.base_url}/api/generate",
            json={"model": self.model, "prompt": full_prompt, "stream": False, "format": "json"},
            timeout=300,
        )
        response.raise_for_status()
        # The previous ``try: return raw / except`` retry could never run
        # (returning a value cannot raise); retrying is done by the callers.
        return response.json().get("response", "")

    def _brief_with_retry(self, system_prompt: str, prompt: str, failure_label: str) -> ReverseBrief:
        """Generate and parse a brief, retrying once with a stricter prompt on parse failure.

        Only ``json.JSONDecodeError`` / ``KeyError`` from the first parse
        trigger the retry; a failure of the second attempt propagates.
        """
        raw = self._generate(system_prompt, prompt)
        try:
            return parse_brief_json(raw, "ollama", self.model)
        except (json.JSONDecodeError, KeyError) as e:
            logger.warning(f"{failure_label}, retrying: {e}")
            retry_raw = self._generate(system_prompt, prompt + self._STRICT_JSON_SUFFIX)
            return parse_brief_json(retry_raw, "ollama", self.model)

    def generate_brief(self, doc_text: str, bill_metadata: dict) -> ReverseBrief:
        """Produce a brief for a bill from its text and metadata."""
        prompt = build_prompt(doc_text, bill_metadata, MAX_TOKENS_OLLAMA)
        return self._brief_with_retry(SYSTEM_PROMPT, prompt, "Ollama JSON parse failed")

    def generate_amendment_brief(self, new_text: str, previous_text: str, bill_metadata: dict) -> ReverseBrief:
        """Produce a brief describing what changed between two versions of a bill."""
        prompt = build_amendment_prompt(new_text, previous_text, bill_metadata, MAX_TOKENS_OLLAMA)
        return self._brief_with_retry(AMENDMENT_SYSTEM_PROMPT, prompt, "Ollama amendment JSON parse failed")
|
||||
|
||||
|
||||
def get_llm_provider() -> LLMProvider:
    """Factory — instantiate the provider named by ``settings.LLM_PROVIDER``.

    The setting is matched case-insensitively.

    Raises:
        ValueError: If the configured name is not a known provider.
    """
    provider = settings.LLM_PROVIDER.lower()
    # Map names to classes; only the selected provider is ever instantiated.
    registry = {
        "openai": OpenAIProvider,
        "anthropic": AnthropicProvider,
        "gemini": GeminiProvider,
        "ollama": OllamaProvider,
    }
    provider_cls = registry.get(provider)
    if provider_cls is None:
        raise ValueError(f"Unknown LLM_PROVIDER: '{provider}'. Must be one of: openai, anthropic, gemini, ollama")
    return provider_cls()
|
||||
89
backend/app/services/news_service.py
Normal file
89
backend/app/services/news_service.py
Normal file
@@ -0,0 +1,89 @@
|
||||
"""
|
||||
News correlation service.
|
||||
|
||||
- NewsAPI.org: structured news articles per bill (100 req/day limit)
|
||||
- Google News RSS: volume signal for zeitgeist scoring (no limit)
|
||||
"""
|
||||
import logging
|
||||
import time
|
||||
import urllib.parse
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from typing import Optional
|
||||
|
||||
import feedparser
|
||||
import requests
|
||||
from tenacity import retry, stop_after_attempt, wait_exponential
|
||||
|
||||
from app.config import settings
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
NEWSAPI_BASE = "https://newsapi.org/v2"
|
||||
GOOGLE_NEWS_RSS = "https://news.google.com/rss/search"
|
||||
NEWSAPI_DAILY_LIMIT = 95 # Leave 5 as buffer
|
||||
|
||||
|
||||
@retry(stop=stop_after_attempt(2), wait=wait_exponential(min=1, max=5))
def _newsapi_get(endpoint: str, params: dict) -> dict:
    """GET a NewsAPI endpoint and return the decoded JSON body.

    The call is attempted up to 2 times with exponential backoff (see the
    ``@retry`` decorator); an HTTP error status raises via
    ``raise_for_status()`` and counts as a failed attempt.

    Args:
        endpoint: Path segment appended to ``NEWSAPI_BASE`` (e.g. "everything").
        params: Query parameters. Not modified by this function.

    Returns:
        The parsed JSON response.
    """
    # Build a fresh dict so the API key never leaks into the caller's params.
    query = {**params, "apiKey": settings.NEWSAPI_KEY}
    response = requests.get(f"{NEWSAPI_BASE}/{endpoint}", params=query, timeout=30)
    response.raise_for_status()
    return response.json()
|
||||
|
||||
|
||||
def build_news_query(bill_title: str, short_title: Optional[str], sponsor_name: Optional[str],
                     bill_type: str, bill_number: int) -> str:
    """Build a NewsAPI search query for a bill.

    The query is at most two quoted phrases joined by ``OR``: a title phrase
    (the short title if present, otherwise the first 6 words of the full title
    when it has at least 3 words) and the bill designation such as "HR 1234".
    ``sponsor_name`` is accepted but not used.
    """
    title_phrase = None
    if short_title:
        title_phrase = f'"{short_title}"'
    elif bill_title:
        leading_words = bill_title.split()[:6]
        # Very short titles make poor phrase searches; skip them.
        if len(leading_words) >= 3:
            title_phrase = f'"{" ".join(leading_words)}"'

    # The bill designation is always included as a fallback term.
    designation = f'"{bill_type.upper()} {bill_number}"'

    phrases = [phrase for phrase in (title_phrase, designation) if phrase is not None]
    return " OR ".join(phrases[:2])  # Keep queries short for relevance
|
||||
|
||||
|
||||
def fetch_newsapi_articles(query: str, days: int = 30) -> list[dict]:
    """Fetch up to 10 English articles from NewsAPI.org for the past ``days`` days.

    Returns an empty list when no API key is configured or when the request
    fails for any reason (the failure is logged). Articles lacking a URL or a
    title are dropped.
    """
    if not settings.NEWSAPI_KEY:
        return []

    try:
        cutoff = datetime.now(timezone.utc) - timedelta(days=days)
        payload = _newsapi_get("everything", {
            "q": query,
            "language": "en",
            "sortBy": "relevancy",
            "pageSize": 10,
            "from": cutoff.strftime("%Y-%m-%d"),
        })

        results = []
        for article in payload.get("articles", []):
            if not (article.get("url") and article.get("title")):
                continue
            results.append({
                "source": article.get("source", {}).get("name", ""),
                "headline": article.get("title", ""),
                "url": article.get("url", ""),
                "published_at": article.get("publishedAt"),
            })
        return results
    except Exception as e:
        logger.error(f"NewsAPI fetch failed: {e}")
        return []
|
||||
|
||||
|
||||
def fetch_gnews_count(query: str, days: int = 30) -> int:
    """Count Google News RSS entries matching ``query`` over the past ``days`` days.

    Used as a volume signal. Returns 0 (after logging) if anything raises.
    """
    try:
        # ``when:<N>d`` restricts the search window; the whole term is URL-quoted.
        search_term = urllib.parse.quote(f"{query} when:{days}d")
        feed_url = f"{GOOGLE_NEWS_RSS}?q={search_term}&hl=en-US&gl=US&ceid=US:en"
        time.sleep(1)  # Polite delay
        entries = feedparser.parse(feed_url).entries
        return len(entries)
    except Exception as e:
        logger.error(f"Google News RSS fetch failed: {e}")
        return 0
|
||||
64
backend/app/services/trends_service.py
Normal file
64
backend/app/services/trends_service.py
Normal file
@@ -0,0 +1,64 @@
|
||||
"""
|
||||
Google Trends service (via pytrends).
|
||||
|
||||
pytrends is unofficial web scraping — Google blocks it sporadically.
|
||||
All calls are wrapped in try/except and return 0 on any failure.
|
||||
"""
|
||||
import logging
|
||||
import random
|
||||
import time
|
||||
|
||||
from app.config import settings
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def get_trends_score(keywords: list[str]) -> float:
    """Return a 0–100 interest score for the given keywords over the past 90 days.

    The score is the mean of the 14 most recent data points for the first
    non-empty keyword; at most 5 keywords are sent in one request.

    Returns 0.0 when trends are disabled, when no usable keyword is given, and
    on any failure (rate limit, empty data, NaN result, exception).
    """
    if not settings.PYTRENDS_ENABLED or not keywords:
        return 0.0
    try:
        # Filter first so an all-empty keyword list costs neither the
        # anti-bot sleep nor a client setup.
        kw_list = [k for k in keywords[:5] if k]  # max 5 keywords
        if not kw_list:
            return 0.0

        from pytrends.request import TrendReq

        # Jitter to avoid detection as bot
        time.sleep(random.uniform(2.0, 5.0))

        pytrends = TrendReq(hl="en-US", tz=0, timeout=(10, 25))
        pytrends.build_payload(kw_list, timeframe="today 3-m", geo="US")
        data = pytrends.interest_over_time()

        if data is None or data.empty:
            return 0.0

        # Average the most recent 14 data points for the primary keyword
        primary = kw_list[0]
        if primary not in data.columns:
            return 0.0

        score = float(data[primary].tail(14).mean())
        # NaN is the only float that differs from itself; honour the documented
        # 0.0 fallback instead of leaking NaN to callers.
        return 0.0 if score != score else score

    except Exception as e:
        logger.debug(f"pytrends failed (non-critical): {e}")
        return 0.0
|
||||
|
||||
|
||||
def keywords_for_bill(title: str, short_title: str, topic_tags: list[str]) -> list[str]:
    """Derive up to 5 search keywords for a bill.

    The first keyword is the short title if present, otherwise the first 5
    words of the title (only when it has at least 2 words). Up to 3 topic tags
    follow, with hyphens replaced by spaces.
    """
    lead = []
    if short_title:
        lead = [short_title]
    elif title:
        head_words = title.split()[:5]
        if len(head_words) >= 2:
            lead = [" ".join(head_words)]

    tag_terms = [tag.replace("-", " ") for tag in (topic_tags or [])[:3]]
    return (lead + tag_terms)[:5]
|
||||
0
backend/app/workers/__init__.py
Normal file
0
backend/app/workers/__init__.py
Normal file
62
backend/app/workers/celery_app.py
Normal file
62
backend/app/workers/celery_app.py
Normal file
@@ -0,0 +1,62 @@
|
||||
from celery import Celery
|
||||
from celery.schedules import crontab
|
||||
from kombu import Queue
|
||||
|
||||
from app.config import settings
|
||||
|
||||
# Modules that define the Celery tasks; handed to Celery via `include`.
_TASK_MODULES = [
    "app.workers.congress_poller",
    "app.workers.document_fetcher",
    "app.workers.llm_processor",
    "app.workers.news_fetcher",
    "app.workers.trend_scorer",
]

# Redis serves as both the message broker and the result backend.
celery_app = Celery(
    "pocketveto",
    broker=settings.REDIS_URL,
    backend=settings.REDIS_URL,
    include=_TASK_MODULES,
)

# Route tasks to named queues (trend scoring shares the "news" queue).
_TASK_ROUTES = {
    "app.workers.congress_poller.*": {"queue": "polling"},
    "app.workers.document_fetcher.*": {"queue": "documents"},
    "app.workers.llm_processor.*": {"queue": "llm"},
    "app.workers.news_fetcher.*": {"queue": "news"},
    "app.workers.trend_scorer.*": {"queue": "news"},
}

# Periodic jobs: bill polling every N minutes, news every 6 hours on the
# hour, trend scoring once a day at 02:00 (UTC, per the timezone setting).
_BEAT_SCHEDULE = {
    "poll-congress-bills": {
        "task": "app.workers.congress_poller.poll_congress_bills",
        "schedule": crontab(minute=f"*/{settings.CONGRESS_POLL_INTERVAL_MINUTES}"),
    },
    "fetch-news-active-bills": {
        "task": "app.workers.news_fetcher.fetch_news_for_active_bills",
        "schedule": crontab(hour="*/6", minute=0),
    },
    "calculate-trend-scores": {
        "task": "app.workers.trend_scorer.calculate_all_trend_scores",
        "schedule": crontab(hour=2, minute=0),
    },
}

_CELERY_SETTINGS = dict(
    task_serializer="json",
    result_serializer="json",
    accept_content=["json"],
    timezone="UTC",
    enable_utc=True,
    # Late ack: task is only removed from queue after completion, not on pickup.
    # Combined with idempotent tasks, this ensures no work is lost if a worker crashes.
    task_acks_late=True,
    # Prevent workers from prefetching LLM tasks and blocking other workers.
    worker_prefetch_multiplier=1,
    task_routes=_TASK_ROUTES,
    task_queues=[Queue(name) for name in ("polling", "documents", "llm", "news")],
    # RedBeat stores schedule in Redis — restart-safe and dynamically updatable
    redbeat_redis_url=settings.REDIS_URL,
    beat_scheduler="redbeat.RedBeatScheduler",
    beat_schedule=_BEAT_SCHEDULE,
)

celery_app.conf.update(**_CELERY_SETTINGS)
|
||||
172
backend/app/workers/congress_poller.py
Normal file
172
backend/app/workers/congress_poller.py
Normal file
@@ -0,0 +1,172 @@
|
||||
"""
|
||||
Congress.gov poller — incremental bill and member sync.
|
||||
|
||||
Runs on Celery Beat schedule (every 30 min by default).
|
||||
Uses fromDateTime to fetch only recently updated bills.
|
||||
All operations are idempotent.
|
||||
"""
|
||||
import logging
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from app.database import get_sync_db
|
||||
from app.models import Bill, BillAction, Member, AppSetting
|
||||
from app.services import congress_api
|
||||
from app.workers.celery_app import celery_app
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _get_setting(db, key: str, default=None) -> str | None:
    """Return the value of the AppSetting row stored under `key`.

    Falls back to `default` when no such row exists.
    """
    setting = db.get(AppSetting, key)
    if not setting:
        return default
    return setting.value
|
||||
|
||||
|
||||
def _set_setting(db, key: str, value: str) -> None:
    """Create or overwrite the AppSetting row for `key`, then commit the session."""
    setting = db.get(AppSetting, key)
    if not setting:
        # No row yet: insert a fresh one.
        db.add(AppSetting(key=key, value=value))
    else:
        setting.value = value
    db.commit()
|
||||
|
||||
|
||||
@celery_app.task(bind=True, max_retries=3, name="app.workers.congress_poller.poll_congress_bills")
def poll_congress_bills(self):
    """Fetch recently updated bills from Congress.gov and enqueue document + LLM processing.

    Pages through the bills updated since the stored
    ``congress_last_polled_at`` watermark. New bills are inserted (together
    with their sponsor) and a document fetch is enqueued; existing bills are
    passed to ``_update_bill_if_changed``.

    Returns:
        ``{"new": <inserted bills>, "updated": <existing bills that actually changed>}``

    Raises:
        Whatever ``self.retry`` raises: any failure rolls the session back and
        schedules a retry in 60 seconds (at most 3 retries).
    """
    page_size = 250
    db = get_sync_db()
    try:
        # Kept as a function-level import, as in the original code
        # (presumably to avoid import cycles between worker modules).
        from app.workers.document_fetcher import fetch_bill_documents

        last_polled = _get_setting(db, "congress_last_polled_at")
        # Capture the next watermark BEFORE fetching. Stamping it after the
        # loop would let bills updated while this poll is running fall between
        # two polls. The overlap only causes re-checks, which are idempotent.
        poll_started_at = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

        current_congress = congress_api.get_current_congress()
        logger.info(f"Polling Congress {current_congress} (since {last_polled})")

        new_count = 0
        updated_count = 0
        offset = 0

        while True:
            response = congress_api.get_bills(
                congress=current_congress,
                offset=offset,
                limit=page_size,
                from_date_time=last_polled,
            )
            bills_data = response.get("bills", [])
            if not bills_data:
                break

            for bill_data in bills_data:
                parsed = congress_api.parse_bill_from_api(bill_data, current_congress)
                bill_id = parsed["bill_id"]
                existing = db.get(Bill, bill_id)

                if existing is None:
                    # Upsert sponsor member if referenced
                    parsed["sponsor_id"] = _sync_sponsor(db, bill_data)
                    parsed["last_checked_at"] = datetime.now(timezone.utc)
                    db.add(Bill(**parsed))
                    # Commit before enqueueing so the fetch task can see the row.
                    db.commit()
                    new_count += 1
                    # Enqueue document fetch
                    fetch_bill_documents.delay(bill_id)
                elif _update_bill_if_changed(db, existing, parsed):
                    # Count only bills whose tracked fields really changed.
                    updated_count += 1

            db.commit()
            offset += page_size
            # A short page means this was the last one.
            if len(bills_data) < page_size:
                break

        # Update last polled timestamp
        _set_setting(db, "congress_last_polled_at", poll_started_at)
        logger.info(f"Poll complete: {new_count} new, {updated_count} updated")
        return {"new": new_count, "updated": updated_count}

    except Exception as exc:
        db.rollback()
        logger.error(f"Poll failed: {exc}")
        raise self.retry(exc=exc, countdown=60)
    finally:
        db.close()
|
||||
|
||||
|
||||
@celery_app.task(bind=True, max_retries=3, name="app.workers.congress_poller.sync_members")
def sync_members(self):
    """Sync current Congress members.

    Pages through the current members returned by the Congress API, inserting
    unknown members and overwriting every parsed field on known ones. Records
    without a ``bioguide_id`` are skipped. Each page is committed separately.

    Returns:
        ``{"synced": <members inserted or updated>}``

    Raises:
        Whatever ``self.retry`` raises: any failure rolls the session back and
        schedules a retry in 120 seconds (at most 3 retries).
    """
    page_size = 250
    db = get_sync_db()
    try:
        offset = 0
        synced = 0
        while True:
            response = congress_api.get_members(offset=offset, limit=page_size, current_member=True)
            members_data = response.get("members", [])
            if not members_data:
                break

            for member_data in members_data:
                parsed = congress_api.parse_member_from_api(member_data)
                if not parsed.get("bioguide_id"):
                    continue
                existing = db.get(Member, parsed["bioguide_id"])
                if existing is None:
                    db.add(Member(**parsed))
                else:
                    for k, v in parsed.items():
                        setattr(existing, k, v)
                synced += 1

            db.commit()
            offset += page_size
            # A short page means this was the last one.
            if len(members_data) < page_size:
                break

        logger.info(f"Synced {synced} members")
        return {"synced": synced}
    except Exception as exc:
        db.rollback()
        # Log the failure before retrying, consistent with the other worker tasks.
        logger.error(f"Member sync failed: {exc}")
        raise self.retry(exc=exc, countdown=120)
    finally:
        db.close()
|
||||
|
||||
|
||||
def _sync_sponsor(db, bill_data: dict) -> str | None:
    """Make sure the bill's first listed sponsor has a row in the members table.

    Returns the sponsor's bioguide_id, or None when the bill data lists no
    sponsor or the sponsor has no bioguide id. A missing member is inserted
    and committed immediately.
    """
    sponsor_list = bill_data.get("sponsors", [])
    if not sponsor_list:
        return None

    primary = sponsor_list[0]
    bioguide_id = primary.get("bioguideId")
    if not bioguide_id:
        return None

    # Already known: nothing to insert.
    if db.get(Member, bioguide_id) is not None:
        return bioguide_id

    party = primary.get("party")
    member = Member(
        bioguide_id=bioguide_id,
        name=primary.get("fullName", ""),
        first_name=primary.get("firstName"),
        last_name=primary.get("lastName"),
        # Party is cut to 10 characters; an absent/empty party is stored as None.
        party=party[:10] if party else None,
        state=primary.get("state"),
    )
    db.add(member)
    db.commit()
    return bioguide_id
|
||||
|
||||
|
||||
def _update_bill_if_changed(db, existing: Bill, parsed: dict) -> bool:
    """Copy changed tracked fields from `parsed` onto `existing`.

    Falsy incoming values never overwrite stored ones. When at least one field
    changed, `last_checked_at` is refreshed, the session is committed and a
    document fetch is enqueued for the bill. Returns True if anything changed.
    """
    tracked = ("title", "short_title", "latest_action_date", "latest_action_text", "status")

    dirty = False
    for name in tracked:
        incoming = parsed.get(name)
        if incoming and getattr(existing, name) != incoming:
            setattr(existing, name, incoming)
            dirty = True

    if not dirty:
        return False

    existing.last_checked_at = datetime.now(timezone.utc)
    db.commit()

    # Check for new text versions now that the bill has changed
    from app.workers.document_fetcher import fetch_bill_documents

    fetch_bill_documents.delay(existing.bill_id)
    return True
|
||||
87
backend/app/workers/document_fetcher.py
Normal file
87
backend/app/workers/document_fetcher.py
Normal file
@@ -0,0 +1,87 @@
|
||||
"""
|
||||
Document fetcher — retrieves bill text from GovInfo and stores it.
|
||||
Triggered by congress_poller when a new bill is detected or an existing bill changes.
|
||||
"""
|
||||
import logging
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from app.database import get_sync_db
|
||||
from app.models import Bill, BillDocument
|
||||
from app.services import congress_api, govinfo_api
|
||||
from app.workers.celery_app import celery_app
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@celery_app.task(bind=True, max_retries=3, name="app.workers.document_fetcher.fetch_bill_documents")
def fetch_bill_documents(self, bill_id: str):
    """Download and store the text of a bill, then enqueue LLM processing.

    Returns a status dict. Early exits: "not_found" (unknown bill),
    "no_text_versions" (lookup failed or returned nothing),
    "no_suitable_format" (no usable URL) and "already_fetched" (this URL is
    already stored with text). On success the status is "ok" together with the
    new document id and the character count. Any other failure rolls back and
    retries after 120 seconds.
    """
    db = get_sync_db()
    try:
        bill = db.get(Bill, bill_id)
        if not bill:
            logger.warning(f"Bill {bill_id} not found in DB")
            return {"status": "not_found"}

        # Get text versions from Congress.gov; a failed lookup is treated the
        # same as "no text available" rather than being retried.
        try:
            versions_payload = congress_api.get_bill_text_versions(
                bill.congress_number, bill.bill_type, bill.bill_number
            )
        except Exception as e:
            logger.warning(f"No text versions for {bill_id}: {e}")
            return {"status": "no_text_versions"}

        text_versions = versions_payload.get("textVersions", [])
        if not text_versions:
            return {"status": "no_text_versions"}

        url, fmt = govinfo_api.find_best_text_url(text_versions)
        if not url:
            return {"status": "no_suitable_format"}

        # Idempotency: skip if we already have this exact document version
        already_stored = (
            db.query(BillDocument)
            .filter_by(bill_id=bill_id, govinfo_url=url)
            .filter(BillDocument.raw_text.isnot(None))
            .first()
        )
        if already_stored:
            return {"status": "already_fetched", "bill_id": bill_id}

        logger.info(f"Fetching {bill_id} document ({fmt}) from {url}")
        raw_text = govinfo_api.fetch_text_from_url(url, fmt)
        if not raw_text:
            raise ValueError(f"Empty text returned for {bill_id}")

        # Get version label from first text version (the list is known to be
        # non-empty here). "type" may be a dict with a "name" or a plain value.
        version_type = text_versions[0].get("type", {})
        if isinstance(version_type, dict):
            doc_version = version_type.get("name")
        else:
            doc_version = version_type

        doc = BillDocument(
            bill_id=bill_id,
            doc_type="bill_text",
            doc_version=doc_version,
            govinfo_url=url,
            raw_text=raw_text,
            fetched_at=datetime.now(timezone.utc),
        )
        db.add(doc)
        db.commit()
        db.refresh(doc)

        char_count = len(raw_text)
        logger.info(f"Stored document {doc.id} for bill {bill_id} ({char_count:,} chars)")

        # Enqueue LLM processing
        from app.workers.llm_processor import process_document_with_llm

        process_document_with_llm.delay(doc.id)

        return {"status": "ok", "document_id": doc.id, "chars": char_count}

    except Exception as exc:
        db.rollback()
        logger.error(f"Document fetch failed for {bill_id}: {exc}")
        raise self.retry(exc=exc, countdown=120)
    finally:
        db.close()
|
||||
107
backend/app/workers/llm_processor.py
Normal file
107
backend/app/workers/llm_processor.py
Normal file
@@ -0,0 +1,107 @@
|
||||
"""
|
||||
LLM processor — generates AI briefs for fetched bill documents.
|
||||
Triggered by document_fetcher after successful text retrieval.
|
||||
"""
|
||||
import logging
|
||||
|
||||
from app.database import get_sync_db
|
||||
from app.models import Bill, BillBrief, BillDocument, Member
|
||||
from app.services.llm_service import get_llm_provider
|
||||
from app.workers.celery_app import celery_app
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@celery_app.task(
    bind=True,
    max_retries=2,
    rate_limit="10/m",  # Respect LLM provider rate limits
    name="app.workers.llm_processor.process_document_with_llm",
)
def process_document_with_llm(self, document_id: int):
    """Create an AI brief for one stored bill document.

    The first analysed version of a bill gets a "full" brief. Later versions
    get an "amendment" brief comparing the new text with the document behind
    the most recent full brief; if that earlier document or its text is gone,
    a full brief is generated instead. A news fetch for the bill is enqueued
    afterwards. Returns a status dict; failures roll back and retry after
    5 minutes.
    """
    db = get_sync_db()
    try:
        # Idempotency: skip if brief already exists for this document
        already_done = db.query(BillBrief).filter_by(document_id=document_id).first()
        if already_done:
            return {"status": "already_processed", "brief_id": already_done.id}

        doc = db.get(BillDocument, document_id)
        if not doc or not doc.raw_text:
            logger.warning(f"Document {document_id} not found or has no text")
            return {"status": "no_document"}

        bill = db.get(Bill, doc.bill_id)
        if not bill:
            return {"status": "no_bill"}

        sponsor = db.get(Member, bill.sponsor_id) if bill.sponsor_id else None
        if sponsor:
            sponsor_name, sponsor_party, sponsor_state = sponsor.name, sponsor.party, sponsor.state
        else:
            sponsor_name = sponsor_party = sponsor_state = "Unknown"

        bill_metadata = {
            "title": bill.title or "Unknown Title",
            "sponsor_name": sponsor_name,
            "party": sponsor_party,
            "state": sponsor_state,
            "chamber": bill.chamber or "Unknown",
            "introduced_date": str(bill.introduced_date) if bill.introduced_date else "Unknown",
            "latest_action_text": bill.latest_action_text or "None",
            "latest_action_date": str(bill.latest_action_date) if bill.latest_action_date else "Unknown",
        }

        # Check if a full brief already exists for this bill (from an earlier document version)
        previous_full_brief = (
            db.query(BillBrief)
            .filter_by(bill_id=doc.bill_id, brief_type="full")
            .order_by(BillBrief.created_at.desc())
            .first()
        )

        provider = get_llm_provider()

        has_baseline = bool(previous_full_brief and previous_full_brief.document_id)
        baseline_doc = db.get(BillDocument, previous_full_brief.document_id) if has_baseline else None

        if baseline_doc and baseline_doc.raw_text:
            # New version of a bill we've already analyzed — generate amendment brief
            logger.info(f"Generating amendment brief for document {document_id} (bill {doc.bill_id})")
            brief = provider.generate_amendment_brief(doc.raw_text, baseline_doc.raw_text, bill_metadata)
            brief_type = "amendment"
        else:
            if has_baseline:
                logger.info(f"Previous document unavailable, generating full brief for document {document_id}")
            else:
                logger.info(f"Generating full brief for document {document_id} (bill {doc.bill_id})")
            brief = provider.generate_brief(doc.raw_text, bill_metadata)
            brief_type = "full"

        db_brief = BillBrief(
            bill_id=doc.bill_id,
            document_id=document_id,
            brief_type=brief_type,
            summary=brief.summary,
            key_points=brief.key_points,
            risks=brief.risks,
            deadlines=brief.deadlines,
            topic_tags=brief.topic_tags,
            llm_provider=brief.llm_provider,
            llm_model=brief.llm_model,
        )
        db.add(db_brief)
        db.commit()
        db.refresh(db_brief)

        logger.info(f"{brief_type.capitalize()} brief {db_brief.id} created for bill {doc.bill_id} using {brief.llm_provider}/{brief.llm_model}")

        # Trigger news fetch now that we have topic tags
        from app.workers.news_fetcher import fetch_news_for_bill

        fetch_news_for_bill.delay(doc.bill_id)

        return {"status": "ok", "brief_id": db_brief.id, "brief_type": brief_type}

    except Exception as exc:
        db.rollback()
        logger.error(f"LLM processing failed for document {document_id}: {exc}")
        raise self.retry(exc=exc, countdown=300)  # 5 min backoff for LLM failures
    finally:
        db.close()
|
||||
104
backend/app/workers/news_fetcher.py
Normal file
104
backend/app/workers/news_fetcher.py
Normal file
@@ -0,0 +1,104 @@
|
||||
"""
|
||||
News fetcher — correlates bills with news articles.
|
||||
Triggered after LLM brief creation and on a 6-hour schedule for active bills.
|
||||
"""
|
||||
import logging
|
||||
from datetime import date, datetime, timedelta, timezone
|
||||
|
||||
from sqlalchemy import and_
|
||||
|
||||
from app.database import get_sync_db
|
||||
from app.models import Bill, BillBrief, NewsArticle
|
||||
from app.services import news_service
|
||||
from app.workers.celery_app import celery_app
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@celery_app.task(bind=True, max_retries=2, name="app.workers.news_fetcher.fetch_news_for_bill")
def fetch_news_for_bill(self, bill_id: str):
    """Fetch news articles for a specific bill.

    Builds a search query from the bill's titles and identifier, fetches
    matching NewsAPI articles and stores every article whose URL is not
    already in the database.

    Returns:
        ``{"status": "not_found"}`` when the bill does not exist, otherwise
        ``{"status": "ok", "saved": <number of new articles>}``.

    Raises:
        Whatever ``self.retry`` raises: any failure rolls the session back and
        schedules a retry in 300 seconds (at most 2 retries).
    """
    db = get_sync_db()
    try:
        bill = db.get(Bill, bill_id)
        if not bill:
            return {"status": "not_found"}

        # The query uses bill metadata only; the topic-tag lookup that used to
        # precede this was never passed on, so that extra DB query is dropped.
        query = news_service.build_news_query(
            bill_title=bill.title,
            short_title=bill.short_title,
            sponsor_name=None,
            bill_type=bill.bill_type,
            bill_number=bill.bill_number,
        )

        articles = news_service.fetch_newsapi_articles(query)
        saved = 0
        for article in articles:
            url = article.get("url")
            if not url:
                continue
            # Idempotency: skip duplicate URLs
            if db.query(NewsArticle).filter_by(url=url).first():
                continue

            pub_at = None
            published_raw = article.get("published_at")
            if published_raw:
                try:
                    # fromisoformat() only accepts a trailing "Z" on newer
                    # Python versions, so normalise it to an explicit offset.
                    pub_at = datetime.fromisoformat(published_raw.replace("Z", "+00:00"))
                except (AttributeError, TypeError, ValueError):
                    # Unparseable timestamp: keep the article, leave the date unset.
                    logger.debug(f"Unparseable published_at {published_raw!r} for {url}")

            db.add(NewsArticle(
                bill_id=bill_id,
                # `or ""` guards against an explicit None, which the
                # .get() default alone does not cover.
                source=(article.get("source") or "")[:200],
                headline=article.get("headline") or "",
                url=url,
                published_at=pub_at,
                relevance_score=1.0,
            ))
            saved += 1

        db.commit()
        logger.info(f"Saved {saved} news articles for bill {bill_id}")
        return {"status": "ok", "saved": saved}

    except Exception as exc:
        db.rollback()
        logger.error(f"News fetch failed for {bill_id}: {exc}")
        raise self.retry(exc=exc, countdown=300)
    finally:
        db.close()
|
||||
|
||||
|
||||
@celery_app.task(bind=True, name="app.workers.news_fetcher.fetch_news_for_active_bills")
def fetch_news_for_active_bills(self):
    """
    Scheduled task: fetch news for bills with recent actions (last 7 days).

    Respects the 100/day NewsAPI limit: the task is scheduled every 6 hours
    (4 runs per day), so each run queues at most 20 bills, i.e. at most 80
    scheduled fetches per day. The most recently active bills are taken first.

    Returns:
        ``{"queued": <number of bills for which a news fetch was enqueued>}``
    """
    # NOTE(review): assumes each fetch_news_for_bill costs one NewsAPI request
    # and that the beat schedule stays at crontab(hour="*/6") — adjust
    # runs_per_day if the schedule changes. The old per-run cap of 80 meant up
    # to 320 requests/day from this task alone.
    daily_budget = 80  # leaves headroom under the 100/day limit
    runs_per_day = 4
    per_run_limit = daily_budget // runs_per_day

    db = get_sync_db()
    try:
        cutoff = date.today() - timedelta(days=7)
        active_bills = (
            db.query(Bill)
            .filter(Bill.latest_action_date >= cutoff)
            .order_by(Bill.latest_action_date.desc())
            .limit(per_run_limit)
            .all()
        )
        for bill in active_bills:
            fetch_news_for_bill.delay(bill.bill_id)

        logger.info(f"Queued news fetch for {len(active_bills)} active bills")
        return {"queued": len(active_bills)}
    finally:
        db.close()
|
||||
111
backend/app/workers/trend_scorer.py
Normal file
111
backend/app/workers/trend_scorer.py
Normal file
@@ -0,0 +1,111 @@
|
||||
"""
|
||||
Trend scorer — calculates the daily zeitgeist score for bills.
|
||||
Runs nightly via Celery Beat.
|
||||
"""
|
||||
import logging
|
||||
from datetime import date, timedelta
|
||||
|
||||
from sqlalchemy import and_
|
||||
|
||||
from app.database import get_sync_db
|
||||
from app.models import Bill, BillBrief, TrendScore
|
||||
from app.services import news_service, trends_service
|
||||
from app.workers.celery_app import celery_app
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def calculate_composite_score(newsapi_count: int, gnews_count: int, gtrends_score: float) -> float:
    """
    Weighted composite score (0–100):
      NewsAPI article count  → 0–40 pts  (saturates at 20 articles)
      Google News RSS count  → 0–30 pts  (saturates at 50 articles)
      Google Trends score    → 0–30 pts  (0–100 input)

    Every component is clamped to its range, so negative, oversized or NaN
    inputs cannot push the result outside 0–100.

    Returns:
        The composite score rounded to two decimal places.
    """

    def _fraction(value: float, saturation: float) -> float:
        """Return value/saturation clamped to [0, 1]; NaN counts as 0."""
        ratio = value / saturation
        if ratio != ratio:  # NaN is the only value not equal to itself
            return 0.0
        return min(max(ratio, 0.0), 1.0)

    newsapi_pts = _fraction(newsapi_count, 20) * 40
    gnews_pts = _fraction(gnews_count, 50) * 30
    gtrends_pts = _fraction(gtrends_score, 100) * 30
    return round(newsapi_pts + gnews_pts + gtrends_pts, 2)
|
||||
|
||||
|
||||
@celery_app.task(bind=True, name="app.workers.trend_scorer.calculate_all_trend_scores")
def calculate_all_trend_scores(self):
    """Nightly task: score every bill whose latest action is within the last 90 days.

    For each such bill without a TrendScore for today, gathers the NewsAPI
    article count, the Google News count and the Google Trends score, combines
    them into a composite score and stores one TrendScore row. Work is
    committed every 20 scored bills and once more at the end. Any exception
    rolls back the uncommitted work and is re-raised.

    Returns:
        ``{"scored": <number of bills scored in this run>}``
    """
    db = get_sync_db()
    try:
        cutoff = date.today() - timedelta(days=90)
        recent_bills = db.query(Bill).filter(Bill.latest_action_date >= cutoff).all()

        scored = 0
        today = date.today()

        for bill in recent_bills:
            # Skip if already scored today
            already_scored = (
                db.query(TrendScore)
                .filter_by(bill_id=bill.bill_id, score_date=today)
                .first()
            )
            if already_scored:
                continue

            # Get latest brief for topic tags
            newest_brief = (
                db.query(BillBrief)
                .filter_by(bill_id=bill.bill_id)
                .order_by(BillBrief.created_at.desc())
                .first()
            )
            if newest_brief:
                topic_tags = newest_brief.topic_tags
            else:
                topic_tags = []

            # Build search query
            query = news_service.build_news_query(
                bill_title=bill.title,
                short_title=bill.short_title,
                sponsor_name=None,
                bill_type=bill.bill_type,
                bill_number=bill.bill_number,
            )

            # Fetch counts
            newsapi_count = len(news_service.fetch_newsapi_articles(query, days=30))
            gnews_count = news_service.fetch_gnews_count(query, days=30)

            # Google Trends
            keywords = trends_service.keywords_for_bill(
                title=bill.title or "",
                short_title=bill.short_title or "",
                topic_tags=topic_tags,
            )
            gtrends_score = trends_service.get_trends_score(keywords)

            score_row = TrendScore(
                bill_id=bill.bill_id,
                score_date=today,
                newsapi_count=newsapi_count,
                gnews_count=gnews_count,
                gtrends_score=gtrends_score,
                composite_score=calculate_composite_score(newsapi_count, gnews_count, gtrends_score),
            )
            db.add(score_row)
            scored += 1

            # Commit in batches of 20 so a late failure loses little work.
            if scored % 20 == 0:
                db.commit()

        db.commit()
        logger.info(f"Scored {scored} bills")
        return {"scored": scored}

    except Exception as exc:
        db.rollback()
        logger.error(f"Trend scoring failed: {exc}")
        raise
    finally:
        db.close()
|
||||
44
backend/requirements.txt
Normal file
44
backend/requirements.txt
Normal file
@@ -0,0 +1,44 @@
|
||||
# Web framework
|
||||
fastapi==0.115.5
|
||||
uvicorn[standard]==0.32.1
|
||||
python-multipart==0.0.18
|
||||
|
||||
# Database
|
||||
sqlalchemy==2.0.36
|
||||
asyncpg==0.30.0
|
||||
psycopg2-binary==2.9.10
|
||||
alembic==1.14.0
|
||||
|
||||
# Config
|
||||
pydantic-settings==2.6.1
|
||||
|
||||
# Task queue
|
||||
celery==5.4.0
|
||||
celery-redbeat==2.2.0
|
||||
kombu==5.4.2
|
||||
|
||||
# HTTP clients
|
||||
httpx==0.28.1
|
||||
requests==2.32.3
|
||||
tenacity==9.0.0
|
||||
|
||||
# LLM providers
|
||||
openai==1.57.4
|
||||
anthropic==0.40.0
|
||||
google-generativeai==0.8.3
|
||||
|
||||
# Document parsing
|
||||
beautifulsoup4==4.12.3
|
||||
lxml==5.3.0
|
||||
feedparser==6.0.11
|
||||
pdfminer.six==20231228
|
||||
|
||||
# Trends
|
||||
pytrends==4.9.2
|
||||
|
||||
# Redis client (Celery broker/result backend, RedBeat schedule store, health check)
|
||||
redis==5.2.1
|
||||
|
||||
# Utilities
|
||||
python-dateutil==2.9.0
|
||||
tiktoken==0.8.0
|
||||
Reference in New Issue
Block a user