Submit up to 1000 unbriefed documents to the provider Batch API in one
shot instead of individual synchronous LLM calls. Results are polled
every 30 minutes via a new Celery beat task and imported automatically.
- New worker: llm_batch_processor.py
- submit_llm_batch: guards against duplicate batches, builds JSONL
(OpenAI) or request list (Anthropic), stores state in AppSetting
- poll_llm_batch_results: checks batch status, imports completed
results with idempotency, emits notifications + triggers news fetch
- celery_app: register worker, route to llm queue, beat every 30 min
- admin API: POST /submit-llm-batch + GET /llm-batch-status endpoints
- Frontend: submitLlmBatch + getLlmBatchStatus in adminAPI; settings
page shows batch control row (openai/anthropic only) with live
progress line while batch is processing
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
101 lines
4.0 KiB
Python
101 lines
4.0 KiB
Python
from celery import Celery
|
|
from celery.schedules import crontab
|
|
from kombu import Queue
|
|
|
|
from app.config import settings
|
|
|
|
# Application-wide Celery instance. The same settings.REDIS_URL is used for
# both the message broker and the result backend.
celery_app = Celery(
    "pocketveto",
    broker=settings.REDIS_URL,
    backend=settings.REDIS_URL,
    # Worker modules listed here are imported by Celery so that the tasks
    # they define get registered with this app.
    include=[
        "app.workers.congress_poller",
        "app.workers.document_fetcher",
        "app.workers.llm_processor",
        "app.workers.news_fetcher",
        "app.workers.trend_scorer",
        "app.workers.member_interest",
        "app.workers.notification_dispatcher",
        "app.workers.llm_batch_processor",
    ],
)
|
|
|
|
# Runtime configuration: serialization, delivery semantics, queue routing
# and the periodic (beat) schedule.
celery_app.conf.update(
    # --- Serialization: JSON only, in both directions -------------------
    task_serializer="json",
    result_serializer="json",
    accept_content=["json"],
    # --- Clock: every crontab below is evaluated in UTC -----------------
    timezone="UTC",
    enable_utc=True,
    # --- Delivery semantics ----------------------------------------------
    # Acknowledge a message only after the task has finished rather than
    # when a worker picks it up, so an interrupted task can be redelivered.
    # This relies on the tasks themselves being idempotent.
    # NOTE(review): per the Celery docs, a task whose worker process exits
    # abruptly is still acknowledged unless task_reject_on_worker_lost is
    # also enabled — confirm whether that case matters here.
    task_acks_late=True,
    # Reserve one message at a time per worker process so a worker does not
    # sit on queued LLM tasks that another worker could be running.
    worker_prefetch_multiplier=1,
    # --- Routing: each worker module is pinned to a named queue ----------
    task_routes={
        "app.workers.congress_poller.*": {"queue": "polling"},
        "app.workers.document_fetcher.*": {"queue": "documents"},
        "app.workers.llm_processor.*": {"queue": "llm"},
        "app.workers.llm_batch_processor.*": {"queue": "llm"},
        "app.workers.news_fetcher.*": {"queue": "news"},
        "app.workers.trend_scorer.*": {"queue": "news"},
        "app.workers.member_interest.*": {"queue": "news"},
        "app.workers.notification_dispatcher.*": {"queue": "polling"},
    },
    # The four queues referenced by the routes above.
    task_queues=[
        Queue(queue_name)
        for queue_name in ("polling", "documents", "llm", "news")
    ],
    # --- Beat scheduler ---------------------------------------------------
    # RedBeat keeps the schedule in Redis, so it survives restarts and can
    # be changed at runtime.
    redbeat_redis_url=settings.REDIS_URL,
    beat_scheduler="redbeat.RedBeatScheduler",
    # --- Periodic tasks ---------------------------------------------------
    beat_schedule={
        "poll-congress-bills": {
            "task": "app.workers.congress_poller.poll_congress_bills",
            # Every N minutes, N taken from settings.
            # NOTE(review): "*/N" steps through minutes 0-59 of each hour, so
            # the spacing is only even when N divides 60 — confirm the
            # configured value.
            "schedule": crontab(minute=f"*/{settings.CONGRESS_POLL_INTERVAL_MINUTES}"),
        },
        "fetch-news-active-bills": {
            "task": "app.workers.news_fetcher.fetch_news_for_active_bills",
            # On the hour, every 6 hours.
            "schedule": crontab(hour="*/6", minute=0),
        },
        "calculate-trend-scores": {
            "task": "app.workers.trend_scorer.calculate_all_trend_scores",
            # 2 AM UTC daily.
            "schedule": crontab(hour=2, minute=0),
        },
        "fetch-news-active-members": {
            "task": "app.workers.member_interest.fetch_news_for_active_members",
            # Every 12 hours, at half past the hour.
            "schedule": crontab(hour="*/12", minute=30),
        },
        "calculate-member-trend-scores": {
            "task": "app.workers.member_interest.calculate_all_member_trend_scores",
            # 3 AM UTC daily.
            "schedule": crontab(hour=3, minute=0),
        },
        "sync-members": {
            "task": "app.workers.congress_poller.sync_members",
            # 1 AM UTC daily — refreshes chamber/district/contact info.
            "schedule": crontab(hour=1, minute=0),
        },
        "fetch-actions-active-bills": {
            "task": "app.workers.congress_poller.fetch_actions_for_active_bills",
            # 4 AM UTC, after trend + member scoring.
            "schedule": crontab(hour=4, minute=0),
        },
        "dispatch-notifications": {
            "task": "app.workers.notification_dispatcher.dispatch_notifications",
            # Every 5 minutes.
            "schedule": crontab(minute="*/5"),
        },
        "send-notification-digest": {
            "task": "app.workers.notification_dispatcher.send_notification_digest",
            # 8 AM UTC daily.
            "schedule": crontab(hour=8, minute=0),
        },
        "send-weekly-digest": {
            "task": "app.workers.notification_dispatcher.send_weekly_digest",
            # Monday 8:30 AM UTC.
            "schedule": crontab(hour=8, minute=30, day_of_week=1),
        },
        "poll-llm-batch-results": {
            "task": "app.workers.llm_batch_processor.poll_llm_batch_results",
            # Every 30 minutes (at :00 and :30).
            "schedule": crontab(minute="*/30"),
        },
    },
)
|