fix: 添加 Redis 分布式锁防止 poll_video_task 重复派发
Some checks failed
Build and Deploy / build-and-deploy (push) Failing after 6m17s
Some checks failed
Build and Deploy / build-and-deploy (push) Failing after 6m17s
recover_stuck_tasks 在 API 超时 >3 分钟时可能重复派发同一任务, 导致重复扣费风险。通过 cache.add 实现互斥锁保护。 Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
55c26fb1f5
commit
ca6f2a0346
@ -21,19 +21,29 @@ def poll_video_task(self, record_id):
|
||||
from apps.generation.models import GenerationRecord
|
||||
from utils.airdrama_client import query_task, map_status
|
||||
|
||||
# 防重复:同一 record 同一时刻只允许一个 poll 在执行
|
||||
from django.core.cache import cache
|
||||
lock_key = f'poll_lock:{record_id}'
|
||||
if not cache.add(lock_key, '1', timeout=POLL_INTERVAL * 3):
|
||||
logger.info('poll_video_task: record %s already being polled, skipping', record_id)
|
||||
return
|
||||
|
||||
try:
|
||||
record = GenerationRecord.objects.get(pk=record_id)
|
||||
except GenerationRecord.DoesNotExist:
|
||||
logger.warning('poll_video_task: record %s not found', record_id)
|
||||
cache.delete(lock_key)
|
||||
return
|
||||
|
||||
ark_task_id = record.ark_task_id
|
||||
if not ark_task_id:
|
||||
logger.warning('poll_video_task: record %s has no ark_task_id', record_id)
|
||||
cache.delete(lock_key)
|
||||
return
|
||||
|
||||
if record.status not in ('queued', 'processing'):
|
||||
logger.info('poll_video_task: record %s already in terminal state: %s', record_id, record.status)
|
||||
cache.delete(lock_key)
|
||||
return
|
||||
|
||||
# Poll Volcano API
|
||||
@ -42,12 +52,14 @@ def poll_video_task(self, record_id):
|
||||
new_status = map_status(ark_resp.get('status', ''))
|
||||
except Exception:
|
||||
logger.exception('poll_video_task: API query failed for %s, will retry', ark_task_id)
|
||||
cache.delete(lock_key)
|
||||
raise self.retry(countdown=POLL_INTERVAL)
|
||||
|
||||
if new_status in ('queued', 'processing'):
|
||||
# Still running — update status, then re-enqueue
|
||||
record.status = new_status
|
||||
record.save(update_fields=['status', 'updated_at'])
|
||||
cache.delete(lock_key)
|
||||
raise self.retry(countdown=POLL_INTERVAL)
|
||||
|
||||
# Terminal state reached — process result
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user