AirGate/backend/utils/scheduler.py
seaislee1209 3213d6d98a feat: complete AirGate core features + full audit fixes
Quota allocation system:
- Replace monthly budget with one-time quota allocation (prepaid model)
- Support both adding (+) and deducting (-) quota with underflow protection
- Stepped alerts at configurable percentages (e.g., 50%/80%/90%)
- Auto-disable when quota exhausted (100%), alert state resets on new allocation
- Quota allocation history with operator audit trail

IAM management:
- Create new IAM sub-accounts directly from AirGate (auto-generates API keys)
- SecretKey shown once in dialog with copy-to-clipboard
- Attach/detach IAM policies via UI (ArkFullAccess, TOSFullAccess, etc.)
- Sync existing users from Volcengine
- Project list pulled from Volcengine API for dropdown selection

Security & auth:
- API Key authentication for external systems (AirDrama integration)
- SECRET_KEY enforced in production (raises error if missing with DEBUG=False)
- APIKeyUser with proper pk/is_staff attributes for DRF compatibility

Infrastructure:
- Docker + docker-compose for backend and frontend
- Nginx reverse proxy for frontend with /api/ forwarding
- Entrypoint with auto-migrate and default admin creation
- SQLite data persisted via Docker volume at /app/data/

Bug fixes from audit:
- Fix frontend referencing non-existent fields (current_month_spending, effective_budget, budget_usage_percent)
- Fix scheduler using naive datetime.now() → timezone.now()
- Fix scheduler reading interval from settings instead of GlobalConfig DB
- Fix docker-compose SQLite volume mounting as directory
- Fix CORS origin with explicit port 80
- Remove dead config (VOLC_ACCESS_KEY/SK, MONITOR_INTERVAL from settings)
- Remove unused imports

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-19 15:08:33 +08:00

164 lines
6.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""定时消费监控任务 -- 额度划拨制 + 阶梯式告警"""
import logging
from decimal import Decimal
from django.utils import timezone
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Guard flag checked and set by start_scheduler() so that the background
# scheduler is only started once.
_scheduler_started = False
def check_spending():
    """Poll spending for every monitored IAM user and raise quota alerts.

    For each active ``VolcAccount`` whose decrypted access/secret keys are
    both non-empty, and for each of its IAM users that has monitoring enabled
    and whose status is not ``DISABLED``:

    1. Fetch the current month's spending (filtered by the user's project
       name when one is set) and upsert it as that month's ``SpendingRecord``.
    2. Recompute ``consumed_total`` as the sum of all of the user's
       ``SpendingRecord`` amounts.
    3. If the user has a positive ``allocated_quota``, create an
       ``AlertRecord`` and send a Feishu message for every alert threshold
       that has been reached and is not yet listed in ``triggered_alerts``.
    4. When usage reaches 100% and auto-disable is enabled, disable the IAM
       user; only after the disable call succeeds is the user marked
       ``DISABLED``, the 100 marker stored, and the "disabled" alert emitted.
       A failed disable is logged and retried on the next run.

    An exception raised while processing one user is logged with its
    traceback and does not stop the remaining users from being checked.
    """
    # NOTE(review): project imports are kept function-local as in the original,
    # presumably so this module can be imported before Django's app registry
    # is ready — confirm before moving them to module level.
    from django.db.models import Sum

    from apps.monitor.models import VolcAccount, IAMUser, GlobalConfig, AlertRecord, SpendingRecord
    from utils.crypto import decrypt
    from utils.billing_service import BillingService
    from utils.iam_service import IAMService
    from utils.feishu import send_feishu_alert

    config = GlobalConfig.get_solo()
    webhook = config.feishu_webhook_url

    # The billing period is the same for every user in one run, so compute it
    # once instead of once per user.
    bill_period = timezone.now().strftime("%Y-%m")

    for volc_account in VolcAccount.objects.filter(is_active=True):
        ak = decrypt(volc_account.access_key_enc)
        sk = decrypt(volc_account.secret_key_enc)
        if not ak or not sk:
            logger.warning(f"主账号 {volc_account.name} 密钥为空,跳过")
            continue

        billing = BillingService(ak, sk)
        iam_svc = IAMService(ak, sk)
        users = IAMUser.objects.filter(
            volc_account=volc_account,
            monitor_enabled=True,
        ).exclude(status=IAMUser.Status.DISABLED)

        for user in users:
            try:
                # Current-month spending, filtered by project when configured.
                spending = billing.get_spending_by_project(
                    bill_period, user.project_name or None
                )

                # Store/refresh this month's snapshot.
                SpendingRecord.objects.update_or_create(
                    iam_user=user, bill_period=bill_period,
                    defaults={'amount': spending},
                )

                # Cumulative spending = sum of every monthly snapshot.
                total = SpendingRecord.objects.filter(
                    iam_user=user
                ).aggregate(total=Sum('amount'))['total'] or Decimal('0')
                user.consumed_total = total
                user.spending_updated_at = timezone.now()

                quota = user.allocated_quota
                if not quota or quota <= 0:
                    # No quota allocated: only persist the refreshed totals.
                    user.save(update_fields=['consumed_total', 'spending_updated_at'])
                    continue

                usage_percent = float(total) / float(quota) * 100
                # Work on a copy; it is written back to the user before save().
                triggered = list(user.triggered_alerts or [])

                # --- Stepped alerts ---
                for step in user.get_alert_thresholds():
                    if step == 100 and user.auto_disable_enabled:
                        # The 100 marker is owned by the auto-disable branch
                        # below. Recording it here would make that branch's
                        # "100 not in triggered" check fail and silently skip
                        # the disable.
                        continue
                    if usage_percent >= step and step not in triggered:
                        triggered.append(step)
                        threshold_amount = Decimal(str(quota)) * step / 100
                        # NOTE(review): the record is stored with notified=True
                        # before the Feishu call is made — confirm how
                        # send_feishu_alert reports delivery failures.
                        AlertRecord.objects.create(
                            iam_user=user,
                            alert_type=AlertRecord.AlertType.WARNING,
                            title=f"{user.username} 消费达到额度 {step}%",
                            # Separators added: the previous text rendered the
                            # quota and the percentage fused together
                            # (e.g. "¥100.0050%").
                            content=(
                                f"累计消费 ¥{total:.2f},"
                                f"已达已划拨额度 ¥{quota:.2f} 的 {step}%\n"
                                f"剩余额度: ¥{user.remaining_quota:.2f}"
                            ),
                            spending_amount=total,
                            threshold_amount=threshold_amount,
                            notified=True,
                        )
                        send_feishu_alert(
                            webhook,
                            f"⚠️ {user.username} 消费达到额度 {step}%",
                            f"**用户**: {user.username}\n"
                            f"**累计消费**: ¥{total:.2f}\n"
                            f"**已划拨额度**: ¥{quota:.2f}\n"
                            f"**剩余额度**: ¥{user.remaining_quota:.2f}\n"
                            f"**使用率**: {usage_percent:.1f}%",
                            template="orange" if step < 90 else "red",
                        )

                # --- Quota exhausted: auto-disable ---
                if (usage_percent >= 100
                        and user.auto_disable_enabled
                        and 100 not in triggered):
                    try:
                        iam_svc.disable_user(user.username)
                    except Exception as e:
                        # Leave 100 untriggered so the next run retries, and do
                        # not announce a disable that did not happen.
                        logger.exception(f"停用用户 {user.username} 失败: {e}")
                    else:
                        triggered.append(100)
                        user.status = IAMUser.Status.DISABLED
                        AlertRecord.objects.create(
                            iam_user=user,
                            alert_type=AlertRecord.AlertType.DISABLE,
                            title=f"{user.username} 额度用尽,已自动停用",
                            content=(
                                f"累计消费 ¥{total:.2f},已划拨额度 ¥{quota:.2f} 已用尽。\n"
                                f"如需继续使用,请划拨新额度后恢复账号。"
                            ),
                            spending_amount=total,
                            threshold_amount=quota,
                            notified=True,
                        )
                        send_feishu_alert(
                            webhook,
                            f"🚨 {user.username} 额度用尽,已自动停用",
                            f"**用户**: {user.username}\n"
                            f"**累计消费**: ¥{total:.2f}\n"
                            f"**已划拨额度**: ¥{quota:.2f}\n"
                            f"额度已用尽,账号已自动停用。\n"
                            f"请在 AirGate 划拨新额度后恢复。",
                            template="red",
                        )

                user.triggered_alerts = triggered
                user.save(update_fields=[
                    'consumed_total', 'spending_updated_at',
                    'triggered_alerts', 'status',
                ])
            except Exception as e:
                # Per-user boundary: keep the traceback and move on to the
                # next user.
                logger.exception(f"检查用户 {user.username} 消费失败: {e}")
def start_scheduler():
    """Start the background spending-monitor job, at most once per process.

    Reads the polling interval from ``GlobalConfig.monitor_interval_seconds``
    (falling back to 3600 when that value is falsy) and registers
    ``check_spending`` as an APScheduler interval job with id
    ``check_spending`` on a ``BackgroundScheduler``.

    Calls made after a successful start return immediately. Startup errors
    are logged with their traceback rather than raised, and the guard flag is
    cleared again so that a later call can retry.
    """
    global _scheduler_started
    if _scheduler_started:
        return
    # Claim the flag before doing any work so that a second call arriving
    # while startup is in progress returns early.
    _scheduler_started = True
    try:
        from apscheduler.schedulers.background import BackgroundScheduler
        from apps.monitor.models import GlobalConfig

        scheduler = BackgroundScheduler()
        config = GlobalConfig.get_solo()
        interval = config.monitor_interval_seconds or 3600
        scheduler.add_job(check_spending, 'interval', seconds=interval,
                          id='check_spending', replace_existing=True)
        scheduler.start()
        logger.info(f"消费监控定时任务已启动,间隔 {interval}")
    except Exception as e:
        # Startup failed: release the flag, otherwise the scheduler could
        # never be started again in this process.
        _scheduler_started = False
        logger.exception(f"启动定时任务失败: {e}")