From 49e06fd3c4a11a64aa4db1c767e2c0a97ddb5ea5 Mon Sep 17 00:00:00 2001 From: zyc <1439655764@qq.com> Date: Sat, 4 Apr 2026 14:11:39 +0800 Subject: [PATCH 01/10] =?UTF-8?q?fix=20=E9=95=9C=E5=83=8F=E6=BA=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitea/workflows/deploy.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitea/workflows/deploy.yaml b/.gitea/workflows/deploy.yaml index abec937..8c61d76 100644 --- a/.gitea/workflows/deploy.yaml +++ b/.gitea/workflows/deploy.yaml @@ -70,7 +70,7 @@ jobs: - name: Setup Kubectl run: | if ! command -v kubectl &>/dev/null; then - curl -LO "https://mirrors.aliyun.com/kubernetes/kubectl/v1.28.0/bin/linux/amd64/kubectl" + curl -LO "https://files.m.daocloud.io/dl.k8s.io/release/v1.28.0/bin/linux/amd64/kubectl" chmod +x kubectl && mv kubectl /usr/local/bin/ fi kubectl version --client From ca6f2a0346191a28722677e1a1f7296edc025415 Mon Sep 17 00:00:00 2001 From: zyc <1439655764@qq.com> Date: Sat, 4 Apr 2026 14:21:38 +0800 Subject: [PATCH 02/10] =?UTF-8?q?fix:=20=E6=B7=BB=E5=8A=A0=20Redis=20?= =?UTF-8?q?=E5=88=86=E5=B8=83=E5=BC=8F=E9=94=81=E9=98=B2=E6=AD=A2=20poll?= =?UTF-8?q?=5Fvideo=5Ftask=20=E9=87=8D=E5=A4=8D=E6=B4=BE=E5=8F=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit recover_stuck_tasks 在 API 超时 >3 分钟时可能重复派发同一任务, 导致重复扣费风险。通过 cache.add 实现互斥锁保护。 Co-Authored-By: Claude Opus 4.6 (1M context) --- backend/apps/generation/tasks.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/backend/apps/generation/tasks.py b/backend/apps/generation/tasks.py index 9e7384c..522001e 100644 --- a/backend/apps/generation/tasks.py +++ b/backend/apps/generation/tasks.py @@ -21,19 +21,29 @@ def poll_video_task(self, record_id): from apps.generation.models import GenerationRecord from utils.airdrama_client import query_task, map_status + # 防重复:同一 record 同一时刻只允许一个 poll 在执行 + from django.core.cache import cache + 
lock_key = f'poll_lock:{record_id}' + if not cache.add(lock_key, '1', timeout=POLL_INTERVAL * 3): + logger.info('poll_video_task: record %s already being polled, skipping', record_id) + return + try: record = GenerationRecord.objects.get(pk=record_id) except GenerationRecord.DoesNotExist: logger.warning('poll_video_task: record %s not found', record_id) + cache.delete(lock_key) return ark_task_id = record.ark_task_id if not ark_task_id: logger.warning('poll_video_task: record %s has no ark_task_id', record_id) + cache.delete(lock_key) return if record.status not in ('queued', 'processing'): logger.info('poll_video_task: record %s already in terminal state: %s', record_id, record.status) + cache.delete(lock_key) return # Poll Volcano API @@ -42,12 +52,14 @@ def poll_video_task(self, record_id): new_status = map_status(ark_resp.get('status', '')) except Exception: logger.exception('poll_video_task: API query failed for %s, will retry', ark_task_id) + cache.delete(lock_key) raise self.retry(countdown=POLL_INTERVAL) if new_status in ('queued', 'processing'): # Still running — update status, then re-enqueue record.status = new_status record.save(update_fields=['status', 'updated_at']) + cache.delete(lock_key) raise self.retry(countdown=POLL_INTERVAL) # Terminal state reached — process result From 05097d58f979992e175d79b71940e575229d20cf Mon Sep 17 00:00:00 2001 From: zyc <1439655764@qq.com> Date: Sat, 4 Apr 2026 14:33:58 +0800 Subject: [PATCH 03/10] =?UTF-8?q?perf:=20gunicorn=20=E5=90=AF=E7=94=A8=20g?= =?UTF-8?q?event=20=E5=BC=82=E6=AD=A5=E6=A8=A1=E5=BC=8F=EF=BC=8C=E5=B9=B6?= =?UTF-8?q?=E5=8F=91=E4=BB=8E=202=20=E6=8F=90=E5=8D=87=E5=88=B0=20400?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.6 (1M context) --- backend/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/Dockerfile b/backend/Dockerfile index 21287ed..120928e 100644 --- a/backend/Dockerfile +++ 
b/backend/Dockerfile @@ -29,4 +29,4 @@ RUN chmod +x /app/entrypoint.sh EXPOSE 8000 ENTRYPOINT ["/app/entrypoint.sh"] -CMD ["gunicorn", "--bind", "0.0.0.0:8000", "--workers", "2", "--timeout", "120", "--access-logfile", "-", "--error-logfile", "-", "config.wsgi:application"] +CMD ["gunicorn", "--bind", "0.0.0.0:8000", "--workers", "2", "--worker-class", "gevent", "--worker-connections", "200", "--timeout", "120", "--access-logfile", "-", "--error-logfile", "-", "config.wsgi:application"] From 1ff985d64fc0a142a318293a4f412a4b7b817189 Mon Sep 17 00:00:00 2001 From: zyc <1439655764@qq.com> Date: Sat, 4 Apr 2026 14:35:31 +0800 Subject: [PATCH 04/10] =?UTF-8?q?fix:=20Deploy=20to=20K3s=20=E6=B7=BB?= =?UTF-8?q?=E5=8A=A0=203=20=E6=AC=A1=E9=87=8D=E8=AF=95=EF=BC=8C=E9=98=B2?= =?UTF-8?q?=E6=AD=A2=E5=86=85=E7=BD=91=E6=8A=96=E5=8A=A8=E5=AF=BC=E8=87=B4?= =?UTF-8?q?=E6=9E=84=E5=BB=BA=E5=A4=B1=E8=B4=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.6 (1M context) --- .gitea/workflows/deploy.yaml | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/.gitea/workflows/deploy.yaml b/.gitea/workflows/deploy.yaml index 8c61d76..884c085 100644 --- a/.gitea/workflows/deploy.yaml +++ b/.gitea/workflows/deploy.yaml @@ -130,21 +130,26 @@ jobs: --from-literal=ALIYUN_SMS_ACCESS_SECRET='${{ secrets.ALIYUN_SMS_ACCESS_SECRET }}' \ --dry-run=client -o yaml | kubectl apply -f - - # Apply manifests + # Apply manifests (with retry for transient network issues) set -o pipefail - { - kubectl apply -f k8s/backend-deployment.yaml - kubectl apply -f k8s/celery-deployment.yaml - kubectl apply -f k8s/web-deployment.yaml - kubectl apply -f k8s/ingress.yaml + for attempt in 1 2 3; do + echo "Deploy attempt $attempt/3..." 
+ { + kubectl apply -f k8s/backend-deployment.yaml + kubectl apply -f k8s/celery-deployment.yaml + kubectl apply -f k8s/web-deployment.yaml + kubectl apply -f k8s/ingress.yaml - # Preserve real client IP - kubectl patch svc traefik -n kube-system -p '{"spec":{"externalTrafficPolicy":"Local"}}' 2>/dev/null || true + # Preserve real client IP + kubectl patch svc traefik -n kube-system -p '{"spec":{"externalTrafficPolicy":"Local"}}' 2>/dev/null || true - kubectl rollout restart deployment/video-backend - kubectl rollout restart deployment/celery-worker - kubectl rollout restart deployment/video-web - } 2>&1 | tee /tmp/deploy.log + kubectl rollout restart deployment/video-backend + kubectl rollout restart deployment/celery-worker + kubectl rollout restart deployment/video-web + } 2>&1 | tee /tmp/deploy.log && break + echo "Attempt $attempt failed, retrying in 10s..." + sleep 10 + done # ===== Log Center: failure reporting ===== - name: Report failure to Log Center From 23658243133b5c4e5174a331cf9c9daaabcc3783 Mon Sep 17 00:00:00 2001 From: zyc <1439655764@qq.com> Date: Sat, 4 Apr 2026 14:37:28 +0800 Subject: [PATCH 05/10] =?UTF-8?q?fix:=20CI=20=E5=85=A8=E9=93=BE=E8=B7=AF?= =?UTF-8?q?=E6=B7=BB=E5=8A=A0=203=20=E6=AC=A1=E9=87=8D=E8=AF=95=EF=BC=88bu?= =?UTF-8?q?ild/push/kubectl/deploy=EF=BC=89=E9=98=B2=E6=AD=A2=E7=BD=91?= =?UTF-8?q?=E7=BB=9C=E6=8A=96=E5=8A=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.6 (1M context) --- .gitea/workflows/deploy.yaml | 43 +++++++++++++++++++++++++----------- 1 file changed, 30 insertions(+), 13 deletions(-) diff --git a/.gitea/workflows/deploy.yaml b/.gitea/workflows/deploy.yaml index 884c085..5133d67 100644 --- a/.gitea/workflows/deploy.yaml +++ b/.gitea/workflows/deploy.yaml @@ -49,28 +49,45 @@ jobs: id: build_backend run: | set -o pipefail - DOCKER_BUILDKIT=0 docker build \ - --tag ${{ env.CR_SERVER_ACTIVE }}/${{ env.CR_ORG }}/video-backend:${{ env.IMAGE_TAG }} 
\ - --tag ${{ env.CR_SERVER_ACTIVE }}/${{ env.CR_ORG }}/video-backend:latest \ - ./backend 2>&1 | tee /tmp/build.log - docker push ${{ env.CR_SERVER_ACTIVE }}/${{ env.CR_ORG }}/video-backend:${{ env.IMAGE_TAG }} - docker push ${{ env.CR_SERVER_ACTIVE }}/${{ env.CR_ORG }}/video-backend:latest + for attempt in 1 2 3; do + echo "Build backend attempt $attempt/3..." + DOCKER_BUILDKIT=0 docker build \ + --tag ${{ env.CR_SERVER_ACTIVE }}/${{ env.CR_ORG }}/video-backend:${{ env.IMAGE_TAG }} \ + --tag ${{ env.CR_SERVER_ACTIVE }}/${{ env.CR_ORG }}/video-backend:latest \ + ./backend 2>&1 | tee /tmp/build.log && break + echo "Attempt $attempt failed, retrying in 10s..." && sleep 10 + done + for attempt in 1 2 3; do + docker push ${{ env.CR_SERVER_ACTIVE }}/${{ env.CR_ORG }}/video-backend:${{ env.IMAGE_TAG }} && \ + docker push ${{ env.CR_SERVER_ACTIVE }}/${{ env.CR_ORG }}/video-backend:latest && break + echo "Push attempt $attempt failed, retrying in 10s..." && sleep 10 + done - name: Build and Push Web id: build_web run: | set -o pipefail - DOCKER_BUILDKIT=0 docker build \ - --tag ${{ env.CR_SERVER_ACTIVE }}/${{ env.CR_ORG }}/video-web:${{ env.IMAGE_TAG }} \ - --tag ${{ env.CR_SERVER_ACTIVE }}/${{ env.CR_ORG }}/video-web:latest \ - ./web 2>&1 | tee -a /tmp/build.log - docker push ${{ env.CR_SERVER_ACTIVE }}/${{ env.CR_ORG }}/video-web:${{ env.IMAGE_TAG }} - docker push ${{ env.CR_SERVER_ACTIVE }}/${{ env.CR_ORG }}/video-web:latest + for attempt in 1 2 3; do + echo "Build web attempt $attempt/3..." + DOCKER_BUILDKIT=0 docker build \ + --tag ${{ env.CR_SERVER_ACTIVE }}/${{ env.CR_ORG }}/video-web:${{ env.IMAGE_TAG }} \ + --tag ${{ env.CR_SERVER_ACTIVE }}/${{ env.CR_ORG }}/video-web:latest \ + ./web 2>&1 | tee -a /tmp/build.log && break + echo "Attempt $attempt failed, retrying in 10s..." 
&& sleep 10 + done + for attempt in 1 2 3; do + docker push ${{ env.CR_SERVER_ACTIVE }}/${{ env.CR_ORG }}/video-web:${{ env.IMAGE_TAG }} && \ + docker push ${{ env.CR_SERVER_ACTIVE }}/${{ env.CR_ORG }}/video-web:latest && break + echo "Push attempt $attempt failed, retrying in 10s..." && sleep 10 + done - name: Setup Kubectl run: | if ! command -v kubectl &>/dev/null; then - curl -LO "https://files.m.daocloud.io/dl.k8s.io/release/v1.28.0/bin/linux/amd64/kubectl" + for attempt in 1 2 3; do + curl -LO "https://files.m.daocloud.io/dl.k8s.io/release/v1.28.0/bin/linux/amd64/kubectl" && break + echo "Download attempt $attempt failed, retrying in 5s..." && sleep 5 + done chmod +x kubectl && mv kubectl /usr/local/bin/ fi kubectl version --client From 43fe1b8909c16ac1230ef7432417525c8cb49369 Mon Sep 17 00:00:00 2001 From: zyc <1439655764@qq.com> Date: Sat, 4 Apr 2026 15:15:01 +0800 Subject: [PATCH 06/10] =?UTF-8?q?fix:=20=E5=B0=86=20kubectl=20secret=20?= =?UTF-8?q?=E5=88=9B=E5=BB=BA=E4=B9=9F=E7=BA=B3=E5=85=A5=E9=87=8D=E8=AF=95?= =?UTF-8?q?=E5=BE=AA=E7=8E=AF=EF=BC=8C=E4=BF=AE=E5=A4=8D=E9=87=8D=E8=AF=95?= =?UTF-8?q?=E6=9C=AA=E7=94=9F=E6=95=88=E7=9A=84=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.6 (1M context) --- .gitea/workflows/deploy.yaml | 38 ++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/.gitea/workflows/deploy.yaml b/.gitea/workflows/deploy.yaml index 5133d67..4d83365 100644 --- a/.gitea/workflows/deploy.yaml +++ b/.gitea/workflows/deploy.yaml @@ -130,28 +130,28 @@ jobs: sed -i "s|redis://zyc:Zyc188208@redis-shzlsczo52dft8mia.redis.ivolces.com:6379/0|${{ env.REDIS_URL }}|g" k8s/backend-deployment.yaml sed -i "s|redis://zyc:Zyc188208@redis-shzlsczo52dft8mia.redis.ivolces.com:6379/0|${{ env.REDIS_URL }}|g" k8s/celery-deployment.yaml - # Create/update image pull secret for CR - kubectl create secret docker-registry 
cr-pull-secret \ - --docker-server="${{ env.CR_SERVER_ACTIVE }}" \ - --docker-username="${{ env.CR_USERNAME_ACTIVE }}" \ - --docker-password="${{ env.CR_PASSWORD_ACTIVE }}" \ - --dry-run=client -o yaml | kubectl apply -f - - - # Create/update secrets (业务密钥,DB 已写在 yaml 里) - kubectl create secret generic video-backend-secrets \ - --from-literal=ARK_API_KEY='${{ secrets.ARK_API_KEY }}' \ - --from-literal=TOS_ACCESS_KEY='${{ secrets.TOS_ACCESS_KEY }}' \ - --from-literal=TOS_SECRET_KEY='${{ secrets.TOS_SECRET_KEY }}' \ - --from-literal=DJANGO_SECRET_KEY='${{ secrets.DJANGO_SECRET_KEY }}' \ - --from-literal=ALIYUN_SMS_ACCESS_KEY='${{ secrets.ALIYUN_SMS_ACCESS_KEY }}' \ - --from-literal=ALIYUN_SMS_ACCESS_SECRET='${{ secrets.ALIYUN_SMS_ACCESS_SECRET }}' \ - --dry-run=client -o yaml | kubectl apply -f - - - # Apply manifests (with retry for transient network issues) - set -o pipefail + # All kubectl operations with retry (K3s 内网连接可能抖动) for attempt in 1 2 3; do echo "Deploy attempt $attempt/3..." { + # Create/update image pull secret for CR + kubectl create secret docker-registry cr-pull-secret \ + --docker-server="${{ env.CR_SERVER_ACTIVE }}" \ + --docker-username="${{ env.CR_USERNAME_ACTIVE }}" \ + --docker-password="${{ env.CR_PASSWORD_ACTIVE }}" \ + --dry-run=client -o yaml | kubectl apply -f - + + # Create/update secrets (业务密钥,DB 已写在 yaml 里) + kubectl create secret generic video-backend-secrets \ + --from-literal=ARK_API_KEY='${{ secrets.ARK_API_KEY }}' \ + --from-literal=TOS_ACCESS_KEY='${{ secrets.TOS_ACCESS_KEY }}' \ + --from-literal=TOS_SECRET_KEY='${{ secrets.TOS_SECRET_KEY }}' \ + --from-literal=DJANGO_SECRET_KEY='${{ secrets.DJANGO_SECRET_KEY }}' \ + --from-literal=ALIYUN_SMS_ACCESS_KEY='${{ secrets.ALIYUN_SMS_ACCESS_KEY }}' \ + --from-literal=ALIYUN_SMS_ACCESS_SECRET='${{ secrets.ALIYUN_SMS_ACCESS_SECRET }}' \ + --dry-run=client -o yaml | kubectl apply -f - + + # Apply manifests kubectl apply -f k8s/backend-deployment.yaml kubectl apply -f 
k8s/celery-deployment.yaml kubectl apply -f k8s/web-deployment.yaml From a8ffd6417a6ce79aaee187f79279db6a7f8b81f2 Mon Sep 17 00:00:00 2001 From: zyc <1439655764@qq.com> Date: Sat, 4 Apr 2026 15:46:52 +0800 Subject: [PATCH 07/10] feat: add Docker cleanup step to CI pipeline Automatically prune unused containers, images and build cache after each CI run to prevent disk space exhaustion on the runner. Co-Authored-By: Claude Opus 4.6 (1M context) --- .gitea/workflows/deploy.yaml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/.gitea/workflows/deploy.yaml b/.gitea/workflows/deploy.yaml index 4d83365..ecfaa0d 100644 --- a/.gitea/workflows/deploy.yaml +++ b/.gitea/workflows/deploy.yaml @@ -226,3 +226,13 @@ jobs: \"run_url\": \"https://gitea.airlabs.art/${{ github.repository }}/actions/runs/${{ github.run_number }}\" } }" || true + + # ===== Cleanup: remove unused Docker resources ===== + - name: Docker Cleanup + if: always() + run: | + docker container prune -f + docker image prune -a -f + docker builder prune -a -f + echo "Disk usage after cleanup:" + df -h / | tail -1 From 622491c3d0aff25af87f820bc61b2c39f1540a05 Mon Sep 17 00:00:00 2001 From: zyc <1439655764@qq.com> Date: Sat, 4 Apr 2026 16:07:44 +0800 Subject: [PATCH 08/10] =?UTF-8?q?chore:=20=E8=A7=A6=E5=8F=91=E6=9E=84?= =?UTF-8?q?=E5=BB=BA=E9=AA=8C=E8=AF=81=20runner=20host=20=E7=BD=91?= =?UTF-8?q?=E7=BB=9C=E4=BF=AE=E5=A4=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.6 (1M context) From 1e76052c64fadb61b0b97cb02fbdf08983508733 Mon Sep 17 00:00:00 2001 From: zyc <1439655764@qq.com> Date: Sat, 4 Apr 2026 16:17:34 +0800 Subject: [PATCH 09/10] =?UTF-8?q?fix:=20=E7=94=A8=20printf=20=E5=86=99=20k?= =?UTF-8?q?ubeconfig=20=E9=98=B2=E6=AD=A2=E5=A4=9A=E8=A1=8C=E5=86=85?= =?UTF-8?q?=E5=AE=B9=E8=A2=AB=20echo=20=E6=88=AA=E6=96=AD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: 
Claude Opus 4.6 (1M context) --- .gitea/workflows/deploy.yaml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.gitea/workflows/deploy.yaml b/.gitea/workflows/deploy.yaml index ecfaa0d..d61ac1c 100644 --- a/.gitea/workflows/deploy.yaml +++ b/.gitea/workflows/deploy.yaml @@ -96,11 +96,13 @@ jobs: run: | mkdir -p $HOME/.kube if [[ "${{ github.ref_name }}" == "master" ]]; then - echo "${{ secrets.VOLCANO_PROD_KUBE_CONFIG }}" > $HOME/.kube/config + printf '%s\n' '${{ secrets.VOLCANO_PROD_KUBE_CONFIG }}' > $HOME/.kube/config elif [[ "${{ github.ref_name }}" == "dev" ]]; then - echo "${{ secrets.VOLCANO_TEST_KUBE_CONFIG }}" > $HOME/.kube/config + printf '%s\n' '${{ secrets.VOLCANO_TEST_KUBE_CONFIG }}' > $HOME/.kube/config fi chmod 600 $HOME/.kube/config + echo "kubeconfig lines: $(wc -l < $HOME/.kube/config)" + grep server $HOME/.kube/config || echo "WARNING: no server found in kubeconfig" - name: Deploy to K3s id: deploy From 95bdb0a6e86011b271e9651e76178b9655cf8cbc Mon Sep 17 00:00:00 2001 From: zyc <1439655764@qq.com> Date: Sat, 4 Apr 2026 17:10:55 +0800 Subject: [PATCH 10/10] =?UTF-8?q?fix:=20USE=5FTZ=3DFalse=20=E7=BB=9F?= =?UTF-8?q?=E4=B8=80=E4=BD=BF=E7=94=A8=E5=8C=97=E4=BA=AC=E6=97=B6=E9=97=B4?= =?UTF-8?q?=EF=BC=8C=E4=BF=AE=E5=A4=8D=20recover=5Fstuck=5Ftasks=20?= =?UTF-8?q?=E6=97=B6=E5=8C=BA=E6=AF=94=E8=BE=83=E9=94=99=E8=AF=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.6 (1M context) --- backend/config/settings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/config/settings.py b/backend/config/settings.py index ce7004c..b9a1127 100644 --- a/backend/config/settings.py +++ b/backend/config/settings.py @@ -189,7 +189,7 @@ CELERY_BEAT_SCHEDULE = { LANGUAGE_CODE = 'zh-hans' TIME_ZONE = 'Asia/Shanghai' USE_I18N = True -USE_TZ = True +USE_TZ = False STATIC_URL = 'static/' STATIC_ROOT = BASE_DIR / 'staticfiles'