zyc a6a3928091
All checks were successful
Build and Deploy / build-and-deploy (push) Successful in 4m9s
perf: kubectl 4s 超时 + 5 次重试,避免 K3s 内网抖动卡死部署
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-13 11:29:24 +08:00

243 lines
12 KiB
YAML
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

name: Build and Deploy
on:
push:
branches:
- master
- dev
jobs:
build-and-deploy:
runs-on: ubuntu-latest
steps:
- name: Checkout
run: |
git clone --depth=1 --branch=${{ github.ref_name }} https://gitea.airlabs.art/${{ github.repository }}.git .
- name: Set environment by branch
run: |
SHORT_SHA=$(echo "${{ github.sha }}" | cut -c1-7)
BUILD_DATE=$(date +%Y%m%d)
if [[ "${{ github.ref_name }}" == "master" ]]; then
echo "IMAGE_TAG=prod-${BUILD_DATE}-${SHORT_SHA}" >> $GITHUB_ENV
echo "CR_SERVER_ACTIVE=gitea-prod-cn-shanghai.cr.volces.com" >> $GITHUB_ENV
echo "CR_USERNAME_ACTIVE=seaislee@76339115" >> $GITHUB_ENV
echo "CR_PASSWORD_ACTIVE=${{ secrets.CR_PROD_PASSWORD }}" >> $GITHUB_ENV
echo "CR_ORG=prod" >> $GITHUB_ENV
echo "DEPLOY_ENV=production" >> $GITHUB_ENV
echo "DOMAIN_API=airflow-studio-api.airlabs.art" >> $GITHUB_ENV
echo "DOMAIN_WEB=airflow-studio.airlabs.art" >> $GITHUB_ENV
echo "REDIS_URL=redis://zyc:Zyc188208@redis-shzlf5t46gjvow7ua.redis.ivolces.com:6379/0" >> $GITHUB_ENV
elif [[ "${{ github.ref_name }}" == "dev" ]]; then
echo "IMAGE_TAG=dev-${BUILD_DATE}-${SHORT_SHA}" >> $GITHUB_ENV
echo "CR_SERVER_ACTIVE=${{ secrets.CR_SERVER }}" >> $GITHUB_ENV
echo "CR_USERNAME_ACTIVE=${{ secrets.CR_USERNAME }}" >> $GITHUB_ENV
echo "CR_PASSWORD_ACTIVE=${{ secrets.CR_PASSWORD }}" >> $GITHUB_ENV
echo "CR_ORG=dev" >> $GITHUB_ENV
echo "DEPLOY_ENV=development" >> $GITHUB_ENV
echo "DOMAIN_API=airflow-studio-api.test.airlabs.art" >> $GITHUB_ENV
echo "DOMAIN_WEB=airflow-studio.test.airlabs.art" >> $GITHUB_ENV
echo "REDIS_URL=redis://zyc:Zyc188208@redis-shzlsczo52dft8mia.redis.ivolces.com:6379/0" >> $GITHUB_ENV
fi
- name: Login to Volcano Engine CR
run: |
echo "${{ env.CR_PASSWORD_ACTIVE }}" | docker login --username "${{ env.CR_USERNAME_ACTIVE }}" --password-stdin ${{ env.CR_SERVER_ACTIVE }}
- name: Build and Push Backend
id: build_backend
run: |
set -o pipefail
for attempt in 1 2 3; do
echo "Build backend attempt $attempt/3..."
DOCKER_BUILDKIT=0 docker build \
--tag ${{ env.CR_SERVER_ACTIVE }}/${{ env.CR_ORG }}/video-backend:${{ env.IMAGE_TAG }} \
--tag ${{ env.CR_SERVER_ACTIVE }}/${{ env.CR_ORG }}/video-backend:latest \
./backend 2>&1 | tee /tmp/build.log && break
echo "Attempt $attempt failed, retrying in 10s..." && sleep 10
done
for attempt in 1 2 3; do
docker push ${{ env.CR_SERVER_ACTIVE }}/${{ env.CR_ORG }}/video-backend:${{ env.IMAGE_TAG }} && \
docker push ${{ env.CR_SERVER_ACTIVE }}/${{ env.CR_ORG }}/video-backend:latest && break
echo "Push attempt $attempt failed, retrying in 10s..." && sleep 10
done
- name: Build and Push Web
id: build_web
run: |
set -o pipefail
for attempt in 1 2 3; do
echo "Build web attempt $attempt/3..."
DOCKER_BUILDKIT=0 docker build \
--tag ${{ env.CR_SERVER_ACTIVE }}/${{ env.CR_ORG }}/video-web:${{ env.IMAGE_TAG }} \
--tag ${{ env.CR_SERVER_ACTIVE }}/${{ env.CR_ORG }}/video-web:latest \
./web 2>&1 | tee -a /tmp/build.log && break
echo "Attempt $attempt failed, retrying in 10s..." && sleep 10
done
for attempt in 1 2 3; do
docker push ${{ env.CR_SERVER_ACTIVE }}/${{ env.CR_ORG }}/video-web:${{ env.IMAGE_TAG }} && \
docker push ${{ env.CR_SERVER_ACTIVE }}/${{ env.CR_ORG }}/video-web:latest && break
echo "Push attempt $attempt failed, retrying in 10s..." && sleep 10
done
- name: Setup Kubectl
run: |
if ! command -v kubectl &>/dev/null; then
for attempt in 1 2 3; do
curl -LO "https://files.m.daocloud.io/dl.k8s.io/release/v1.28.0/bin/linux/amd64/kubectl" && break
echo "Download attempt $attempt failed, retrying in 5s..." && sleep 5
done
chmod +x kubectl && mv kubectl /usr/bin/kubectl
fi
kubectl version --client
- name: Set kubeconfig
run: |
mkdir -p $HOME/.kube
if [[ "${{ github.ref_name }}" == "master" ]]; then
printf '%s\n' '${{ secrets.VOLCANO_PROD_KUBE_CONFIG }}' > $HOME/.kube/config
elif [[ "${{ github.ref_name }}" == "dev" ]]; then
printf '%s\n' '${{ secrets.VOLCANO_TEST_KUBE_CONFIG }}' > $HOME/.kube/config
fi
chmod 600 $HOME/.kube/config
echo "kubeconfig lines: $(wc -l < $HOME/.kube/config)"
grep server $HOME/.kube/config || echo "WARNING: no server found in kubeconfig"
- name: Deploy to K3s
id: deploy
run: |
echo "Environment: ${{ env.DEPLOY_ENV }}"
CR_IMAGE="${{ env.CR_SERVER_ACTIVE }}/${{ env.CR_ORG }}"
# Replace image placeholders
sed -i "s|\${CI_REGISTRY_IMAGE}/video-backend:latest|${CR_IMAGE}/video-backend:${{ env.IMAGE_TAG }}|g" k8s/backend-deployment.yaml
sed -i "s|\${CI_REGISTRY_IMAGE}/video-backend:latest|${CR_IMAGE}/video-backend:${{ env.IMAGE_TAG }}|g" k8s/celery-deployment.yaml
sed -i "s|\${CI_REGISTRY_IMAGE}/video-web:latest|${CR_IMAGE}/video-web:${{ env.IMAGE_TAG }}|g" k8s/web-deployment.yaml
# Replace domain placeholders in ingress
sed -i "s|airflow-studio-api.airlabs.art|${{ env.DOMAIN_API }}|g" k8s/ingress.yaml
sed -i "s|airflow-studio.airlabs.art|${{ env.DOMAIN_WEB }}|g" k8s/ingress.yaml
# Replace DB config for production
if [[ "${{ env.DEPLOY_ENV }}" == "production" ]]; then
sed -i "s|mysql8351f937d637.rds.ivolces.com|mysqld9bb4e81696d.rds.ivolces.com|g" k8s/backend-deployment.yaml
sed -i "s|mysql8351f937d637.rds.ivolces.com|mysqld9bb4e81696d.rds.ivolces.com|g" k8s/celery-deployment.yaml
fi
# Replace CORS origin
sed -i "s|https://airflow-studio.airlabs.art|https://${{ env.DOMAIN_WEB }}|g" k8s/backend-deployment.yaml
# Replace Redis URL by environment
sed -i "s|redis://zyc:Zyc188208@redis-shzlsczo52dft8mia.redis.ivolces.com:6379/0|${{ env.REDIS_URL }}|g" k8s/backend-deployment.yaml
sed -i "s|redis://zyc:Zyc188208@redis-shzlsczo52dft8mia.redis.ivolces.com:6379/0|${{ env.REDIS_URL }}|g" k8s/celery-deployment.yaml
# All kubectl operations with retry (K3s 内网连接可能抖动)
export KUBECTL_TIMEOUT="--request-timeout=4s"
for attempt in 1 2 3 4 5; do
echo "Deploy attempt $attempt/5..."
{
# Create/update image pull secret for CR
kubectl $KUBECTL_TIMEOUT create secret docker-registry cr-pull-secret \
--docker-server="${{ env.CR_SERVER_ACTIVE }}" \
--docker-username="${{ env.CR_USERNAME_ACTIVE }}" \
--docker-password="${{ env.CR_PASSWORD_ACTIVE }}" \
--dry-run=client -o yaml | kubectl $KUBECTL_TIMEOUT apply -f -
# Create/update secrets (业务密钥DB 已写在 yaml 里)
kubectl $KUBECTL_TIMEOUT create secret generic video-backend-secrets \
--from-literal=ARK_API_KEY='${{ secrets.ARK_API_KEY }}' \
--from-literal=TOS_ACCESS_KEY='${{ secrets.TOS_ACCESS_KEY }}' \
--from-literal=TOS_SECRET_KEY='${{ secrets.TOS_SECRET_KEY }}' \
--from-literal=DJANGO_SECRET_KEY='${{ secrets.DJANGO_SECRET_KEY }}' \
--from-literal=ALIYUN_SMS_ACCESS_KEY='${{ secrets.ALIYUN_SMS_ACCESS_KEY }}' \
--from-literal=ALIYUN_SMS_ACCESS_SECRET='${{ secrets.ALIYUN_SMS_ACCESS_SECRET }}' \
--dry-run=client -o yaml | kubectl $KUBECTL_TIMEOUT apply -f -
# Apply manifests
kubectl $KUBECTL_TIMEOUT apply -f k8s/backend-deployment.yaml
kubectl $KUBECTL_TIMEOUT apply -f k8s/celery-deployment.yaml
kubectl $KUBECTL_TIMEOUT apply -f k8s/web-deployment.yaml
kubectl $KUBECTL_TIMEOUT apply -f k8s/ingress.yaml
# Preserve real client IP
kubectl $KUBECTL_TIMEOUT patch svc traefik -n kube-system -p '{"spec":{"externalTrafficPolicy":"Local"}}' 2>/dev/null || true
kubectl $KUBECTL_TIMEOUT rollout restart deployment/video-backend
kubectl $KUBECTL_TIMEOUT rollout restart deployment/celery-worker
kubectl $KUBECTL_TIMEOUT rollout restart deployment/video-web
} 2>&1 | tee /tmp/deploy.log && break
echo "Attempt $attempt failed, retrying in 30s..."
sleep 30
done
# ===== Log Center: failure reporting =====
- name: Report failure to Log Center
if: failure()
run: |
BUILD_LOG=""
DEPLOY_LOG=""
FAILED_STEP="unknown"
if [[ "${{ steps.build_backend.outcome }}" == "failure" || "${{ steps.build_web.outcome }}" == "failure" ]]; then
FAILED_STEP="build"
if [ -f /tmp/build.log ]; then
BUILD_LOG=$(tail -50 /tmp/build.log | sed 's/"/\\"/g' | sed ':a;N;$!ba;s/\n/\\n/g')
fi
elif [[ "${{ steps.deploy.outcome }}" == "failure" ]]; then
FAILED_STEP="deploy"
if [ -f /tmp/deploy.log ]; then
DEPLOY_LOG=$(tail -50 /tmp/deploy.log | sed 's/"/\\"/g' | sed ':a;N;$!ba;s/\n/\\n/g')
fi
fi
ERROR_LOG="${BUILD_LOG}${DEPLOY_LOG}"
if [ -z "$ERROR_LOG" ]; then
ERROR_LOG="No captured output. Check Gitea Actions UI for details."
fi
if [[ "$FAILED_STEP" == "deploy" ]]; then
SOURCE="deployment"
ERROR_TYPE="DeployError"
else
SOURCE="cicd"
ERROR_TYPE="DockerBuildError"
fi
curl -s -X POST "https://qiyuan-log-center-api.airlabs.art/api/v1/logs/report" \
-H "Content-Type: application/json" \
-d "{
\"project_id\": \"video_backend\",
\"environment\": \"${{ env.DEPLOY_ENV }}\",
\"level\": \"ERROR\",
\"source\": \"${SOURCE}\",
\"commit_hash\": \"${{ github.sha }}\",
\"repo_url\": \"https://gitea.airlabs.art/zyc/video-shuoshan.git\",
\"error\": {
\"type\": \"${ERROR_TYPE}\",
\"message\": \"[${FAILED_STEP}] Build and Deploy failed on branch ${{ github.ref_name }}\",
\"stack_trace\": [\"${ERROR_LOG}\"]
},
\"context\": {
\"job_name\": \"build-and-deploy\",
\"step_name\": \"${FAILED_STEP}\",
\"workflow\": \"${{ github.workflow }}\",
\"run_id\": \"${{ github.run_number }}\",
\"branch\": \"${{ github.ref_name }}\",
\"actor\": \"${{ github.actor }}\",
\"commit\": \"${{ github.sha }}\",
\"run_url\": \"https://gitea.airlabs.art/${{ github.repository }}/actions/runs/${{ github.run_number }}\"
}
}" || true
# ===== Cleanup: remove unused Docker resources =====
- name: Docker Cleanup
if: always()
run: |
docker container prune -f
docker image prune -f
docker builder prune -a -f
echo "Disk usage after cleanup:"
df -h / | tail -1