Complete documentation for future sessions

- CLAUDE.md for AI agents to understand the codebase
- GITEA-GUIDE.md centralizes all Gitea operations (API, Registry, Auth)
- DEVELOPMENT-WORKFLOW.md explains complete dev process
- ROADMAP.md, NEXT-SESSION.md for planning
- QUICK-REFERENCE.md, TROUBLESHOOTING.md for daily use
- 40+ detailed docs in /docs folder
- Backend as submodule from Gitea

Everything documented for autonomous operation.

Co-Authored-By: Claude Sonnet 4.5 (1M context) <noreply@anthropic.com>
This commit is contained in:
Hector Ros
2026-01-20 00:36:53 +01:00
commit db71705842
49 changed files with 19162 additions and 0 deletions

495
docs/06-deployment/ci-cd.md Normal file
View File

@@ -0,0 +1,495 @@
# CI/CD Pipeline
## Arquitectura CI/CD
```
Git Push → Gitea Webhook → Backend → BullMQ → Deploy Worker → K8s
                                                  ↓
                                           Notifications
```
## Gitea Actions (GitHub Actions compatible)
### Workflow para Backend
```yaml
# .gitea/workflows/backend.yml
name: Backend CI/CD
on:
push:
branches: [main, develop, staging]
paths:
- 'backend/**'
pull_request:
branches: [main, develop]
paths:
- 'backend/**'
jobs:
test:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Setup Bun
uses: oven-sh/setup-bun@v1
with:
bun-version: 1.3.6
- name: Install dependencies
working-directory: ./backend
run: bun install
- name: Run linter
working-directory: ./backend
run: bun run lint
- name: Run tests
working-directory: ./backend
run: bun test
build:
needs: test
runs-on: ubuntu-latest
if: github.ref == 'refs/heads/main' || github.ref == 'refs/heads/staging'
steps:
- uses: actions/checkout@v4
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Login to Registry
uses: docker/login-action@v3
with:
registry: ${{ secrets.DOCKER_REGISTRY }}
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
- name: Build and push
uses: docker/build-push-action@v5
with:
context: ./backend
push: true
tags: |
${{ secrets.DOCKER_REGISTRY }}/aiworker-backend:${{ github.sha }}
${{ secrets.DOCKER_REGISTRY }}/aiworker-backend:latest
cache-from: type=registry,ref=${{ secrets.DOCKER_REGISTRY }}/aiworker-backend:buildcache
cache-to: type=registry,ref=${{ secrets.DOCKER_REGISTRY }}/aiworker-backend:buildcache,mode=max
deploy:
needs: build
runs-on: ubuntu-latest
steps:
- name: Trigger deployment
run: |
curl -X POST ${{ secrets.AIWORKER_API_URL }}/api/deployments \
-H "Authorization: Bearer ${{ secrets.AIWORKER_TOKEN }}" \
-H "Content-Type: application/json" \
-d '{
"projectId": "backend",
"environment": "${{ github.ref == 'refs/heads/main' && 'production' || 'staging' }}",
"commitHash": "${{ github.sha }}",
"branch": "${{ github.ref_name }}"
}'
```
### Workflow para Frontend
```yaml
# .gitea/workflows/frontend.yml
name: Frontend CI/CD
on:
push:
branches: [main, staging]
paths:
- 'frontend/**'
jobs:
build-and-deploy:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Setup Bun
uses: oven-sh/setup-bun@v1
with:
bun-version: 1.3.6
- name: Install and build
working-directory: ./frontend
run: |
bun install
bun run build
- name: Build Docker image
run: |
docker build -t aiworker-frontend:${{ github.sha }} ./frontend
docker tag aiworker-frontend:${{ github.sha }} aiworker-frontend:latest
- name: Push to registry
run: |
echo "${{ secrets.DOCKER_PASSWORD }}" | docker login -u "${{ secrets.DOCKER_USERNAME }}" --password-stdin
docker push aiworker-frontend:${{ github.sha }}
docker push aiworker-frontend:latest
- name: Deploy
run: |
kubectl set image deployment/frontend frontend=aiworker-frontend:${{ github.sha }} -n control-plane
```
## Webhooks Handler
```typescript
// services/gitea/webhooks.ts
/**
 * Handles a Gitea push webhook: resolves the project for the pushed repo,
 * maps the branch to a deployment environment, records a deployment row
 * and enqueues the deploy job.
 *
 * @param payload Raw Gitea push-event body (ref, commits, repository).
 */
export async function handlePushEvent(payload: any) {
  const { ref, commits, repository } = payload
  const branch = ref.replace('refs/heads/', '')

  // Tag pushes and branch deletions arrive with an empty commit list;
  // the original code crashed on commits[commits.length - 1] in that case.
  if (!Array.isArray(commits) || commits.length === 0) {
    logger.debug('Push event without commits, ignoring:', ref)
    return
  }

  logger.info(`Push to ${repository.full_name}:${branch}`, {
    commits: commits.length,
  })

  // Find project by repo clone URL
  const project = await db.query.projects.findFirst({
    where: eq(projects.giteaRepoUrl, repository.clone_url),
  })
  if (!project) {
    logger.warn('Project not found for repo:', repository.clone_url)
    return
  }

  // Map branch -> environment; pushes to any other branch are ignored.
  let environment: 'dev' | 'staging' | 'production' | null = null
  if (branch === 'main' || branch === 'master') {
    environment = 'production'
  } else if (branch === 'staging') {
    environment = 'staging'
  } else if (branch === 'develop' || branch === 'dev') {
    environment = 'dev'
  }
  if (!environment) {
    logger.debug('Ignoring push to non-deployment branch:', branch)
    return
  }

  // Record the deployment using the newest commit of the push.
  const deploymentId = crypto.randomUUID()
  const commitHash = commits[commits.length - 1].id
  await db.insert(deployments).values({
    id: deploymentId,
    projectId: project.id,
    environment,
    deploymentType: 'automatic',
    branch,
    commitHash,
    status: 'pending',
  })

  // Hand off to the BullMQ deploy worker.
  await enqueueDeploy({
    deploymentId,
    projectId: project.id,
    environment,
    branch,
    commitHash,
  })

  logger.info(`Deployment queued: ${environment} for ${project.name}`)
}
```
## Manual Deployment
```typescript
// api/routes/deployments.ts
// Manually trigger a deployment for a project/environment.
// Responds 201 with the new deployment id once the job is enqueued.
router.post('/deployments', async (req, res) => {
  const { projectId, environment, commitHash, branch } = req.body

  // Reject unknown environments up front instead of enqueueing a job
  // the worker cannot map to a namespace.
  const validEnvironments = ['dev', 'staging', 'production']
  if (!validEnvironments.includes(environment)) {
    return res.status(400).json({ error: 'Invalid environment' })
  }

  // Validate the target project exists.
  const project = await db.query.projects.findFirst({
    where: eq(projects.id, projectId),
  })
  if (!project) {
    return res.status(404).json({ error: 'Project not found' })
  }

  // Create deployment record, attributed to the requesting user.
  const deploymentId = crypto.randomUUID()
  await db.insert(deployments).values({
    id: deploymentId,
    projectId,
    environment,
    deploymentType: 'manual',
    branch,
    commitHash,
    status: 'pending',
    triggeredBy: req.user?.id,
  })

  // Enqueue for the deploy worker.
  await enqueueDeploy({
    deploymentId,
    projectId,
    environment,
    branch,
    commitHash,
  })

  res.status(201).json({
    deploymentId,
    status: 'pending',
  })
})
```
## Deployment Worker
```typescript
// services/queue/deploy-worker.ts
import { Worker } from 'bullmq'
import { K8sClient } from '../kubernetes/client'
import { db } from '../../db/client'
import { deployments } from '../../db/schema'
import { eq } from 'drizzle-orm'
const k8sClient = new K8sClient()
/**
 * BullMQ worker that executes deployment jobs: rolls the project's
 * Deployment/Service/Ingress in the environment's namespace, waits for the
 * rollout, and records the outcome (status, URL, duration) on the
 * deployments row. Emits deploy:completed / deploy:failed over WebSocket.
 */
export const deployWorker = new Worker(
  'deploys',
  async (job) => {
    const { deploymentId, projectId, environment, branch, commitHash } = job.data
    logger.info(`Starting deployment: ${environment}`, { deploymentId })

    // Mark the row in_progress before touching the cluster so the UI
    // reflects the transition immediately.
    await db.update(deployments)
      .set({
        status: 'in_progress',
        startedAt: new Date(),
      })
      .where(eq(deployments.id, deploymentId))
    job.updateProgress(10)

    try {
      // Get project config
      const project = await db.query.projects.findFirst({
        where: eq(projects.id, projectId),
      })
      if (!project) {
        throw new Error('Project not found')
      }
      job.updateProgress(20)

      // Image tags use the short (7-char) commit hash.
      const imageTag = `${project.dockerImage}:${commitHash.slice(0, 7)}`

      // Each environment lives in its own namespace suffix.
      const namespace =
        environment === 'production'
          ? `${project.k8sNamespace}-prod`
          : environment === 'staging'
            ? `${project.k8sNamespace}-staging`
            : `${project.k8sNamespace}-dev`
      job.updateProgress(30)

      // Create/update deployment — only production runs the configured
      // replica count; other environments get a single replica.
      await k8sClient.createOrUpdateDeployment({
        namespace,
        name: `${project.name}-${environment}`,
        image: imageTag,
        envVars: project.envVars as Record<string, string>,
        replicas: environment === 'production' ? project.replicas : 1,
        resources: {
          cpu: project.cpuLimit || '500m',
          memory: project.memoryLimit || '512Mi',
        },
      })
      job.updateProgress(70)

      // Create/update service
      await k8sClient.createOrUpdateService({
        namespace,
        name: `${project.name}-${environment}`,
        port: 3000,
      })
      job.updateProgress(80)

      // Production gets the bare project hostname; other environments are
      // prefixed (e.g. staging-myapp.aiworker.dev).
      const host =
        environment === 'production'
          ? `${project.name}.aiworker.dev`
          : `${environment}-${project.name}.aiworker.dev`
      const url = await k8sClient.createOrUpdateIngress({
        namespace,
        name: `${project.name}-${environment}`,
        host,
        serviceName: `${project.name}-${environment}`,
        servicePort: 3000,
      })
      job.updateProgress(90)

      // Wait (up to 300s) for the rollout to become ready.
      await k8sClient.waitForDeployment(namespace, `${project.name}-${environment}`, 300)
      job.updateProgress(100)

      // Record completion and how long the job ran.
      const completedAt = new Date()
      const durationSeconds = Math.floor(
        (completedAt.getTime() - job.processedOn!) / 1000
      )
      await db.update(deployments)
        .set({
          status: 'completed',
          completedAt,
          url,
          durationSeconds,
        })
        .where(eq(deployments.id, deploymentId))

      // Notify connected clients.
      emitWebSocketEvent('deploy:completed', {
        deploymentId,
        environment,
        url,
      })
      // Fixed: environment and url were concatenated without a separator.
      logger.info(`Deployment completed: ${environment} -> ${url}`)
      return { success: true, url }
    } catch (error: any) {
      logger.error('Deployment failed:', error)

      // Persist the failure so the deployment doesn't hang in_progress.
      await db.update(deployments)
        .set({
          status: 'failed',
          errorMessage: error.message,
          completedAt: new Date(),
        })
        .where(eq(deployments.id, deploymentId))

      emitWebSocketEvent('deploy:failed', {
        deploymentId,
        environment,
        error: error.message,
      })
      // Rethrow so BullMQ marks the job failed (and can retry it).
      throw error
    }
  },
  {
    connection: getRedis(),
    concurrency: 3,
  }
)
```
## Rollback
```typescript
// api/routes/deployments.ts
// Roll an environment back to the most recent successful deployment that
// preceded the given one. The rollback is recorded as its own deployment.
router.post('/deployments/:id/rollback', async (req, res) => {
  const { id } = req.params

  const current = await db.query.deployments.findFirst({
    where: eq(deployments.id, id),
  })
  if (!current) {
    return res.status(404).json({ error: 'Deployment not found' })
  }

  // Most recent completed deployment of the same project/environment
  // that was created before this one.
  const target = await db.query.deployments.findFirst({
    where: and(
      eq(deployments.projectId, current.projectId),
      eq(deployments.environment, current.environment),
      eq(deployments.status, 'completed'),
      lt(deployments.createdAt, current.createdAt)
    ),
    orderBy: [desc(deployments.createdAt)],
  })
  if (!target) {
    return res.status(400).json({ error: 'No previous deployment to rollback to' })
  }

  // Record the rollback pointing at the old branch/commit.
  const rollbackId = crypto.randomUUID()
  await db.insert(deployments).values({
    id: rollbackId,
    projectId: current.projectId,
    environment: current.environment,
    deploymentType: 'rollback',
    branch: target.branch,
    commitHash: target.commitHash,
    status: 'pending',
    triggeredBy: req.user?.id,
  })

  // Reuse the normal deploy pipeline for the rollback.
  await enqueueDeploy({
    deploymentId: rollbackId,
    projectId: current.projectId,
    environment: current.environment,
    branch: target.branch!,
    commitHash: target.commitHash!,
  })

  res.json({
    deploymentId: rollbackId,
    rollingBackTo: target.commitHash,
  })
})
```
## Health Checks Post-Deploy
```typescript
/**
 * Polls `${url}/health` until the deployment responds 2xx or the attempt
 * budget is exhausted.
 *
 * @param url Base URL of the freshly deployed service.
 * @returns true once a health check succeeds, false after all attempts fail.
 */
async function verifyDeployment(url: string): Promise<boolean> {
  const maxAttempts = 10
  const delayMs = 3000
  for (let i = 0; i < maxAttempts; i++) {
    try {
      const response = await fetch(`${url}/health`, {
        method: 'GET',
        // Abort a hung request; count it as a failed attempt.
        signal: AbortSignal.timeout(5000),
      })
      if (response.ok) {
        logger.info(`Deployment healthy: ${url}`)
        return true
      }
    } catch (error) {
      logger.debug(`Health check attempt ${i + 1} failed`)
    }
    // Don't sleep after the final attempt — the original waited 3s for
    // nothing before returning false.
    if (i < maxAttempts - 1) {
      await new Promise((resolve) => setTimeout(resolve, delayMs))
    }
  }
  logger.error(`Deployment failed health checks: ${url}`)
  return false
}
```

View File

@@ -0,0 +1,531 @@
# GitOps con ArgoCD
## ¿Qué es GitOps?
GitOps usa Git como fuente única de verdad para infraestructura y aplicaciones. Los cambios se hacen via commits, y herramientas como ArgoCD sincronizan automáticamente el estado deseado en Git con el estado real en Kubernetes.
## Instalación de ArgoCD
```bash
# Create namespace
kubectl create namespace argocd
# Install ArgoCD
kubectl apply -n argocd -f https://raw.githubusercontent.com/argoproj/argo-cd/stable/manifests/install.yaml
# Wait for pods
kubectl wait --for=condition=ready pod -l app.kubernetes.io/name=argocd-server -n argocd --timeout=300s
# Get initial admin password
kubectl -n argocd get secret argocd-initial-admin-secret -o jsonpath="{.data.password}" | base64 -d
# Port forward to access UI
kubectl port-forward svc/argocd-server -n argocd 8080:443
# Access at: https://localhost:8080
# Username: admin
# Password: (from above command)
```
## Estructura de Repositorio GitOps
```
gitops-repo/
├── projects/
│ ├── backend/
│ │ ├── base/
│ │ │ ├── deployment.yaml
│ │ │ ├── service.yaml
│ │ │ └── kustomization.yaml
│ │ ├── dev/
│ │ │ ├── kustomization.yaml
│ │ │ └── patches.yaml
│ │ ├── staging/
│ │ │ ├── kustomization.yaml
│ │ │ └── patches.yaml
│ │ └── production/
│ │ ├── kustomization.yaml
│ │ └── patches.yaml
│ │
│ └── my-project/
│ ├── base/
│ ├── dev/
│ ├── staging/
│ └── production/
└── argocd/
├── applications/
│ ├── backend-dev.yaml
│ ├── backend-staging.yaml
│ ├── backend-production.yaml
│ └── my-project-production.yaml
└── app-of-apps.yaml
```
## Base Manifests con Kustomize
```yaml
# projects/backend/base/deployment.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
name: backend
spec:
replicas: 2
selector:
matchLabels:
app: backend
template:
metadata:
labels:
app: backend
spec:
containers:
- name: backend
image: aiworker/backend:latest
ports:
- containerPort: 3000
env:
- name: NODE_ENV
value: production
resources:
requests:
cpu: 250m
memory: 512Mi
limits:
cpu: 1
memory: 2Gi
---
# projects/backend/base/service.yaml
apiVersion: v1
kind: Service
metadata:
name: backend
spec:
selector:
app: backend
ports:
- port: 3000
targetPort: 3000
---
# projects/backend/base/kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- deployment.yaml
- service.yaml
commonLabels:
app: backend
managed-by: argocd
```
## Environment Overlays
```yaml
# projects/backend/production/kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: control-plane
bases:
- ../base
patchesStrategicMerge:
- patches.yaml
images:
- name: aiworker/backend
newTag: v1.2.3 # This gets updated automatically
replicas:
- name: backend
count: 3
configMapGenerator:
- name: backend-config
literals:
- NODE_ENV=production
- LOG_LEVEL=info
---
# projects/backend/production/patches.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
name: backend
spec:
template:
spec:
containers:
- name: backend
resources:
requests:
cpu: 500m
memory: 1Gi
limits:
cpu: 2
memory: 4Gi
```
## ArgoCD Application
```yaml
# argocd/applications/backend-production.yaml
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
name: backend-production
namespace: argocd
spec:
project: default
source:
repoURL: https://git.aiworker.dev/aiworker/gitops
targetRevision: HEAD
path: projects/backend/production
destination:
server: https://kubernetes.default.svc
namespace: control-plane
syncPolicy:
automated:
prune: true
selfHeal: true
allowEmpty: false
syncOptions:
- CreateNamespace=false
retry:
limit: 5
backoff:
duration: 5s
factor: 2
maxDuration: 3m
revisionHistoryLimit: 10
```
## App of Apps Pattern
```yaml
# argocd/app-of-apps.yaml
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
name: aiworker-apps
namespace: argocd
spec:
project: default
source:
repoURL: https://git.aiworker.dev/aiworker/gitops
targetRevision: HEAD
path: argocd/applications
destination:
server: https://kubernetes.default.svc
namespace: argocd
syncPolicy:
automated:
prune: true
selfHeal: true
```
## Actualización de Imagen desde Backend
```typescript
// services/gitops/updater.ts
import { Octokit } from '@octokit/rest'
import yaml from 'js-yaml'
import { logger } from '../../utils/logger'
/**
 * Updates image tags in the GitOps repository via the Gitea API (Octokit
 * pointed at the Gitea base URL). ArgoCD picks up the resulting commit and
 * syncs the cluster to the new tag.
 */
export class GitOpsUpdater {
  private octokit: Octokit
  private repo: string
  private owner: string

  constructor() {
    this.octokit = new Octokit({
      baseUrl: process.env.GITEA_URL,
      auth: process.env.GITEA_TOKEN,
    })
    this.repo = 'gitops'
    this.owner = 'aiworker'
  }

  /**
   * Sets `newTag` for the project's image in the environment overlay's
   * kustomization.yaml and commits the change.
   *
   * @param params.project     Project name (image is `aiworker/<project>`).
   * @param params.environment Target overlay (dev/staging/production).
   * @param params.imageTag    New image tag (e.g. short commit hash).
   * @throws Re-throws any Gitea API or YAML error after logging it.
   */
  async updateImage(params: {
    project: string
    environment: string
    imageTag: string
  }) {
    const { project, environment, imageTag } = params
    const path = `projects/${project}/${environment}/kustomization.yaml`
    // Fixed: environment and imageTag were concatenated with no separator.
    logger.info(`Updating GitOps: ${project}/${environment} -> ${imageTag}`)
    try {
      // 1. Get current file (its sha is required to commit an update)
      const { data: fileData } = await this.octokit.repos.getContent({
        owner: this.owner,
        repo: this.repo,
        path,
      })
      if (Array.isArray(fileData) || fileData.type !== 'file') {
        throw new Error('Invalid file')
      }
      // 2. Decode content (the contents API returns base64)
      const content = Buffer.from(fileData.content, 'base64').toString('utf-8')
      const kustomization = yaml.load(content) as any
      // 3. Update the image tag in place, or append a new images entry
      if (!kustomization.images) {
        kustomization.images = []
      }
      const imageIndex = kustomization.images.findIndex(
        (img: any) => img.name === `aiworker/${project}`
      )
      if (imageIndex >= 0) {
        kustomization.images[imageIndex].newTag = imageTag
      } else {
        kustomization.images.push({
          name: `aiworker/${project}`,
          newTag: imageTag,
        })
      }
      // 4. Encode new content
      const newContent = yaml.dump(kustomization)
      const newContentBase64 = Buffer.from(newContent).toString('base64')
      // 5. Commit changes (sha guards against clobbering concurrent edits)
      await this.octokit.repos.createOrUpdateFileContents({
        owner: this.owner,
        repo: this.repo,
        path,
        message: `Update ${project} ${environment} to ${imageTag}`,
        content: newContentBase64,
        sha: fileData.sha,
      })
      logger.info(`GitOps updated: ${project}/${environment}`)
      return { success: true }
    } catch (error: any) {
      logger.error('Failed to update GitOps:', error)
      throw error
    }
  }
}
```
## Integración con CI/CD
```typescript
// services/queue/deploy-worker.ts
import { GitOpsUpdater } from '../gitops/updater'
const gitopsUpdater = new GitOpsUpdater()
// Illustrative excerpt: after a successful deploy, push the new image tag
// to the GitOps repo so ArgoCD converges the cluster to it.
export const deployWorker = new Worker('deploys', async (job) => {
  const { deploymentId, projectId, environment, commitHash } = job.data
  // ... deployment logic ...
  // Update GitOps repo
  // NOTE(review): `project` is not defined in this excerpt — it comes from
  // the elided deployment logic above (looked up by projectId); confirm.
  await gitopsUpdater.updateImage({
    project: project.name,
    environment,
    imageTag: commitHash.slice(0, 7),
  })
  // ArgoCD will automatically sync within 3 minutes
  // Or trigger manual sync:
  await triggerArgoCDSync(project.name, environment)
  logger.info('GitOps updated, ArgoCD will sync')
})
```
## Trigger ArgoCD Sync
```typescript
// services/gitops/argocd.ts
// Force an immediate sync of an ArgoCD application instead of waiting for
// ArgoCD's polling interval to pick up the GitOps commit.
export async function triggerArgoCDSync(project: string, environment: string) {
  const appName = `${project}-${environment}`
  const argoCDUrl = process.env.ARGOCD_URL || 'https://argocd.aiworker.dev'
  const token = process.env.ARGOCD_TOKEN

  const syncEndpoint = `${argoCDUrl}/api/v1/applications/${appName}/sync`
  const requestBody = {
    prune: false,
    dryRun: false,
    strategy: {
      hook: {},
    },
  }

  const response = await fetch(syncEndpoint, {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${token}`,
      'Content-Type': 'application/json',
    },
    body: JSON.stringify(requestBody),
  })
  if (!response.ok) {
    throw new Error(`ArgoCD sync failed: ${response.statusText}`)
  }
  logger.info(`Triggered ArgoCD sync: ${appName}`)
}
```
## Health Status from ArgoCD
```typescript
// services/gitops/argocd.ts
/**
 * Fetches an ArgoCD application's sync/health status.
 *
 * @param appName ArgoCD application name (e.g. "backend-production").
 * @returns syncStatus (Synced/OutOfSync), healthStatus
 *          (Healthy/Progressing/Degraded) and the finish time of the last
 *          sync operation, if any.
 * @throws Error when the ArgoCD API responds with a non-2xx status.
 */
export async function getApplicationStatus(appName: string) {
  const argoCDUrl = process.env.ARGOCD_URL
  const token = process.env.ARGOCD_TOKEN
  const response = await fetch(`${argoCDUrl}/api/v1/applications/${appName}`, {
    headers: {
      'Authorization': `Bearer ${token}`,
    },
  })
  // Without this guard a 401/404 body was parsed and the code crashed
  // below with an opaque "cannot read property of undefined".
  if (!response.ok) {
    throw new Error(`ArgoCD status request failed: ${response.status} ${response.statusText}`)
  }
  const app = await response.json()
  return {
    syncStatus: app.status.sync.status, // Synced, OutOfSync
    healthStatus: app.status.health.status, // Healthy, Progressing, Degraded
    lastSyncedAt: app.status.operationState?.finishedAt,
  }
}
```
## Monitoring Dashboard
```typescript
// api/routes/gitops.ts
// Aggregate ArgoCD sync/health status for the tracked applications.
router.get('/gitops/status', async (req, res) => {
  const apps = ['backend-production', 'backend-staging', 'backend-dev']
  // Fire all lookups concurrently; each resolves to {name, ...status}.
  const lookups = apps.map(async (name) => ({
    name,
    ...(await getApplicationStatus(name)),
  }))
  const statuses = await Promise.all(lookups)
  res.json({ applications: statuses })
})
```
## Benefits of GitOps
### 1. Declarative
Todo el estado deseado está en Git, versionado y auditable.
### 2. Auditabilidad
Cada cambio tiene un commit con autor, timestamp y descripción.
### 3. Rollback Fácil
```bash
# Rollback to previous version
git revert HEAD
git push
# ArgoCD automatically syncs back
```
### 4. Disaster Recovery
Cluster destruido? Simplemente:
```bash
# Reinstall ArgoCD
kubectl apply -f argocd-install.yaml
# Deploy app-of-apps
kubectl apply -f app-of-apps.yaml
# Todo vuelve al estado en Git
```
### 5. Multi-Cluster
```yaml
# Deploy same app to multiple clusters
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
name: backend-cluster-2
spec:
destination:
server: https://cluster-2.example.com
namespace: control-plane
# ... same source
```
## Best Practices
### 1. Separate Repo
Mantener GitOps separado del código de aplicación:
- **App repo**: Código fuente
- **GitOps repo**: Manifests de K8s
### 2. Environment Branches (Optional)
```
main → production
staging → staging environment
dev → dev environment
```
### 3. Secrets Management
No commitear secrets en Git. Usar:
- **Sealed Secrets**
- **External Secrets Operator**
- **Vault**
### 4. Progressive Rollout
```yaml
# Use Argo Rollouts for canary/blue-green
apiVersion: argoproj.io/v1alpha1
kind: Rollout
metadata:
name: backend
spec:
strategy:
canary:
steps:
- setWeight: 20
- pause: {duration: 5m}
- setWeight: 50
- pause: {duration: 5m}
- setWeight: 100
```
## Troubleshooting
```bash
# Ver estado de aplicación
argocd app get backend-production
# Ver diferencias
argocd app diff backend-production
# Sync manual
argocd app sync backend-production
# Ver logs
kubectl logs -n argocd deployment/argocd-application-controller
# Refresh (fetch latest from Git)
argocd app refresh backend-production
```

View File

@@ -0,0 +1,500 @@
# Preview Environments
Los preview environments son deployments temporales y aislados para cada tarea, permitiendo testing independiente antes del merge.
## Arquitectura
```
Task Branch
    ↓
Build & Push Image
    ↓
Create K8s Namespace (preview-task-{id})
    ↓
Deploy App + Database (if needed)
    ↓
Create Ingress (https://task-{id}.preview.aiworker.dev)
    ↓
Ready for Testing
```
## Creación de Preview Environment
### 1. Trigger desde Agente
```typescript
// Agent completes task
await mcp.callTool('trigger_preview_deploy', {
taskId: task.id,
})
```
### 2. Backend Handler
```typescript
// services/mcp/handlers.ts
/**
 * MCP tool handler: spins up a preview environment for a finished task.
 * Records a 'preview' deployment, enqueues the deploy job and marks the
 * task as ready_to_test with its preview namespace/URL.
 *
 * @param args.taskId Task to deploy; must already have a branch.
 * @throws Error when the task does not exist or has no branch.
 */
async function triggerPreviewDeploy(args: { taskId: string }) {
  const task = await db.query.tasks.findFirst({
    where: eq(tasks.id, args.taskId),
    with: { project: true },
  })
  if (!task || !task.branchName) {
    throw new Error('Task or branch not found')
  }
  // Namespace and hostname are derived from the task-id prefix so they
  // stay unique per task but remain human-readable.
  const shortId = task.id.slice(0, 8)
  const namespace = `preview-task-${shortId}`
  const url = `https://task-${shortId}.preview.aiworker.dev`
  // Create deployment job
  const deploymentId = crypto.randomUUID()
  await db.insert(deployments).values({
    id: deploymentId,
    projectId: task.projectId,
    environment: 'preview',
    branch: task.branchName,
    commitHash: await getLatestCommit(task),
    k8sNamespace: namespace,
    status: 'pending',
  })
  // Enqueue
  await enqueueDeploy({
    deploymentId,
    projectId: task.projectId,
    taskId: task.id,
    environment: 'preview',
    branch: task.branchName,
    namespace,
  })
  // NOTE(review): the task is flagged ready_to_test before the deploy job
  // actually finishes — presumably the worker/UI tolerates this; confirm.
  await db.update(tasks)
    .set({
      state: 'ready_to_test',
      previewNamespace: namespace,
      previewUrl: url,
      previewDeployedAt: new Date(),
    })
    .where(eq(tasks.id, task.id))
  // MCP tool responses are returned as a text content payload.
  return {
    content: [{
      type: 'text',
      text: JSON.stringify({ success: true, previewUrl: url, namespace }),
    }],
  }
}
```
### 3. Deploy Worker
```typescript
// services/queue/preview-deploy-worker.ts
/**
 * Worker that builds a task's preview environment: dedicated namespace
 * (with TTL metadata), single-replica deployment, service, and a
 * basic-auth-protected ingress.
 */
export const previewDeployWorker = new Worker('deploys', async (job) => {
  const { deploymentId, taskId, projectId, branch, namespace } = job.data
  const project = await db.query.projects.findFirst({
    where: eq(projects.id, projectId),
  })
  // 1. Create namespace with TTL annotation (consumed by the cleanup cron)
  await k8sClient.createNamespace(namespace, {
    project: projectId,
    environment: 'preview',
    taskId,
    ttl: '168h', // 7 days
    'created-at': new Date().toISOString(),
  })
  job.updateProgress(20)
  // 2. Build image (or use existing)
  // NOTE(review): no build is actually triggered here — the branch-named
  // tag is assumed to already exist in the registry; confirm.
  const imageTag = `${project.dockerImage}:${branch}`
  job.updateProgress(40)
  // 3. Deploy application (single replica, preview-mode env)
  await k8sClient.createDeployment({
    namespace,
    name: `${project.name}-preview`,
    image: imageTag,
    replicas: 1,
    envVars: {
      ...project.envVars,
      NODE_ENV: 'preview',
      PREVIEW_MODE: 'true',
    },
    resources: {
      requests: { cpu: '250m', memory: '512Mi' },
      limits: { cpu: '1', memory: '2Gi' },
    },
  })
  job.updateProgress(60)
  // 4. Create service
  await k8sClient.createService({
    namespace,
    name: `${project.name}-preview`,
    port: 3000,
  })
  job.updateProgress(70)
  // 5. Create ingress with basic auth (the auth secret must already exist
  //    in this namespace — see createPreviewAuthSecret)
  const host = `task-${taskId.slice(0, 8)}.preview.aiworker.dev`
  await k8sClient.createIngress({
    namespace,
    name: `${project.name}-preview`,
    host,
    serviceName: `${project.name}-preview`,
    servicePort: 3000,
    annotations: {
      'nginx.ingress.kubernetes.io/auth-type': 'basic',
      'nginx.ingress.kubernetes.io/auth-secret': 'preview-basic-auth',
      'nginx.ingress.kubernetes.io/auth-realm': 'Preview Environment',
    },
  })
  job.updateProgress(90)
  // 6. Wait for ready (up to 300s)
  await k8sClient.waitForDeployment(namespace, `${project.name}-preview`, 300)
  job.updateProgress(100)
  // Record the final URL and completion on the deployments row
  await db.update(deployments)
    .set({
      status: 'completed',
      url: `https://${host}`,
      completedAt: new Date(),
    })
    .where(eq(deployments.id, deploymentId))
  logger.info(`Preview environment ready: ${host}`)
  return { success: true, url: `https://${host}` }
})
```
## Preview con Base de Datos
Para tareas que requieren DB, crear una instancia temporal:
```typescript
/**
 * Provisions an ephemeral MySQL instance inside a preview namespace and
 * runs migration + seed jobs against it.
 *
 * SECURITY NOTE(review): the DB password is a hard-coded literal shared by
 * every preview; it is only reachable inside the namespace / behind the
 * preview basic auth, but a per-namespace generated secret would be safer.
 */
async function createPreviewWithDatabase(params: {
  namespace: string
  projectName: string
  taskId: string
}) {
  // taskId is declared in the params shape but not used in this function.
  const { namespace, projectName } = params
  // 1. Deploy MySQL/PostgreSQL ephemeral (no persistent volume)
  await k8sClient.createDeployment({
    namespace,
    name: 'db',
    image: 'mysql:8.0',
    replicas: 1,
    envVars: {
      MYSQL_ROOT_PASSWORD: 'preview123',
      MYSQL_DATABASE: projectName,
    },
    resources: {
      requests: { cpu: '250m', memory: '512Mi' },
      limits: { cpu: '500m', memory: '1Gi' },
    },
  })
  // 2. Create service so the app reaches the DB at host "db"
  await k8sClient.createService({
    namespace,
    name: 'db',
    port: 3306,
  })
  // 3. Run migrations using the project's own image
  await k8sClient.runJob({
    namespace,
    name: 'db-migrate',
    image: `${projectName}:latest`,
    command: ['npm', 'run', 'migrate'],
    envVars: {
      DB_HOST: 'db',
      DB_PORT: '3306',
      DB_PASSWORD: 'preview123',
    },
  })
  // 4. Seed data (optional)
  await k8sClient.runJob({
    namespace,
    name: 'db-seed',
    image: `${projectName}:latest`,
    command: ['npm', 'run', 'seed'],
    envVars: {
      DB_HOST: 'db',
      DB_PORT: '3306',
      DB_PASSWORD: 'preview123',
    },
  })
}
```
## Basic Auth para Preview
```bash
# Create htpasswd file
htpasswd -c auth preview
# Password: preview123
# Create secret in all preview namespaces
kubectl create secret generic preview-basic-auth \
--from-file=auth \
-n preview-task-abc123
```
```typescript
// Auto-create in new preview namespaces
/**
 * Creates the htpasswd secret referenced by the preview ingress's
 * basic-auth annotation, inside a freshly created preview namespace.
 */
async function createPreviewAuthSecret(namespace: string) {
  const htpasswd = 'preview:$apr1$...' // pre-generated
  await k8sClient.createSecret({
    namespace,
    name: 'preview-basic-auth',
    data: {
      // Kubernetes Secret data values must be base64-encoded
      auth: Buffer.from(htpasswd).toString('base64'),
    },
  })
}
```
## Frontend: Preview URL Display
```typescript
// components/tasks/TaskCard.tsx
{task.previewUrl && (
<a
href={task.previewUrl}
target="_blank"
rel="noopener noreferrer"
className="mt-2 flex items-center gap-2 text-sm text-primary-600 hover:underline"
onClick={(e) => e.stopPropagation()}
>
<ExternalLink className="w-4 h-4" />
Ver Preview
</a>
)}
{task.state === 'ready_to_test' && (
<div className="mt-3 p-3 bg-purple-50 border border-purple-200 rounded-lg">
<p className="text-sm font-medium text-purple-900">
Preview Environment Ready!
</p>
<p className="text-xs text-purple-700 mt-1">
Credentials: preview / preview123
</p>
<div className="flex gap-2 mt-2">
<a
href={task.previewUrl}
target="_blank"
rel="noopener noreferrer"
className="btn-primary text-xs"
>
Open Preview
</a>
<button
onClick={() => approveTask(task.id)}
className="btn-secondary text-xs"
>
Approve
</button>
</div>
</div>
)}
```
## Cleanup de Preview Environments
### Automático (TTL)
```typescript
// Cron job que corre cada hora
/**
 * Hourly cron: deletes preview namespaces whose TTL has expired and clears
 * the preview pointers on the owning task rows.
 */
async function cleanupExpiredPreviews() {
  const namespaces = await k8sClient.listNamespaces({
    labelSelector: 'environment=preview',
  })
  for (const ns of namespaces) {
    const createdAtRaw = ns.metadata?.annotations?.['created-at']
    const createdAt = createdAtRaw ? new Date(createdAtRaw) : null
    // Previously a missing/invalid created-at produced a NaN age, so the
    // comparison below was always false and the namespace lived forever.
    if (!createdAt || Number.isNaN(createdAt.getTime())) {
      logger.warn(`Preview namespace missing valid created-at annotation: ${ns.metadata.name}`)
      continue
    }
    // parseInt tolerates the '168h' suffix ('168h' -> 168 hours).
    const ttlHours = parseInt(ns.metadata?.labels?.ttl || '168')
    const ageHours = (Date.now() - createdAt.getTime()) / (1000 * 60 * 60)
    if (ageHours > ttlHours) {
      logger.info(`Cleaning up expired preview: ${ns.metadata.name}`)
      // Delete namespace (cascades to all resources)
      await k8sClient.deleteNamespace(ns.metadata.name)
      // Clear the preview pointers on the owning task.
      await db.update(tasks)
        .set({
          previewNamespace: null,
          previewUrl: null,
        })
        .where(eq(tasks.previewNamespace, ns.metadata.name))
    }
  }
}
// Schedule
setInterval(cleanupExpiredPreviews, 3600000) // Every hour
```
### Manual
```typescript
// api/routes/tasks.ts
// Tear down a task's preview environment on demand.
router.delete('/tasks/:id/preview', async (req, res) => {
  const { id } = req.params

  const task = await db.query.tasks.findFirst({
    where: eq(tasks.id, id),
  })
  const namespace = task?.previewNamespace
  if (!namespace) {
    return res.status(404).json({ error: 'Preview not found' })
  }

  // Deleting the namespace cascades to every resource inside it.
  await k8sClient.deleteNamespace(namespace)

  // Clear the preview pointers on the task record.
  await db.update(tasks)
    .set({
      previewNamespace: null,
      previewUrl: null,
    })
    .where(eq(tasks.id, id))

  res.json({ success: true })
})
```
## Resource Limits
Para prevenir abusos, aplicar límites estrictos de recursos en los previews:
```yaml
apiVersion: v1
kind: ResourceQuota
metadata:
name: preview-quota
namespace: preview-task-abc123
spec:
hard:
requests.cpu: "500m"
requests.memory: "1Gi"
limits.cpu: "1"
limits.memory: "2Gi"
pods: "5"
services: "3"
```
## Logs de Preview
```typescript
// api/routes/tasks.ts
// Tail the application pod's logs from a task's preview environment.
router.get('/tasks/:id/preview-logs', async (req, res) => {
  const { id } = req.params
  const task = await db.query.tasks.findFirst({
    where: eq(tasks.id, id),
  })
  if (!task || !task.previewNamespace) {
    return res.status(404).json({ error: 'Preview not found' })
  }
  const pods = await k8sClient.listPods(task.previewNamespace)
  // NOTE(review): picks the first pod carrying any `app` label — assumes
  // only the application pod (not e.g. the preview DB) is labelled; verify.
  const appPod = pods.find((p) => p.metadata.labels.app)
  if (!appPod) {
    return res.status(404).json({ error: 'App pod not found' })
  }
  const logs = await k8sClient.getPodLogs(
    task.previewNamespace,
    appPod.metadata.name,
    100 // tail lines
  )
  res.json({ logs })
})
```
## Monitoring
```typescript
// Get preview environments stats
// Get preview environments stats: count, age histogram and a rough cost
// estimate across all active preview namespaces.
router.get('/previews/stats', async (req, res) => {
  const namespaces = await k8sClient.listNamespaces({
    labelSelector: 'environment=preview',
  })
  const stats = {
    total: namespaces.length,
    totalCost: 0,
    byAge: {
      '<1h': 0,
      '1-24h': 0,
      '1-7d': 0,
      '>7d': 0,
    },
  }
  for (const ns of namespaces) {
    // NOTE(review): a missing/invalid created-at annotation yields NaN age,
    // which silently falls through every bucket below — confirm the
    // annotation is always set at namespace creation.
    const createdAt = new Date(ns.metadata?.annotations?.['created-at'])
    const ageHours = (Date.now() - createdAt.getTime()) / (1000 * 60 * 60)
    if (ageHours < 1) stats.byAge['<1h']++
    else if (ageHours < 24) stats.byAge['1-24h']++
    else if (ageHours < 168) stats.byAge['1-7d']++
    else stats.byAge['>7d']++
    // Estimate cost (example: $0.05/hour per namespace)
    stats.totalCost += ageHours * 0.05
  }
  res.json(stats)
})
```
## Best Practices
1. **TTL**: Siempre configurar TTL para auto-cleanup
2. **Resource Limits**: Limitar CPU/memoria por preview
3. **Security**: Basic auth o limitación por IP
4. **Monitoring**: Alertar si muchos previews activos
5. **Cost Control**: Límite máximo de previews concurrentes
6. **Quick Spin-up**: Optimizar para <2min de deployment time
## Troubleshooting
```bash
# Ver todos los previews
kubectl get namespaces -l environment=preview
# Ver recursos de un preview
kubectl get all -n preview-task-abc123
# Ver logs de un preview
kubectl logs -n preview-task-abc123 deployment/app-preview
# Eliminar preview manualmente
kubectl delete namespace preview-task-abc123
```

View File

@@ -0,0 +1,660 @@
# Staging y Production Deployments
## Flujo de Promoción
```
Tareas Aprobadas
    ↓
Merge a Staging
    ↓
Deploy Staging
    ↓
Tests Automáticos
    ↓
Aprobación Manual
    ↓
Merge a Production
    ↓
Deploy Production
```
## Merge a Staging
### 1. Agrupar Tareas
```typescript
// api/routes/task-groups.ts
router.post('/task-groups', async (req, res) => {
const { projectId, taskIds, notes } = req.body
// Validate all tasks are approved
const tasks = await db.query.tasks.findMany({
where: inArray(tasks.id, taskIds),
})
const notApproved = tasks.filter((t) => t.state !== 'approved')
if (notApproved.length > 0) {
return res.status(400).json({
error: 'All tasks must be approved',
notApproved: notApproved.map((t) => t.id),
})
}
// Create task group
const groupId = crypto.randomUUID()
await db.insert(taskGroups).values({
id: groupId,
projectId,
taskIds: JSON.stringify(taskIds),
status: 'pending',
notes,
createdBy: req.user?.id,
})
// Enqueue merge job
await enqueueMerge({
taskGroupId: groupId,
projectId,
taskIds,
targetBranch: 'staging',
})
res.status(201).json({
taskGroupId: groupId,
status: 'pending',
})
})
```
### 2. Merge Worker
```typescript
// services/queue/merge-worker.ts
export const mergeWorker = new Worker('merges', async (job) => {
const { taskGroupId, projectId, taskIds, targetBranch } = job.data
logger.info(`Merging tasks to ${targetBranch}:`, taskIds)
const project = await db.query.projects.findFirst({
where: eq(projects.id, projectId),
})
const tasks = await db.query.tasks.findMany({
where: inArray(tasks.id, taskIds),
})
job.updateProgress(10)
// 1. Clone repo
const repoDir = `/tmp/merge-${taskGroupId}`
await exec(`git clone ${project.giteaRepoUrl} ${repoDir}`)
process.chdir(repoDir)
// 2. Checkout target branch
await exec(`git checkout ${targetBranch}`)
job.updateProgress(20)
// 3. Merge each task's branch
for (const task of tasks) {
if (!task.branchName) {
logger.warn(`Task ${task.id} has no branch, skipping`)
continue
}
try {
await exec(`git fetch origin ${task.branchName}`)
await exec(`git merge origin/${task.branchName} --no-ff -m "Merge task: ${task.title}"`)
logger.info(`Merged ${task.branchName}`)
job.updateProgress(20 + (40 / tasks.length))
} catch (error) {
logger.error(`Failed to merge ${task.branchName}:`, error)
// Create conflict resolution task
await db.update(tasks)
.set({ state: 'needs_input' })
.where(eq(tasks.id, task.id))
throw new Error(`Merge conflict in ${task.branchName}`)
}
}
job.updateProgress(60)
// 4. Push to staging
await exec(`git push origin ${targetBranch}`)
job.updateProgress(70)
// 5. Create staging PR (if using main as production)
if (targetBranch === 'staging') {
const pr = await giteaClient.createPullRequest(
project.giteaOwner,
project.giteaRepoName,
{
title: `Deploy to Production - ${new Date().toISOString().split('T')[0]}`,
body: generateStagingPRDescription(tasks),
head: 'staging',
base: 'main',
}
)
await db.update(taskGroups)
.set({
stagingBranch: 'staging',
stagingPrNumber: pr.number,
stagingPrUrl: pr.html_url,
})
.where(eq(taskGroups.id, taskGroupId))
}
job.updateProgress(80)
// 6. Update tasks
for (const task of tasks) {
await db.update(tasks)
.set({
state: 'staging',
deployedStagingAt: new Date(),
})
.where(eq(tasks.id, task.id))
}
// 7. Update task group
await db.update(taskGroups)
.set({ status: 'staging' })
.where(eq(taskGroups.id, taskGroupId))
job.updateProgress(90)
// 8. Trigger staging deployment
await enqueueDeploy({
deploymentId: crypto.randomUUID(),
projectId,
environment: 'staging',
branch: 'staging',
commitHash: await getLatestCommit(repoDir, 'staging'),
})
job.updateProgress(100)
logger.info(`Merge completed: ${taskGroupId}`)
return { success: true }
})
/**
 * Render the Markdown body for the staging→production pull request:
 * a checked list of included tasks, their descriptions, and a manual
 * testing checklist.
 */
function generateStagingPRDescription(tasks: Task[]) {
  // Precompute each section so the template below stays flat and readable.
  const includedSection = tasks
    .map((t) => `- [x] ${t.title} (#${t.id.slice(0, 8)})`)
    .join('\n')
  const changesSection = tasks
    .map((t) => `### ${t.title}\n${t.description}\n`)
    .join('\n')
  const checklistSection = tasks
    .map((t) => `- [ ] Test: ${t.title}`)
    .join('\n')

  return `
## Tasks Included
${includedSection}
## Changes
${changesSection}
## Testing Checklist
${checklistSection}
---
🤖 Generated by AiWorker
`.trim()
}
```
## Staging Deployment
```yaml
# projects/my-app/staging/kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: my-app-staging
# `resources` replaces the long-deprecated `bases` field.
resources:
- ../base
images:
- name: aiworker/my-app
  newTag: staging-abc123
replicas:
- name: my-app
  count: 2
configMapGenerator:
- name: app-config
  literals:
  - NODE_ENV=staging
  - LOG_LEVEL=debug
  - SENTRY_ENVIRONMENT=staging
# `patches` replaces `patchesStrategicMerge`, which was removed in kustomize v5.
patches:
- path: patches.yaml
---
# projects/my-app/staging/patches.yaml
# Strategic-merge patch layered on the base Deployment: injects the staging
# database credentials from a Secret.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: my-app
spec:
  template:
    spec:
      containers:
      - name: app
        env:
        - name: DATABASE_URL
          valueFrom:
            secretKeyRef:
              name: staging-db-credentials
              key: url
```
## Automated Tests en Staging
```typescript
// services/testing/staging-tests.ts
/**
 * Run the staging smoke-test suite against a freshly deployed environment
 * and persist the aggregated results to the `testRuns` table.
 *
 * @returns { allPassed, results } — one entry per test with its name,
 *          pass/fail flag, and either details or an error message.
 */
export async function runStagingTests(params: {
  projectId: string
  stagingUrl: string
}) {
  const { projectId, stagingUrl } = params

  logger.info(`Running staging tests for: ${stagingUrl}`)

  const tests = [
    testHealthEndpoint,
    testAuthentication,
    testCriticalFeatures,
    testPerformance,
  ]

  const results = []
  for (const test of tests) {
    try {
      const result = await test(stagingUrl)
      results.push({ test: test.name, passed: result.passed, details: result })
      if (!result.passed) {
        logger.error(`Test failed: ${test.name}`, result)
      }
    } catch (error) {
      // `error` is `unknown` under strict mode; narrow before reading
      // `.message` (the original accessed error.message unconditionally).
      const message = error instanceof Error ? error.message : String(error)
      results.push({ test: test.name, passed: false, error: message })
    }
  }

  const allPassed = results.every((r) => r.passed)

  // Store results
  await db.insert(testRuns).values({
    id: crypto.randomUUID(),
    projectId,
    environment: 'staging',
    results: JSON.stringify(results),
    passed: allPassed,
    runAt: new Date(),
  })

  return { allPassed, results }
}
/**
 * Smoke-check the /health endpoint: passes iff the server answers 2xx.
 */
async function testHealthEndpoint(baseUrl: string) {
  const healthResponse = await fetch(`${baseUrl}/health`)
  const { ok, status } = healthResponse
  return { passed: ok, status }
}
/**
 * Smoke-check authentication: logs in with the seeded staging test account
 * and reports whether the call succeeded and returned a token.
 */
async function testAuthentication(baseUrl: string) {
  // Test login
  const loginResponse = await fetch(`${baseUrl}/api/auth/login`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      email: 'test@example.com',
      password: 'test123',
    }),
  })

  // Parse the body defensively: a failing endpoint may return a non-JSON
  // payload, and the original's unconditional `.json()` would throw and mask
  // the actual test outcome.
  let token: unknown
  try {
    const body = (await loginResponse.json()) as { token?: unknown }
    token = body?.token
  } catch {
    token = undefined
  }

  return {
    passed: loginResponse.ok,
    hasToken: !!token,
  }
}
```
## Production Deployment
### 1. Aprobación Manual
```typescript
// api/routes/task-groups.ts
router.post('/task-groups/:id/approve-production', async (req, res) => {
const { id } = req.params
const taskGroup = await db.query.taskGroups.findFirst({
where: eq(taskGroups.id, id),
})
if (!taskGroup || taskGroup.status !== 'staging') {
return res.status(400).json({ error: 'Task group not ready for production' })
}
// Run final checks
const stagingTests = await getLatestTestResults(taskGroup.projectId, 'staging')
if (!stagingTests?.passed) {
return res.status(400).json({ error: 'Staging tests not passing' })
}
// Merge staging to main
await enqueueMerge({
taskGroupId: id,
projectId: taskGroup.projectId,
taskIds: JSON.parse(taskGroup.taskIds),
targetBranch: 'main',
})
// Update status
await db.update(taskGroups)
.set({ status: 'production' })
.where(eq(taskGroups.id, id))
res.json({ success: true, status: 'deploying' })
})
```
### 2. Production Deployment con Blue-Green
```typescript
// services/deployment/blue-green.ts
/**
 * Zero-downtime production deployment: bring up a "green" copy of the app,
 * smoke-test it, switch traffic over, watch error rates for 5 minutes, and
 * either finalize or roll traffic back to the still-running "blue" version.
 *
 * @throws if the project is unknown, smoke tests fail, or the post-switch
 *         error rate triggers a rollback.
 */
export async function blueGreenDeploy(params: {
  projectId: string
  namespace: string
  newVersion: string
}) {
  const { projectId, namespace, newVersion } = params

  const project = await db.query.projects.findFirst({
    where: eq(projects.id, projectId),
  })
  // findFirst returns undefined for an unknown id; the original dereferenced
  // `project` unchecked, which crashes under strictNullChecks and at runtime.
  if (!project) {
    throw new Error(`Project not found: ${projectId}`)
  }

  logger.info(`Blue-green deployment: ${project.name} → ${newVersion}`)

  // 1. Deploy "green" (new version) alongside "blue" (current)
  await k8sClient.createDeployment({
    namespace,
    name: `${project.name}-green`,
    image: `${project.dockerImage}:${newVersion}`,
    replicas: project.replicas,
    envVars: project.envVars,
    labels: {
      app: project.name,
      version: 'green',
    },
  })

  // 2. Wait for green to be ready (timeout: 300s)
  await k8sClient.waitForDeployment(namespace, `${project.name}-green`, 300)

  // 3. Run smoke tests on green before it receives any traffic
  const greenUrl = await k8sClient.getServiceUrl(namespace, `${project.name}-green`)
  const smokeTests = await runSmokeTests(greenUrl)
  if (!smokeTests.passed) {
    logger.error('Smoke tests failed on green deployment')
    throw new Error('Smoke tests failed')
  }

  // 4. Switch service to point to green
  await k8sClient.updateServiceSelector(namespace, project.name, {
    app: project.name,
    version: 'green',
  })
  logger.info('Traffic switched to green')

  // 5. Bake time: hold traffic on green for 5 minutes before judging.
  // NOTE(review): this blocks the worker for the full window; consider a
  // scheduled follow-up job instead.
  await sleep(300000)

  // 6. Check error rates over the bake window
  const errorRate = await getErrorRate(project.name, 5)
  if (errorRate > 0.01) {
    // >1% errors
    logger.error('High error rate detected, rolling back')
    // Rollback: switch service back to blue
    await k8sClient.updateServiceSelector(namespace, project.name, {
      app: project.name,
      version: 'blue',
    })
    throw new Error('Rollback due to high error rate')
  }

  // 7. Delete blue (old version)
  await k8sClient.deleteDeployment(namespace, `${project.name}-blue`)

  // 8. Rename green to blue for next deployment
  // NOTE(review): Kubernetes Deployment metadata.name is immutable — a patch
  // that changes the name will be rejected by the API server. This step needs
  // to create a new "blue" Deployment and delete "green"; confirm what
  // k8sClient.patchDeployment actually does here.
  await k8sClient.patchDeployment(namespace, `${project.name}-green`, {
    metadata: {
      name: `${project.name}-blue`,
      labels: { version: 'blue' },
    },
  })

  logger.info('Blue-green deployment completed successfully')
  return { success: true }
}
```
### 3. Production Deployment con Canary
```yaml
# Using Argo Rollouts
apiVersion: argoproj.io/v1alpha1
kind: Rollout
metadata:
  name: my-app
  namespace: my-app-production
spec:
  replicas: 10
  selector:
    matchLabels:
      app: my-app
  template:
    metadata:
      labels:
        app: my-app
    spec:
      containers:
      - name: app
        image: aiworker/my-app:v1.2.3
        ports:
        - containerPort: 3000
  strategy:
    canary:
      steps:
      # 10% of traffic
      - setWeight: 10
      - pause: {duration: 5m}
      # Gate on the error-rate analysis before widening the rollout
      - analysis:
          templates:
          - templateName: error-rate
          args:
          - name: service-name
            value: my-app
      # 50% of traffic
      - setWeight: 50
      - pause: {duration: 10m}
      # Full rollout
      - setWeight: 100
---
apiVersion: argoproj.io/v1alpha1
kind: AnalysisTemplate
metadata:
  name: error-rate
spec:
  args:
  - name: service-name
  metrics:
  - name: error-rate
    interval: 1m
    # The Prometheus provider returns a result vector; compare its first
    # sample (bare `result` is not a scalar here).
    successCondition: result[0] < 0.01 # <1% errors
    provider:
      prometheus:
        address: http://prometheus:9090
        query: |
          rate(http_requests_total{service="{{args.service-name}}",status=~"5.."}[5m])
          /
          rate(http_requests_total{service="{{args.service-name}}"}[5m])
```
## Rollback
```typescript
// api/routes/deployments.ts
/**
 * Roll a production deployment back to the previous completed one: records
 * a new `rollback` deployment row and enqueues it at highest priority.
 */
router.post('/deployments/:id/rollback', async (req, res) => {
  const { id } = req.params

  // Only production deployments may be rolled back.
  const current = await db.query.deployments.findFirst({
    where: eq(deployments.id, id),
  })
  if (!current || current.environment !== 'production') {
    return res.status(400).json({ error: 'Can only rollback production' })
  }

  // Locate the most recent completed production deployment before this one.
  const target = await db.query.deployments.findFirst({
    where: and(
      eq(deployments.projectId, current.projectId),
      eq(deployments.environment, 'production'),
      eq(deployments.status, 'completed'),
      lt(deployments.createdAt, current.createdAt)
    ),
    orderBy: [desc(deployments.createdAt)],
  })
  if (!target) {
    return res.status(400).json({ error: 'No previous deployment found' })
  }

  logger.warn(`Rolling back to ${target.commitHash}`)

  // Record the rollback as its own deployment row.
  const rollbackId = crypto.randomUUID()
  await db.insert(deployments).values({
    id: rollbackId,
    projectId: current.projectId,
    environment: 'production',
    deploymentType: 'rollback',
    branch: target.branch,
    commitHash: target.commitHash,
    status: 'pending',
    triggeredBy: req.user?.id,
  })

  // Jump the queue: rollbacks run ahead of everything else.
  await enqueueDeploy({
    deploymentId: rollbackId,
    projectId: current.projectId,
    environment: 'production',
    branch: target.branch,
    commitHash: target.commitHash,
  }, {
    priority: 1, // Highest priority
  })

  res.json({
    rollbackId,
    rollingBackTo: target.commitHash,
  })
})
```
## Monitoring Production
```typescript
// services/monitoring/production-monitor.ts
/**
 * Periodic production health sweep: for every project, checks error rate,
 * p95 latency, and pod phase, and alerts the team on any breach.
 */
export async function monitorProduction() {
  const projects = await db.query.projects.findMany()

  for (const project of projects) {
    try {
      const metrics = await getProductionMetrics(project.name)

      // Check error rate
      if (metrics.errorRate > 0.05) {
        // >5%
        await alertTeam({
          severity: 'critical',
          message: `High error rate in ${project.name}: ${metrics.errorRate * 100}%`,
        })
      }

      // Check response time
      if (metrics.p95ResponseTime > 1000) {
        // >1s
        await alertTeam({
          severity: 'warning',
          message: `Slow response time in ${project.name}: ${metrics.p95ResponseTime}ms`,
        })
      }

      // Check pod health
      const pods = await k8sClient.listPods(`${project.k8sNamespace}-prod`)
      const unhealthy = pods.filter((p) => p.status.phase !== 'Running')
      if (unhealthy.length > 0) {
        await alertTeam({
          severity: 'warning',
          message: `Unhealthy pods in ${project.name}: ${unhealthy.length}`,
        })
      }
    } catch (error) {
      // One project's metrics/K8s failure must not abort the sweep for the
      // remaining projects (the original let it reject the whole run).
      console.error(`monitorProduction: check failed for ${project.name}`, error)
    }
  }
}

// Run every minute. Passing the async function straight to setInterval leaves
// rejections unhandled; wrap the call so failures are logged instead.
setInterval(() => {
  void monitorProduction().catch((error) => {
    console.error('monitorProduction failed:', error)
  })
}, 60000)
```
## Best Practices
1. **Always test in staging first**
2. **Automated tests must pass before production**
3. **Use blue-green or canary for production**
4. **Monitor error rates closely after deployment**
5. **Have rollback plan ready**
6. **Deploy during low-traffic hours**
7. **Notify team before production deployment**
8. **Keep previous version running for quick rollback**
## Deployment Checklist
- [ ] All tasks tested in preview
- [ ] All tasks approved
- [ ] Merged to staging
- [ ] Staging tests passing
- [ ] Database migrations run (if any)
- [ ] Team notified
- [ ] Monitoring dashboards ready
- [ ] Rollback plan documented
- [ ] Deploy to production
- [ ] Monitor for 30 minutes
- [ ] Confirm success or rollback