Complete documentation for future sessions

- CLAUDE.md for AI agents to understand the codebase
- GITEA-GUIDE.md centralizes all Gitea operations (API, Registry, Auth)
- DEVELOPMENT-WORKFLOW.md explains complete dev process
- ROADMAP.md, NEXT-SESSION.md for planning
- QUICK-REFERENCE.md, TROUBLESHOOTING.md for daily use
- 40+ detailed docs in /docs folder
- Backend as submodule from Gitea

Everything documented for autonomous operation.

Co-Authored-By: Claude Sonnet 4.5 (1M context) <noreply@anthropic.com>
2026-01-20 00:36:53 +01:00
commit db71705842
49 changed files with 19162 additions and 0 deletions
--- a/docs/05-agents/claude-code-pods.md
+++ b/docs/05-agents/claude-code-pods.md
@@ -0,0 +1,499 @@
+# Claude Code Agents - Pods en Kubernetes
+
+## Dockerfile del Agente
+
+```dockerfile
+# Dockerfile
+# Image for an AiWorker agent: Node 20 on Alpine with git/SSH tooling, Bun and
+# the Claude Code CLI. The container starts scripts/agent-entrypoint.sh.
+FROM node:20-alpine
+
+# Install dependencies
+# jq is required by scripts/agent-loop.sh, which parses every MCP response with it.
+RUN apk add --no-cache \
+    git \
+    openssh-client \
+    curl \
+    bash \
+    jq \
+    vim
+
+# Install Bun
+RUN curl -fsSL https://bun.sh/install | bash
+ENV PATH="/root/.bun/bin:$PATH"
+
+# Install Claude Code CLI
+RUN npm install -g @anthropic-ai/claude-code
+
+# Create workspace
+WORKDIR /workspace
+
+# Copy agent scripts
+COPY scripts/agent-entrypoint.sh /usr/local/bin/
+COPY scripts/agent-loop.sh /usr/local/bin/
+RUN chmod +x /usr/local/bin/agent-*.sh
+
+# Git config
+RUN git config --global user.name "AiWorker Agent" && \
+    git config --global user.email "agent@aiworker.dev" && \
+    git config --global init.defaultBranch main
+
+# Setup SSH
+# NOTE(review): the host key is captured at build time (trust on first use);
+# pinning a known key would be safer - confirm with whoever owns the Gitea host.
+RUN mkdir -p /root/.ssh && \
+    ssh-keyscan -H git.aiworker.dev >> /root/.ssh/known_hosts
+
+# The entrypoint writes /root/.claude-code/config.json at startup; the
+# directory must already exist or that write fails under `set -e`.
+RUN mkdir -p /root/.claude-code
+
+ENTRYPOINT ["/usr/local/bin/agent-entrypoint.sh"]
+```
+
+## Agent Entrypoint Script
+
+```bash
+#!/bin/bash
+# scripts/agent-entrypoint.sh
+
+set -e
+
+echo "🤖 Starting AiWorker Agent..."
+echo "Agent ID: $AGENT_ID"
+
+# Setup SSH key
+if [ -n "$GIT_SSH_KEY" ]; then
+  echo "$GIT_SSH_KEY" > /root/.ssh/id_ed25519
+  chmod 600 /root/.ssh/id_ed25519
+fi
+
+# Configure Claude Code with MCP Server
+cat > /root/.claude-code/config.json <<EOF
+{
+  "mcpServers": {
+    "aiworker": {
+      "command": "curl",
+      "args": [
+        "-X", "POST",
+        "-H", "Content-Type: application/json",
+        "-H", "X-Agent-ID: $AGENT_ID",
+        "$MCP_SERVER_URL/rpc"
+      ]
+    }
+  }
+}
+EOF
+
+# Send heartbeat
+send_heartbeat() {
+  curl -s -X POST "$MCP_SERVER_URL/heartbeat" \
+    -H "Content-Type: application/json" \
+    -d "{\"agentId\":\"$AGENT_ID\",\"status\":\"$1\"}" > /dev/null 2>&1 || true
+}
+
+# Start heartbeat loop in background
+while true; do
+  send_heartbeat "idle"
+  sleep 30
+done &
+HEARTBEAT_PID=$!
+
+# Trap signals for graceful shutdown
+trap "kill $HEARTBEAT_PID; send_heartbeat 'offline'; exit 0" SIGTERM SIGINT
+
+# Start agent work loop
+exec /usr/local/bin/agent-loop.sh
+```
+
+## Agent Work Loop
+
+```bash
+#!/bin/bash
+# scripts/agent-loop.sh
+
+set -e
+
+echo "🔄 Starting agent work loop..."
+
+while true; do
+  echo "📋 Checking for tasks..."
+
+  # Get next task via MCP
+  TASK=$(curl -s -X POST "$MCP_SERVER_URL/tools/call" \
+    -H "Content-Type: application/json" \
+    -d "{
+      \"name\": \"get_next_task\",
+      \"arguments\": {
+        \"agentId\": \"$AGENT_ID\"
+      }
+    }")
+
+  TASK_ID=$(echo "$TASK" | jq -r '.content[0].text | fromjson | .task.id // empty')
+
+  if [ -z "$TASK_ID" ] || [ "$TASK_ID" = "null" ]; then
+    echo "💤 No tasks available, waiting..."
+    sleep 10
+    continue
+  fi
+
+  echo "🎯 Got task: $TASK_ID"
+
+  # Extract task details
+  TASK_TITLE=$(echo "$TASK" | jq -r '.content[0].text | fromjson | .task.title')
+  TASK_DESC=$(echo "$TASK" | jq -r '.content[0].text | fromjson | .task.description')
+  PROJECT_REPO=$(echo "$TASK" | jq -r '.content[0].text | fromjson | .task.project.giteaRepoUrl')
+
+  echo "📝 Task: $TASK_TITLE"
+  echo "📦 Repo: $PROJECT_REPO"
+
+  # Log activity
+  curl -s -X POST "$MCP_SERVER_URL/tools/call" \
+    -H "Content-Type: application/json" \
+    -d "{
+      \"name\": \"log_activity\",
+      \"arguments\": {
+        \"agentId\": \"$AGENT_ID\",
+        \"level\": \"info\",
+        \"message\": \"Starting task: $TASK_TITLE\"
+      }
+    }" > /dev/null
+
+  # Clone repository
+  REPO_DIR="/workspace/task-$TASK_ID"
+  if [ ! -d "$REPO_DIR" ]; then
+    echo "📥 Cloning repository..."
+    git clone "$PROJECT_REPO" "$REPO_DIR"
+  fi
+
+  cd "$REPO_DIR"
+
+  # Create branch via MCP
+  echo "🌿 Creating branch..."
+  BRANCH_RESULT=$(curl -s -X POST "$MCP_SERVER_URL/tools/call" \
+    -H "Content-Type: application/json" \
+    -d "{
+      \"name\": \"create_branch\",
+      \"arguments\": {
+        \"taskId\": \"$TASK_ID\"
+      }
+    }")
+
+  BRANCH_NAME=$(echo "$BRANCH_RESULT" | jq -r '.content[0].text | fromjson | .branchName')
+  echo "🌿 Branch: $BRANCH_NAME"
+
+  # Fetch and checkout
+  git fetch origin
+  git checkout "$BRANCH_NAME" 2>/dev/null || git checkout -b "$BRANCH_NAME"
+
+  # Start Claude Code session
+  echo "🧠 Starting Claude Code session..."
+
+  # Create task prompt
+  TASK_PROMPT="I need you to work on the following task:
+
+Title: $TASK_TITLE
+
+Description:
+$TASK_DESC
+
+Instructions:
+1. Analyze the codebase
+2. Implement the required changes
+3. Write tests if needed
+4. Commit your changes with clear messages
+5. When done, use the MCP tools to:
+   - create_pull_request with a summary
+   - trigger_preview_deploy
+   - update_task_status to 'ready_to_test'
+
+If you need clarification, use ask_user_question.
+
+Start working on this task now."
+
+  # Run Claude Code (with timeout of 2 hours)
+  timeout 7200 claude-code chat --message "$TASK_PROMPT" || {
+    STATUS=$?
+    if [ $STATUS -eq 124 ]; then
+      echo "⏰ Task timeout"
+      curl -s -X POST "$MCP_SERVER_URL/tools/call" \
+        -H "Content-Type: application/json" \
+        -d "{
+          \"name\": \"update_task_status\",
+          \"arguments\": {
+            \"taskId\": \"$TASK_ID\",
+            \"status\": \"needs_input\",
+            \"metadata\": {\"reason\": \"timeout\"}
+          }
+        }" > /dev/null
+    else
+      echo "❌ Claude Code exited with status $STATUS"
+    fi
+  }
+
+  echo "✅ Task completed: $TASK_ID"
+
+  # Cleanup
+  cd /workspace
+  rm -rf "$REPO_DIR"
+
+  # Brief pause before next task
+  sleep 5
+done
+```
+
+## Pod Specification
+
+```yaml
+# k8s/agents/claude-agent-pod.yaml
+# Pod template for a single Claude agent.
+# `{{ AGENT_ID }}` is a placeholder; presumably substituted by the backend
+# before the manifest is applied - confirm against the K8s client code.
+apiVersion: v1
+kind: Pod
+metadata:
+  name: claude-agent-{{ AGENT_ID }}
+  namespace: agents
+  labels:
+    app: claude-agent
+    agent-id: "{{ AGENT_ID }}"
+    managed-by: aiworker
+spec:
+  # The pod is not restarted by Kubernetes when the container exits.
+  restartPolicy: Never
+  serviceAccountName: claude-agent
+
+  containers:
+  - name: agent
+    image: aiworker/claude-agent:latest
+    # Always re-pull, since the image is referenced by the mutable `latest` tag.
+    imagePullPolicy: Always
+
+    env:
+    # Identity the agent scripts send in heartbeats and MCP calls.
+    - name: AGENT_ID
+      value: "{{ AGENT_ID }}"
+
+    # Base URL the agent scripts use for /heartbeat and /tools/call.
+    - name: MCP_SERVER_URL
+      value: "http://aiworker-backend.control-plane.svc.cluster.local:3100"
+
+    # API key is read from the `aiworker-secrets` Secret, not stored in the manifest.
+    - name: ANTHROPIC_API_KEY
+      valueFrom:
+        secretKeyRef:
+          name: aiworker-secrets
+          key: anthropic-api-key
+
+    - name: GITEA_URL
+      value: "http://gitea.gitea.svc.cluster.local:3000"
+
+    # Private key the entrypoint writes to /root/.ssh/id_ed25519.
+    - name: GIT_SSH_KEY
+      valueFrom:
+        secretKeyRef:
+          name: git-ssh-keys
+          key: private-key
+
+    resources:
+      requests:
+        cpu: "500m"
+        memory: "1Gi"
+      limits:
+        cpu: "2"
+        memory: "4Gi"
+
+    volumeMounts:
+    # Task repositories are cloned under /workspace by the work loop.
+    - name: workspace
+      mountPath: /workspace
+
+  volumes:
+  # Scratch space that lives only as long as the pod, capped at 10Gi.
+  - name: workspace
+    emptyDir:
+      sizeLimit: 10Gi
+
+## Agent Manager (Backend)
+
+```typescript
+// services/kubernetes/agent-manager.ts
+import { K8sClient } from './client'
+import { db } from '../../db/client'
+import { agents, tasks } from '../../db/schema'
+import { eq } from 'drizzle-orm'
+import crypto from 'crypto'
+import { logger } from '../../utils/logger'
+
+/**
+ * Manages the lifecycle of agent pods: creation, deletion, scaling and
+ * removal of agents whose heartbeat has gone stale. Each agent is one
+ * Kubernetes pod plus one row in the `agents` table.
+ */
+export class AgentManager {
+  private k8sClient: K8sClient
+
+  constructor() {
+    this.k8sClient = new K8sClient()
+  }
+
+  /**
+   * Creates a new agent pod and records it in the database.
+   *
+   * @param capabilities - capability tags stored on the agent row
+   * @returns the new agent's id, pod name and namespace
+   * @throws rethrows the database error if the row cannot be inserted; in
+   *   that case the pod that was just created is deleted again (best effort)
+   */
+  async createAgent(capabilities: string[] = []) {
+    const agentId = crypto.randomUUID()
+
+    // Create agent pod in K8s
+    const { podName, namespace } = await this.k8sClient.createAgentPod(agentId)
+
+    // Insert in database
+    try {
+      await db.insert(agents).values({
+        id: agentId,
+        podName,
+        k8sNamespace: namespace,
+        status: 'initializing',
+        capabilities,
+        lastHeartbeat: new Date(),
+      })
+    } catch (error) {
+      // Without a row nothing would ever find this pod again (scaling and
+      // cleanup both work from the table), so remove it rather than leak it.
+      logger.warn(`Failed to persist agent ${agentId}; removing pod ${podName}`)
+      try {
+        await this.k8sClient.deletePod(namespace, podName)
+      } catch (cleanupError) {
+        logger.warn(`Could not remove orphaned pod ${podName}: ${String(cleanupError)}`)
+      }
+      throw error
+    }
+
+    logger.info(`Created agent: ${agentId}`)
+
+    return {
+      id: agentId,
+      podName,
+      namespace,
+    }
+  }
+
+  /**
+   * Deletes an agent's pod and then its database row.
+   *
+   * @param agentId - id of the agent to remove
+   * @throws Error('Agent not found') when no row exists for `agentId`
+   */
+  async deleteAgent(agentId: string) {
+    const agent = await db.query.agents.findFirst({
+      where: eq(agents.id, agentId),
+    })
+
+    if (!agent) {
+      throw new Error('Agent not found')
+    }
+
+    // Delete pod
+    // NOTE(review): if deletePod rejects for a pod that is already gone, the
+    // row below is never removed - confirm how K8sClient handles that case.
+    await this.k8sClient.deletePod(agent.k8sNamespace, agent.podName)
+
+    // Delete from database
+    await db.delete(agents).where(eq(agents.id, agentId))
+
+    logger.info(`Deleted agent: ${agentId}`)
+  }
+
+  /**
+   * Moves the number of agents towards `targetCount`.
+   *
+   * Scaling up creates agents one at a time with a one-second pause between
+   * them. Scaling down removes idle agents only, so fewer agents than
+   * requested may be removed when not enough are idle.
+   *
+   * @param targetCount - desired total number of agent rows
+   */
+  async scaleAgents(targetCount: number) {
+    const currentAgents = await db.query.agents.findMany()
+
+    if (currentAgents.length < targetCount) {
+      // Scale up
+      const toCreate = targetCount - currentAgents.length
+      logger.info(`Scaling up: creating ${toCreate} agents`)
+
+      for (let i = 0; i < toCreate; i++) {
+        await this.createAgent()
+        await new Promise(resolve => setTimeout(resolve, 1000)) // Stagger creation
+      }
+    } else if (currentAgents.length > targetCount) {
+      // Scale down
+      const toDelete = currentAgents.length - targetCount
+      logger.info(`Scaling down: deleting ${toDelete} agents`)
+
+      // Only idle agents are candidates for removal
+      const idleAgents = currentAgents.filter(a => a.status === 'idle').slice(0, toDelete)
+
+      for (const agent of idleAgents) {
+        await this.deleteAgent(agent.id)
+      }
+    }
+  }
+
+  /**
+   * Computes a target agent count from the backlog and current load, and
+   * calls `scaleAgents` when it differs from the number of idle + busy agents.
+   *
+   * Target = max(2, pending tasks, busy agents + 1), capped at 10.
+   */
+  async autoScale() {
+    // The three lookups are independent, so run them concurrently.
+    const [pendingTasks, availableAgents, busyAgents] = await Promise.all([
+      // Get pending tasks
+      db.query.tasks.findMany({
+        where: eq(tasks.state, 'backlog'),
+      }),
+      // Get available agents
+      db.query.agents.findMany({
+        where: eq(agents.status, 'idle'),
+      }),
+      db.query.agents.findMany({
+        where: eq(agents.status, 'busy'),
+      }),
+    ])
+
+    // Counts idle and busy agents only; scaleAgents itself counts every row.
+    const totalAgents = availableAgents.length + busyAgents.length
+
+    // Simple scaling logic
+    const targetAgents = Math.min(
+      Math.max(2, pendingTasks.length, busyAgents.length + 1), // At least 2, max 1 per pending task
+      10 // Max 10 agents
+    )
+
+    if (targetAgents !== totalAgents) {
+      logger.info(`Auto-scaling agents: ${totalAgents} → ${targetAgents}`)
+      await this.scaleAgents(targetAgents)
+    }
+  }
+
+  /**
+   * Deletes every agent whose last heartbeat is older than five minutes.
+   * A failure to delete one agent is logged and does not stop the sweep.
+   */
+  async cleanupStaleAgents() {
+    const staleThreshold = new Date(Date.now() - 5 * 60 * 1000) // 5 minutes
+
+    const staleAgents = await db.query.agents.findMany({
+      where: (agents, { lt }) => lt(agents.lastHeartbeat, staleThreshold),
+    })
+
+    for (const agent of staleAgents) {
+      logger.warn(`Cleaning up stale agent: ${agent.id}`)
+      try {
+        await this.deleteAgent(agent.id)
+      } catch (error) {
+        // Keep going: one undeletable agent must not block the others.
+        logger.warn(`Failed to clean up stale agent ${agent.id}: ${String(error)}`)
+      }
+    }
+  }
+}
+
+// Start autoscaler
+// One manager is shared by all ticks instead of building a new one each time.
+const autoscalerManager = new AgentManager()
+// Guards against overlapping runs when a tick takes longer than the interval.
+let autoscalerRunning = false
+
+setInterval(async () => {
+  if (autoscalerRunning) {
+    return
+  }
+  autoscalerRunning = true
+
+  try {
+    await autoscalerManager.autoScale()
+    await autoscalerManager.cleanupStaleAgents()
+  } catch (error) {
+    // A rejected promise inside setInterval would otherwise be unhandled.
+    logger.warn(`Autoscaler tick failed: ${String(error)}`)
+  } finally {
+    autoscalerRunning = false
+  }
+}, 30000) // Every 30 seconds
+```
+
+## Agent Logs Streaming
+
+```typescript
+// api/routes/agents.ts
+import { Router } from 'express'
+import { K8sClient } from '../../services/kubernetes/client'
+import { db } from '../../db/client'
+import { agents } from '../../db/schema'
+import { eq } from 'drizzle-orm'
+
+const router = Router()
+const k8sClient = new K8sClient()
+
+/**
+ * GET /:agentId/logs/stream
+ *
+ * Streams the logs of the agent's pod to the client as Server-Sent Events.
+ * Responds 404 when the agent does not exist and 500 when the lookup or the
+ * log stream cannot be started. Each chunk received from the pod is sent as
+ * one SSE event.
+ */
+router.get('/:agentId/logs/stream', async (req, res) => {
+  const { agentId } = req.params
+
+  try {
+    const agent = await db.query.agents.findFirst({
+      where: eq(agents.id, agentId),
+    })
+
+    if (!agent) {
+      return res.status(404).json({ error: 'Agent not found' })
+    }
+
+    const logStream = await k8sClient.streamPodLogs(agent.k8sNamespace, agent.podName)
+
+    // Switch to SSE only once the stream is open, so failures above can
+    // still be reported as an ordinary JSON error.
+    res.setHeader('Content-Type', 'text/event-stream')
+    res.setHeader('Cache-Control', 'no-cache')
+    res.setHeader('Connection', 'keep-alive')
+
+    logStream.on('data', (chunk) => {
+      // In the SSE format every line of a payload needs its own `data:`
+      // prefix; a chunk holding several log lines would otherwise lose all
+      // but the first. The trailing newline is dropped to avoid an empty line.
+      const lines = chunk.toString().replace(/\r?\n$/, '').split(/\r?\n/)
+      res.write(`${lines.map((line) => `data: ${line}`).join('\n')}\n\n`)
+    })
+
+    logStream.on('end', () => {
+      res.end()
+    })
+
+    // An 'error' event with no listener is thrown by the emitter; close the
+    // response instead.
+    logStream.on('error', () => {
+      res.end()
+    })
+
+    req.on('close', () => {
+      logStream.destroy()
+    })
+  } catch (error) {
+    if (res.headersSent) {
+      // Too late for a status code; just terminate the stream.
+      res.end()
+      return
+    }
+    res.status(500).json({ error: 'Failed to stream logs' })
+  }
+})
+
+export default router
+```
+
+## Monitoring Agents
+
+```bash
+# List all agents
+kubectl get pods -n agents -l app=claude-agent
+
+# Follow the logs of one agent
+kubectl logs -n agents claude-agent-abc123 -f
+
+# Open a shell inside an agent
+kubectl exec -it -n agents claude-agent-abc123 -- /bin/bash
+
+# Show resource usage
+kubectl top pods -n agents
+```