# Claude Code Agents - Pods en Kubernetes

## Dockerfile del Agente

```dockerfile
# Dockerfile
FROM node:20-alpine

# Install dependencies.
# - jq is used by agent-loop.sh to build and parse the MCP JSON payloads.
# - unzip is required by the Bun install script below.
RUN apk add --no-cache \
    git \
    openssh-client \
    curl \
    bash \
    vim \
    jq \
    unzip

# Install Bun
RUN curl -fsSL https://bun.sh/install | bash
ENV PATH="/root/.bun/bin:$PATH"

# Install Claude Code CLI
RUN npm install -g @anthropic-ai/claude-code

# Create workspace
WORKDIR /workspace

# Copy agent scripts
COPY scripts/agent-entrypoint.sh /usr/local/bin/
COPY scripts/agent-loop.sh /usr/local/bin/
RUN chmod +x /usr/local/bin/agent-*.sh

# Git config
RUN git config --global user.name "AiWorker Agent" && \
    git config --global user.email "agent@aiworker.dev" && \
    git config --global init.defaultBranch main

# Setup SSH
RUN mkdir -p /root/.ssh && \
    ssh-keyscan -H git.aiworker.dev >> /root/.ssh/known_hosts

ENTRYPOINT ["/usr/local/bin/agent-entrypoint.sh"]
```

## Agent Entrypoint Script

```bash
#!/bin/bash
# scripts/agent-entrypoint.sh
#
# Container entrypoint: installs the Git SSH key, writes the Claude Code
# configuration, starts a background heartbeat and supervises agent-loop.sh.

set -e

echo "🤖 Starting AiWorker Agent..."
echo "Agent ID: $AGENT_ID"

# Setup SSH key
if [ -n "$GIT_SSH_KEY" ]; then
  echo "$GIT_SSH_KEY" > /root/.ssh/id_ed25519
  chmod 600 /root/.ssh/id_ed25519
fi

# Configure Claude Code with MCP Server
# The image never creates this directory; without it the redirect below
# fails and `set -e` aborts the entrypoint.
mkdir -p /root/.claude-code

# NOTE(review): the body of this heredoc and the opening of send_heartbeat
# were lost when this document was extracted (everything between
# "config.json <" and "> /dev/null" was dropped). The JSON below and the
# heartbeat request are a reconstruction -- confirm the config schema, the
# heartbeat tool name and its arguments against the backend before use.
cat > /root/.claude-code/config.json <<EOF
{
  "mcpServers": {
    "aiworker": {
      "url": "$MCP_SERVER_URL"
    }
  }
}
EOF

# Send a heartbeat with the given status ($1).
# Best effort: output is discarded and failures are ignored so that a
# backend outage cannot terminate the agent.
send_heartbeat() {
  curl -s -X POST "$MCP_SERVER_URL/tools/call" \
    -H "Content-Type: application/json" \
    -d "{
      \"name\": \"heartbeat\",
      \"arguments\": {
        \"agentId\": \"$AGENT_ID\",
        \"status\": \"$1\"
      }
    }" > /dev/null 2>&1 || true
}

# Start heartbeat loop in background
while true; do
  send_heartbeat "idle"
  sleep 30
done &
HEARTBEAT_PID=$!

# Start agent work loop.
# It runs as a child instead of via `exec`: `exec` replaces this shell, which
# would discard the trap below and the final 'offline' heartbeat with it.
/usr/local/bin/agent-loop.sh &
LOOP_PID=$!

# Graceful shutdown: stop both children, report offline, exit cleanly.
shutdown() {
  kill "$LOOP_PID" "$HEARTBEAT_PID" 2>/dev/null || true
  send_heartbeat 'offline'
  exit 0
}

# Trap signals for graceful shutdown
trap shutdown SIGTERM SIGINT

# `wait` is interruptible, so the trap runs as soon as a signal arrives.
# If the loop ends on its own, propagate its exit status like `exec` did.
LOOP_STATUS=0
wait "$LOOP_PID" || LOOP_STATUS=$?
kill "$HEARTBEAT_PID" 2>/dev/null || true
exit "$LOOP_STATUS"
```

## Agent Work Loop

```bash
#!/bin/bash
# scripts/agent-loop.sh
#
# Polls the MCP server for tasks and runs one Claude Code session per task.
# A failure while handling one task is reported and the loop moves on to the
# next poll instead of terminating the agent.

set -e

echo "🔄 Starting agent work loop..."

# Call an MCP tool and print the raw response on stdout.
#   $1 - tool name
#   $2 - JSON object holding the tool arguments
# The request body is assembled with jq so that quotes, backslashes or
# newlines in task data cannot produce invalid JSON.
mcp_call() {
  local payload
  payload=$(jq -n --arg name "$1" --argjson arguments "$2" \
    '{name: $name, arguments: $arguments}') || return 1
  curl -s -X POST "$MCP_SERVER_URL/tools/call" \
    -H "Content-Type: application/json" \
    -d "$payload"
}

# Read one field from the JSON document that a tool returns as text content.
#   $1 - raw MCP response
#   $2 - jq path inside the decoded document, e.g. '.task.id'
# Prints an empty string when the field is missing/null or the response
# cannot be parsed, so callers only have to test for emptiness.
mcp_field() {
  printf '%s' "$1" | jq -r ".content[0].text | fromjson | $2 // empty" 2>/dev/null || true
}

# Handle a single task end to end.
#   $1 - task id
#   $2 - raw get_next_task response
#   $3 - directory to clone the repository into
# Returns 0 when Claude Code finished successfully, non-zero otherwise.
# Every step checks its own result because `set -e` is not in effect inside
# a function that is called as an `if` condition.
process_task() {
  local task_id="$1" task="$2" repo_dir="$3"
  local task_title task_desc project_repo
  local branch_result branch_name task_prompt status

  # Extract task details
  task_title=$(mcp_field "$task" '.task.title')
  task_desc=$(mcp_field "$task" '.task.description')
  project_repo=$(mcp_field "$task" '.task.project.giteaRepoUrl')

  echo "📝 Task: $task_title"
  echo "📦 Repo: $project_repo"

  # Log activity (best effort)
  mcp_call log_activity "$(jq -n \
    --arg agentId "$AGENT_ID" \
    --arg message "Starting task: $task_title" \
    '{agentId: $agentId, level: "info", message: $message}')" > /dev/null || true

  # Clone repository
  if [ ! -d "$repo_dir" ]; then
    echo "📥 Cloning repository..."
    git clone "$project_repo" "$repo_dir" || return 1
  fi

  cd "$repo_dir" || return 1

  # Create branch via MCP
  echo "🌿 Creating branch..."
  branch_result=$(mcp_call create_branch \
    "$(jq -n --arg taskId "$task_id" '{taskId: $taskId}')") || return 1

  branch_name=$(mcp_field "$branch_result" '.branchName')
  if [ -z "$branch_name" ]; then
    # Without this check a missing name would create a branch called "null".
    echo "❌ create_branch returned no branch name" >&2
    return 1
  fi
  echo "🌿 Branch: $branch_name"

  # Fetch and checkout
  git fetch origin || return 1
  git checkout "$branch_name" 2>/dev/null || git checkout -b "$branch_name" || return 1

  # Start Claude Code session
  echo "🧠 Starting Claude Code session..."

  # Create task prompt
  task_prompt="I need you to work on the following task:

Title: $task_title

Description:
$task_desc

Instructions:
1. Analyze the codebase
2. Implement the required changes
3. Write tests if needed
4. Commit your changes with clear messages
5. When done, use the MCP tools to:
   - create_pull_request with a summary
   - trigger_preview_deploy
   - update_task_status to 'ready_to_test'

If you need clarification, use ask_user_question.

Start working on this task now."

  # Run Claude Code (with timeout of 2 hours).
  # The npm package installs the `claude` binary; `-p` runs a single prompt
  # non-interactively.
  # NOTE(review): headless runs usually also need an explicit permission
  # setup (for example --allowedTools) -- confirm for this deployment.
  status=0
  timeout 7200 claude -p "$task_prompt" || status=$?

  case "$status" in
    0)
      return 0
      ;;
    124|143)
      # GNU timeout reports 124; BusyBox timeout (the Alpine default) may
      # instead surface the SIGTERM status 143.
      echo "⏰ Task timeout"
      mcp_call update_task_status "$(jq -n --arg taskId "$task_id" \
        '{taskId: $taskId, status: "needs_input", metadata: {reason: "timeout"}}')" \
        > /dev/null || true
      ;;
    *)
      echo "❌ Claude Code exited with status $status"
      ;;
  esac
  return 1
}

while true; do
  echo "📋 Checking for tasks..."

  # Get next task via MCP. A failed request is treated like "no task".
  TASK=$(mcp_call get_next_task \
    "$(jq -n --arg agentId "$AGENT_ID" '{agentId: $agentId}')") || TASK=""

  TASK_ID=$(mcp_field "$TASK" '.task.id')

  if [ -z "$TASK_ID" ] || [ "$TASK_ID" = "null" ]; then
    echo "💤 No tasks available, waiting..."
    sleep 10
    continue
  fi

  echo "🎯 Got task: $TASK_ID"

  REPO_DIR="/workspace/task-$TASK_ID"

  if process_task "$TASK_ID" "$TASK" "$REPO_DIR"; then
    echo "✅ Task completed: $TASK_ID"
  else
    echo "⚠️ Task did not complete: $TASK_ID"
  fi

  # Cleanup
  cd /workspace
  rm -rf "$REPO_DIR"

  # Brief pause before next task
  sleep 5
done
```

## Pod Specification

```yaml
# k8s/agents/claude-agent-pod.yaml
apiVersion: v1
kind: Pod
metadata:
  name: claude-agent-{{ AGENT_ID }}
  namespace: agents
  labels:
    app: claude-agent
    agent-id: "{{ AGENT_ID }}"
    managed-by: aiworker
spec:
  restartPolicy: Never
  serviceAccountName: claude-agent

  containers:
    - name: agent
      image: aiworker/claude-agent:latest
      imagePullPolicy: Always

      env:
        - name: AGENT_ID
          value: "{{ AGENT_ID }}"

        - name: MCP_SERVER_URL
          value: "http://aiworker-backend.control-plane.svc.cluster.local:3100"

        - name: ANTHROPIC_API_KEY
          valueFrom:
            secretKeyRef:
              name: aiworker-secrets
              key: anthropic-api-key

        - name: GITEA_URL
          value: "http://gitea.gitea.svc.cluster.local:3000"

        - name: GIT_SSH_KEY
          valueFrom:
            secretKeyRef:
              name: git-ssh-keys
              key: private-key

      resources:
        requests:
          cpu: "500m"
          memory: "1Gi"
        limits:
          cpu: "2"
          memory: "4Gi"

      volumeMounts:
        - name: workspace
          mountPath: /workspace

  volumes:
    - name: workspace
      emptyDir:
        sizeLimit: 10Gi
```

## Agent Manager (Backend)

```typescript
// services/kubernetes/agent-manager.ts
import { K8sClient } from './client'
import { db } from '../../db/client'
import { agents } from '../../db/schema'
import { eq } from 'drizzle-orm'
import crypto from 'crypto'
import { logger } from '../../utils/logger'
// autoScale() filters on `tasks.state`, so the table object must be in scope.
// NOTE(review): assumes the schema module exports `tasks` next to `agents`.
import { tasks } from '../../db/schema'

/**
 * Creates, deletes and scales Claude agent pods, keeping the `agents` table
 * in step with what is running in Kubernetes.
 */
export class AgentManager {
  private k8sClient: K8sClient

  constructor() {
    this.k8sClient = new K8sClient()
  }

  /**
   * Starts a new agent pod and records it in the database.
   *
   * @param capabilities Capability tags stored with the agent row.
   * @returns The new agent id together with its pod name and namespace.
   * @throws Re-throws the database error if the row cannot be inserted; the
   *   pod created for it is removed first so it is not left orphaned.
   */
  async createAgent(capabilities: string[] = []) {
    const agentId = crypto.randomUUID()

    // Create agent pod in K8s
    const { podName, namespace } = await this.k8sClient.createAgentPod(agentId)

    // Insert in database
    try {
      await db.insert(agents).values({
        id: agentId,
        podName,
        k8sNamespace: namespace,
        status: 'initializing',
        capabilities,
        lastHeartbeat: new Date(),
      })
    } catch (error) {
      // No row points at this pod, so nothing else would ever clean it up.
      try {
        await this.k8sClient.deletePod(namespace, podName)
      } catch (cleanupError) {
        logger.warn(`Failed to remove pod ${podName} after insert error: ${String(cleanupError)}`)
      }
      throw error
    }

    logger.info(`Created agent: ${agentId}`)

    return {
      id: agentId,
      podName,
      namespace,
    }
  }

  /**
   * Deletes an agent's pod and then its database row.
   *
   * @param agentId Id of the agent to remove.
   * @throws Error('Agent not found') when no row exists for `agentId`.
   */
  async deleteAgent(agentId: string) {
    const agent = await db.query.agents.findFirst({
      where: eq(agents.id, agentId),
    })

    if (!agent) {
      throw new Error('Agent not found')
    }

    // Delete pod
    await this.k8sClient.deletePod(agent.k8sNamespace, agent.podName)

    // Delete from database
    await db.delete(agents).where(eq(agents.id, agentId))

    logger.info(`Deleted agent: ${agentId}`)
  }

  /**
   * Moves the number of agent rows towards `targetCount`.
   *
   * Scaling up creates agents one at a time with a one second pause between
   * them. Scaling down only removes agents whose status is 'idle', so the
   * target may not be reached while agents are in any other state.
   *
   * @param targetCount Desired total number of agents.
   */
  async scaleAgents(targetCount: number) {
    const currentAgents = await db.query.agents.findMany()

    if (currentAgents.length < targetCount) {
      // Scale up
      const toCreate = targetCount - currentAgents.length
      logger.info(`Scaling up: creating ${toCreate} agents`)

      for (let i = 0; i < toCreate; i++) {
        await this.createAgent()
        await new Promise(resolve => setTimeout(resolve, 1000)) // Stagger creation
      }
    } else if (currentAgents.length > targetCount) {
      // Scale down
      const toDelete = currentAgents.length - targetCount
      logger.info(`Scaling down: deleting ${toDelete} agents`)

      // Delete idle agents first
      const idleAgents = currentAgents.filter(a => a.status === 'idle').slice(0, toDelete)

      for (const agent of idleAgents) {
        await this.deleteAgent(agent.id)
      }
    }
  }

  /**
   * Computes a target agent count from the backlog and the busy agents and
   * calls scaleAgents() when it differs from the idle + busy total.
   *
   * NOTE(review): the total counted here ignores agents in other states
   * (e.g. 'initializing'), whereas scaleAgents() counts every row; the log
   * line below can therefore report a change that turns out to be a no-op.
   */
  async autoScale() {
    // Get pending tasks
    const pendingTasks = await db.query.tasks.findMany({
      where: eq(tasks.state, 'backlog'),
    })

    // Get available agents
    const availableAgents = await db.query.agents.findMany({
      where: eq(agents.status, 'idle'),
    })

    const busyAgents = await db.query.agents.findMany({
      where: eq(agents.status, 'busy'),
    })

    const totalAgents = availableAgents.length + busyAgents.length

    // Simple scaling logic
    const targetAgents = Math.min(
      // At least 2, at least one per pending task, and one more than the busy count
      Math.max(2, pendingTasks.length, busyAgents.length + 1),
      10 // Max 10 agents
    )

    if (targetAgents !== totalAgents) {
      logger.info(`Auto-scaling agents: ${totalAgents} → ${targetAgents}`)
      await this.scaleAgents(targetAgents)
    }
  }

  /**
   * Deletes every agent whose last heartbeat is older than five minutes.
   * A failure to delete one agent is logged and does not stop the sweep.
   */
  async cleanupStaleAgents() {
    const staleThreshold = new Date(Date.now() - 5 * 60 * 1000) // 5 minutes

    const staleAgents = await db.query.agents.findMany({
      where: (agents, { lt }) => lt(agents.lastHeartbeat, staleThreshold),
    })

    for (const agent of staleAgents) {
      logger.warn(`Cleaning up stale agent: ${agent.id}`)
      try {
        await this.deleteAgent(agent.id)
      } catch (error) {
        logger.warn(`Failed to clean up stale agent ${agent.id}: ${String(error)}`)
      }
    }
  }
}

// Start autoscaler
let autoscalerTickRunning = false

setInterval(async () => {
  // A slow tick (staggered pod creation, slow API calls) must not overlap
  // with the next one, or both would scale from the same stale counts.
  if (autoscalerTickRunning) {
    return
  }
  autoscalerTickRunning = true

  try {
    const manager = new AgentManager()
    await manager.autoScale()
    await manager.cleanupStaleAgents()
  } catch (error) {
    // An unhandled rejection inside a timer callback can take the process down.
    logger.warn(`Autoscaler tick failed: ${String(error)}`)
  } finally {
    autoscalerTickRunning = false
  }
}, 30000) // Every 30 seconds
```

## Agent Logs Streaming

```typescript
// api/routes/agents.ts
import { Router } from 'express'
import { K8sClient } from '../../services/kubernetes/client'
import { db } from '../../db/client'
import { agents } from '../../db/schema'
import { eq } from 'drizzle-orm'

const router = Router()
const k8sClient = new K8sClient()

/**
 * GET /:agentId/logs/stream
 *
 * Streams the logs of the agent's pod to the client as server-sent events.
 * Responds 404 when the agent does not exist and 500 when the lookup or the
 * log stream cannot be started.
 */
router.get('/:agentId/logs/stream', async (req, res) => {
  const { agentId } = req.params

  try {
    // Inside the try block so a failed query yields a 500 instead of a
    // rejected promise that leaves the request hanging.
    const agent = await db.query.agents.findFirst({
      where: eq(agents.id, agentId),
    })

    if (!agent) {
      return res.status(404).json({ error: 'Agent not found' })
    }

    const logStream = await k8sClient.streamPodLogs(agent.k8sNamespace, agent.podName)

    res.setHeader('Content-Type', 'text/event-stream')
    res.setHeader('Cache-Control', 'no-cache')
    res.setHeader('Connection', 'keep-alive')

    logStream.on('data', (chunk) => {
      // In the event-stream format every line of a message needs its own
      // "data:" prefix; a chunk holding several log lines would otherwise
      // lose everything after the first one.
      const text = chunk.toString().replace(/\r?\n$/, '')
      const frame = text
        .split(/\r?\n/)
        .map((line) => `data: ${line}`)
        .join('\n')
      res.write(`${frame}\n\n`)
    })

    logStream.on('end', () => {
      res.end()
    })

    // Without a listener an 'error' event on the stream would be thrown as
    // an uncaught exception.
    logStream.on('error', () => {
      res.end()
    })

    req.on('close', () => {
      logStream.destroy()
    })
  } catch (error) {
    if (res.headersSent) {
      res.end()
    } else {
      res.status(500).json({ error: 'Failed to stream logs' })
    }
  }
})

export default router
```

## Monitoring Agents

```bash
# List all agents
kubectl get pods -n agents -l app=claude-agent

# Follow the logs of one agent
kubectl logs -n agents claude-agent-abc123 -f

# Open a shell inside an agent
kubectl exec -it -n agents claude-agent-abc123 -- /bin/bash

# Show resource usage
kubectl top pods -n agents
```