# Claude Code Agents - Kubernetes Pods
## Agent Dockerfile
```dockerfile
# Dockerfile
FROM node:20-alpine

# Install dependencies
RUN apk add --no-cache \
    git \
    openssh-client \
    curl \
    bash \
    vim

# Install Bun
RUN curl -fsSL https://bun.sh/install | bash
ENV PATH="/root/.bun/bin:$PATH"

# Install Claude Code CLI
RUN npm install -g @anthropic-ai/claude-code

# Create workspace
WORKDIR /workspace

# Copy agent scripts
COPY scripts/agent-entrypoint.sh /usr/local/bin/
COPY scripts/agent-loop.sh /usr/local/bin/
RUN chmod +x /usr/local/bin/agent-*.sh

# Git config
RUN git config --global user.name "AiWorker Agent" && \
    git config --global user.email "agent@aiworker.dev" && \
    git config --global init.defaultBranch main

# Setup SSH
RUN mkdir -p /root/.ssh && \
    ssh-keyscan -H git.aiworker.dev >> /root/.ssh/known_hosts

ENTRYPOINT ["/usr/local/bin/agent-entrypoint.sh"]
```
## Agent Entrypoint Script
```bash
#!/bin/bash
# scripts/agent-entrypoint.sh

set -e

echo "🤖 Starting AiWorker Agent..."
echo "Agent ID: $AGENT_ID"

# Setup SSH key
if [ -n "$GIT_SSH_KEY" ]; then
  echo "$GIT_SSH_KEY" > /root/.ssh/id_ed25519
  chmod 600 /root/.ssh/id_ed25519
fi

# Configure Claude Code with MCP Server
mkdir -p /root/.claude-code
cat > /root/.claude-code/config.json <<EOF
{
  "mcpServers": {
    "aiworker": {
      "command": "curl",
      "args": [
        "-X", "POST",
        "-H", "Content-Type: application/json",
        "-H", "X-Agent-ID: $AGENT_ID",
        "$MCP_SERVER_URL/rpc"
      ]
    }
  }
}
EOF

# Send heartbeat
send_heartbeat() {
  curl -s -X POST "$MCP_SERVER_URL/heartbeat" \
    -H "Content-Type: application/json" \
    -d "{\"agentId\":\"$AGENT_ID\",\"status\":\"$1\"}" > /dev/null 2>&1 || true
}

# Start heartbeat loop in background
while true; do
  send_heartbeat "idle"
  sleep 30
done &
HEARTBEAT_PID=$!

# Trap signals for graceful shutdown
trap "kill $HEARTBEAT_PID; send_heartbeat 'offline'; exit 0" SIGTERM SIGINT

# Start agent work loop
exec /usr/local/bin/agent-loop.sh
```
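The `/heartbeat` endpoint the entrypoint posts to is not shown in this document. A minimal sketch of what it could look like on the backend, using the same Express + Drizzle stack as the other snippets; the route module path and request fields are assumptions taken from the script above:
```typescript
// api/routes/heartbeat.ts (sketch, not part of the documented backend)
import { Router } from 'express'
import { db } from '../../db/client'
import { agents } from '../../db/schema'
import { eq } from 'drizzle-orm'

const router = Router()

// Matches the POST the entrypoint sends every 30 seconds
router.post('/heartbeat', async (req, res) => {
  const { agentId, status } = req.body as { agentId?: string; status?: string }

  if (!agentId) {
    return res.status(400).json({ error: 'agentId is required' })
  }

  // Refresh lastHeartbeat so cleanupStaleAgents() (see Agent Manager below) keeps the agent alive
  await db
    .update(agents)
    .set({ status: status ?? 'idle', lastHeartbeat: new Date() })
    .where(eq(agents.id, agentId))

  res.json({ ok: true })
})

export default router
```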
## Agent Work Loop
```bash
#!/bin/bash
# scripts/agent-loop.sh

set -e

echo "🔄 Starting agent work loop..."

while true; do
  echo "📋 Checking for tasks..."

  # Get next task via MCP
  TASK=$(curl -s -X POST "$MCP_SERVER_URL/tools/call" \
    -H "Content-Type: application/json" \
    -d "{
      \"name\": \"get_next_task\",
      \"arguments\": {
        \"agentId\": \"$AGENT_ID\"
      }
    }")

  TASK_ID=$(echo "$TASK" | jq -r '.content[0].text | fromjson | .task.id // empty')

  if [ -z "$TASK_ID" ] || [ "$TASK_ID" = "null" ]; then
    echo "💤 No tasks available, waiting..."
    sleep 10
    continue
  fi

  echo "🎯 Got task: $TASK_ID"

  # Extract task details
  TASK_TITLE=$(echo "$TASK" | jq -r '.content[0].text | fromjson | .task.title')
  TASK_DESC=$(echo "$TASK" | jq -r '.content[0].text | fromjson | .task.description')
  PROJECT_REPO=$(echo "$TASK" | jq -r '.content[0].text | fromjson | .task.project.giteaRepoUrl')

  echo "📝 Task: $TASK_TITLE"
  echo "📦 Repo: $PROJECT_REPO"

  # Log activity
  curl -s -X POST "$MCP_SERVER_URL/tools/call" \
    -H "Content-Type: application/json" \
    -d "{
      \"name\": \"log_activity\",
      \"arguments\": {
        \"agentId\": \"$AGENT_ID\",
        \"level\": \"info\",
        \"message\": \"Starting task: $TASK_TITLE\"
      }
    }" > /dev/null

  # Clone repository
  REPO_DIR="/workspace/task-$TASK_ID"
  if [ ! -d "$REPO_DIR" ]; then
    echo "📥 Cloning repository..."
    git clone "$PROJECT_REPO" "$REPO_DIR"
  fi
  cd "$REPO_DIR"

  # Create branch via MCP
  echo "🌿 Creating branch..."
  BRANCH_RESULT=$(curl -s -X POST "$MCP_SERVER_URL/tools/call" \
    -H "Content-Type: application/json" \
    -d "{
      \"name\": \"create_branch\",
      \"arguments\": {
        \"taskId\": \"$TASK_ID\"
      }
    }")

  BRANCH_NAME=$(echo "$BRANCH_RESULT" | jq -r '.content[0].text | fromjson | .branchName')
  echo "🌿 Branch: $BRANCH_NAME"

  # Fetch and checkout
  git fetch origin
  git checkout "$BRANCH_NAME" 2>/dev/null || git checkout -b "$BRANCH_NAME"

  # Start Claude Code session
  echo "🧠 Starting Claude Code session..."

  # Create task prompt
  TASK_PROMPT="I need you to work on the following task:

Title: $TASK_TITLE

Description:
$TASK_DESC

Instructions:
1. Analyze the codebase
2. Implement the required changes
3. Write tests if needed
4. Commit your changes with clear messages
5. When done, use the MCP tools to:
   - create_pull_request with a summary
   - trigger_preview_deploy
   - update_task_status to 'ready_to_test'

If you need clarification, use ask_user_question.

Start working on this task now."

  # Run Claude Code (with timeout of 2 hours)
  timeout 7200 claude-code chat --message "$TASK_PROMPT" || {
    STATUS=$?
    if [ $STATUS -eq 124 ]; then
      echo "⏰ Task timeout"
      curl -s -X POST "$MCP_SERVER_URL/tools/call" \
        -H "Content-Type: application/json" \
        -d "{
          \"name\": \"update_task_status\",
          \"arguments\": {
            \"taskId\": \"$TASK_ID\",
            \"status\": \"needs_input\",
            \"metadata\": {\"reason\": \"timeout\"}
          }
        }" > /dev/null
    else
      echo "❌ Claude Code exited with status $STATUS"
    fi
  }

  echo "✅ Task completed: $TASK_ID"

  # Cleanup
  cd /workspace
  rm -rf "$REPO_DIR"

  # Brief pause before next task
  sleep 5
done
```
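The jq pipelines above assume the `get_next_task` tool wraps its result as a JSON string inside `content[0].text`. A hedged sketch of that shape; field names beyond the ones the script actually reads are assumptions:
```typescript
// Response shape assumed by the agent loop's jq parsing (sketch)
interface ToolCallResult {
  content: Array<{
    type: 'text'
    // JSON-encoded string; the loop pipes it through `fromjson`
    text: string
  }>
}

// What the decoded text is expected to contain for get_next_task
interface NextTaskPayload {
  task: {
    id: string
    title: string
    description: string
    project: {
      giteaRepoUrl: string
    }
  } | null // null when no task is available
}

// Example of building such a response on the server side
function toToolResult(payload: NextTaskPayload): ToolCallResult {
  return { content: [{ type: 'text', text: JSON.stringify(payload) }] }
}
```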
## Pod Specification
```yaml
# k8s/agents/claude-agent-pod.yaml
apiVersion: v1
kind: Pod
metadata:
  name: claude-agent-{{ AGENT_ID }}
  namespace: agents
  labels:
    app: claude-agent
    agent-id: "{{ AGENT_ID }}"
    managed-by: aiworker
spec:
  restartPolicy: Never
  serviceAccountName: claude-agent
  containers:
    - name: agent
      image: aiworker/claude-agent:latest
      imagePullPolicy: Always
      env:
        - name: AGENT_ID
          value: "{{ AGENT_ID }}"
        - name: MCP_SERVER_URL
          value: "http://aiworker-backend.control-plane.svc.cluster.local:3100"
        - name: ANTHROPIC_API_KEY
          valueFrom:
            secretKeyRef:
              name: aiworker-secrets
              key: anthropic-api-key
        - name: GITEA_URL
          value: "http://gitea.gitea.svc.cluster.local:3000"
        - name: GIT_SSH_KEY
          valueFrom:
            secretKeyRef:
              name: git-ssh-keys
              key: private-key
      resources:
        requests:
          cpu: "500m"
          memory: "1Gi"
        limits:
          cpu: "2"
          memory: "4Gi"
      volumeMounts:
        - name: workspace
          mountPath: /workspace
  volumes:
    - name: workspace
      emptyDir:
        sizeLimit: 10Gi
```
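The `K8sClient.createAgentPod` method used by the Agent Manager below materializes this spec. A minimal sketch of what it could look like, assuming the `@kubernetes/client-node` package with its positional call style; method names, the trimmed-down env list, and the file path are assumptions, so adapt it to the actual client implementation:
```typescript
// services/kubernetes/client.ts (sketch under the assumptions above)
import * as k8s from '@kubernetes/client-node'

export class K8sClient {
  private core: k8s.CoreV1Api

  constructor() {
    const kubeConfig = new k8s.KubeConfig()
    // In-cluster this picks up the service account; locally it reads ~/.kube/config
    kubeConfig.loadFromDefault()
    this.core = kubeConfig.makeApiClient(k8s.CoreV1Api)
  }

  // Build and create the Pod described in the YAML above for a given agent ID
  async createAgentPod(agentId: string) {
    const namespace = 'agents'
    const podName = `claude-agent-${agentId}`

    const pod: k8s.V1Pod = {
      apiVersion: 'v1',
      kind: 'Pod',
      metadata: {
        name: podName,
        namespace,
        labels: { app: 'claude-agent', 'agent-id': agentId, 'managed-by': 'aiworker' },
      },
      spec: {
        restartPolicy: 'Never',
        serviceAccountName: 'claude-agent',
        containers: [
          {
            name: 'agent',
            image: 'aiworker/claude-agent:latest',
            // Secrets (ANTHROPIC_API_KEY, GIT_SSH_KEY, ...) omitted here for brevity
            env: [
              { name: 'AGENT_ID', value: agentId },
              { name: 'MCP_SERVER_URL', value: 'http://aiworker-backend.control-plane.svc.cluster.local:3100' },
            ],
            volumeMounts: [{ name: 'workspace', mountPath: '/workspace' }],
          },
        ],
        volumes: [{ name: 'workspace', emptyDir: { sizeLimit: '10Gi' } }],
      },
    }

    await this.core.createNamespacedPod(namespace, pod)
    return { podName, namespace }
  }

  async deletePod(namespace: string, podName: string) {
    await this.core.deleteNamespacedPod(podName, namespace)
  }
}
```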
## Agent Manager (Backend)
```typescript
// services/kubernetes/agent-manager.ts
import { K8sClient } from './client'
import { db } from '../../db/client'
import { agents, tasks } from '../../db/schema'
import { eq } from 'drizzle-orm'
import crypto from 'crypto'
import { logger } from '../../utils/logger'

export class AgentManager {
  private k8sClient: K8sClient

  constructor() {
    this.k8sClient = new K8sClient()
  }

  async createAgent(capabilities: string[] = []) {
    const agentId = crypto.randomUUID()

    // Create agent pod in K8s
    const { podName, namespace } = await this.k8sClient.createAgentPod(agentId)

    // Insert in database
    await db.insert(agents).values({
      id: agentId,
      podName,
      k8sNamespace: namespace,
      status: 'initializing',
      capabilities,
      lastHeartbeat: new Date(),
    })

    logger.info(`Created agent: ${agentId}`)

    return {
      id: agentId,
      podName,
      namespace,
    }
  }

  async deleteAgent(agentId: string) {
    const agent = await db.query.agents.findFirst({
      where: eq(agents.id, agentId),
    })

    if (!agent) {
      throw new Error('Agent not found')
    }

    // Delete pod
    await this.k8sClient.deletePod(agent.k8sNamespace, agent.podName)

    // Delete from database
    await db.delete(agents).where(eq(agents.id, agentId))

    logger.info(`Deleted agent: ${agentId}`)
  }

  async scaleAgents(targetCount: number) {
    const currentAgents = await db.query.agents.findMany()

    if (currentAgents.length < targetCount) {
      // Scale up
      const toCreate = targetCount - currentAgents.length
      logger.info(`Scaling up: creating ${toCreate} agents`)

      for (let i = 0; i < toCreate; i++) {
        await this.createAgent()
        await new Promise(resolve => setTimeout(resolve, 1000)) // Stagger creation
      }
    } else if (currentAgents.length > targetCount) {
      // Scale down
      const toDelete = currentAgents.length - targetCount
      logger.info(`Scaling down: deleting ${toDelete} agents`)

      // Delete idle agents first
      const idleAgents = currentAgents.filter(a => a.status === 'idle').slice(0, toDelete)
      for (const agent of idleAgents) {
        await this.deleteAgent(agent.id)
      }
    }
  }

  async autoScale() {
    // Get pending tasks
    const pendingTasks = await db.query.tasks.findMany({
      where: eq(tasks.state, 'backlog'),
    })

    // Get available agents
    const availableAgents = await db.query.agents.findMany({
      where: eq(agents.status, 'idle'),
    })
    const busyAgents = await db.query.agents.findMany({
      where: eq(agents.status, 'busy'),
    })

    const totalAgents = availableAgents.length + busyAgents.length

    // Simple scaling logic
    const targetAgents = Math.min(
      Math.max(2, pendingTasks.length, busyAgents.length + 1), // At least 2, max 1 per pending task
      10 // Max 10 agents
    )

    if (targetAgents !== totalAgents) {
      logger.info(`Auto-scaling agents: ${totalAgents} → ${targetAgents}`)
      await this.scaleAgents(targetAgents)
    }
  }

  async cleanupStaleAgents() {
    const staleThreshold = new Date(Date.now() - 5 * 60 * 1000) // 5 minutes

    const staleAgents = await db.query.agents.findMany({
      where: (agents, { lt }) => lt(agents.lastHeartbeat, staleThreshold),
    })

    for (const agent of staleAgents) {
      logger.warn(`Cleaning up stale agent: ${agent.id}`)
      await this.deleteAgent(agent.id)
    }
  }
}

// Start autoscaler
setInterval(async () => {
  const manager = new AgentManager()
  await manager.autoScale()
  await manager.cleanupStaleAgents()
}, 30000) // Every 30 seconds
```
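Besides the autoscaler interval, the manager could also be exposed for manual scaling from the dashboard. A hypothetical route (path, payload, and file name are assumptions, not part of the documented API):
```typescript
// api/routes/agents-scale.ts (hypothetical wiring)
import { Router } from 'express'
import { AgentManager } from '../../services/kubernetes/agent-manager'

const router = Router()
const manager = new AgentManager()

// POST /agents/scale  { "count": 4 }  — force a specific number of agents
router.post('/scale', async (req, res) => {
  const count = Number(req.body?.count)
  if (!Number.isInteger(count) || count < 0) {
    return res.status(400).json({ error: 'count must be a non-negative integer' })
  }

  await manager.scaleAgents(count)
  res.json({ ok: true, target: count })
})

export default router
```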
## Agent Logs Streaming
```typescript
// api/routes/agents.ts
import { Router } from 'express'
import { K8sClient } from '../../services/kubernetes/client'
import { db } from '../../db/client'
import { agents } from '../../db/schema'
import { eq } from 'drizzle-orm'

const router = Router()
const k8sClient = new K8sClient()

router.get('/:agentId/logs/stream', async (req, res) => {
  const { agentId } = req.params

  const agent = await db.query.agents.findFirst({
    where: eq(agents.id, agentId),
  })

  if (!agent) {
    return res.status(404).json({ error: 'Agent not found' })
  }

  res.setHeader('Content-Type', 'text/event-stream')
  res.setHeader('Cache-Control', 'no-cache')
  res.setHeader('Connection', 'keep-alive')

  try {
    const logStream = await k8sClient.streamPodLogs(agent.k8sNamespace, agent.podName)

    logStream.on('data', (chunk) => {
      res.write(`data: ${chunk.toString()}\n\n`)
    })

    logStream.on('end', () => {
      res.end()
    })

    req.on('close', () => {
      logStream.destroy()
    })
  } catch (error) {
    res.status(500).json({ error: 'Failed to stream logs' })
  }
})

export default router
```
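On the frontend, this SSE endpoint can be consumed with a plain `EventSource`. A minimal sketch; the `/api` base path is an assumption:
```typescript
// Frontend consumption of the log stream (sketch)
function followAgentLogs(agentId: string, onChunk: (chunk: string) => void) {
  const source = new EventSource(`/api/agents/${agentId}/logs/stream`)

  source.onmessage = (event) => {
    // Each SSE "data:" frame carries one chunk of pod log output
    onChunk(event.data)
  }

  source.onerror = () => {
    // Stop retrying once the stream ends (e.g. the pod is gone)
    source.close()
  }

  // Return a cleanup handle for the caller
  return () => source.close()
}

// Usage:
// const stop = followAgentLogs('abc123', (chunk) => console.log(chunk))
```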
## Monitoring Agents
```bash
# List all agent pods
kubectl get pods -n agents -l app=claude-agent

# Follow the logs of one agent
kubectl logs -n agents claude-agent-abc123 -f

# Open a shell inside an agent
kubectl exec -it -n agents claude-agent-abc123 -- /bin/bash

# Check resource usage
kubectl top pods -n agents
```