# Claude Code Agents - Kubernetes Pods
## Agent Dockerfile
```dockerfile
# Dockerfile
FROM node:20-alpine

# Install dependencies
RUN apk add --no-cache \
    git \
    openssh-client \
    curl \
    bash \
    vim

# Install Bun
RUN curl -fsSL https://bun.sh/install | bash
ENV PATH="/root/.bun/bin:$PATH"

# Install Claude Code CLI
RUN npm install -g @anthropic-ai/claude-code

# Create workspace
WORKDIR /workspace

# Copy agent scripts
COPY scripts/agent-entrypoint.sh /usr/local/bin/
COPY scripts/agent-loop.sh /usr/local/bin/
RUN chmod +x /usr/local/bin/agent-*.sh

# Git config
RUN git config --global user.name "AiWorker Agent" && \
    git config --global user.email "agent@aiworker.dev" && \
    git config --global init.defaultBranch main

# Setup SSH
RUN mkdir -p /root/.ssh && \
    ssh-keyscan -H git.aiworker.dev >> /root/.ssh/known_hosts

ENTRYPOINT ["/usr/local/bin/agent-entrypoint.sh"]
```
## Agent Entrypoint Script
```bash
#!/bin/bash
# scripts/agent-entrypoint.sh

set -e

echo "🤖 Starting AiWorker Agent..."
echo "Agent ID: $AGENT_ID"

# Setup SSH key
if [ -n "$GIT_SSH_KEY" ]; then
  echo "$GIT_SSH_KEY" > /root/.ssh/id_ed25519
  chmod 600 /root/.ssh/id_ed25519
fi

# Configure Claude Code with MCP Server
mkdir -p /root/.claude-code
cat > /root/.claude-code/config.json <<EOF
{
  "mcpServers": {
    "aiworker": {
      "command": "curl",
      "args": [
        "-X", "POST",
        "-H", "Content-Type: application/json",
        "-H", "X-Agent-ID: $AGENT_ID",
        "$MCP_SERVER_URL/rpc"
      ]
    }
  }
}
EOF

# Send heartbeat
send_heartbeat() {
  curl -s -X POST "$MCP_SERVER_URL/heartbeat" \
    -H "Content-Type: application/json" \
    -d "{\"agentId\":\"$AGENT_ID\",\"status\":\"$1\"}" > /dev/null 2>&1 || true
}

# Start heartbeat loop in background
while true; do
  send_heartbeat "idle"
  sleep 30
done &
HEARTBEAT_PID=$!

# Trap signals for graceful shutdown
trap "kill $HEARTBEAT_PID; send_heartbeat 'offline'; exit 0" SIGTERM SIGINT

# Start agent work loop
exec /usr/local/bin/agent-loop.sh
```
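The `/heartbeat` endpoint the entrypoint posts to is not shown in this document. A minimal sketch of what it could look like on the backend, using the same Express + Drizzle stack as the other snippets; the route module path and request fields are assumptions taken from the script above:
```typescript
// api/routes/heartbeat.ts (sketch, not part of the documented backend)
import { Router } from 'express'
import { db } from '../../db/client'
import { agents } from '../../db/schema'
import { eq } from 'drizzle-orm'

const router = Router()

// Matches the POST the entrypoint sends every 30 seconds
router.post('/heartbeat', async (req, res) => {
  const { agentId, status } = req.body as { agentId?: string; status?: string }

  if (!agentId) {
    return res.status(400).json({ error: 'agentId is required' })
  }

  // Refresh lastHeartbeat so cleanupStaleAgents() (see Agent Manager below) keeps the agent alive
  await db
    .update(agents)
    .set({ status: status ?? 'idle', lastHeartbeat: new Date() })
    .where(eq(agents.id, agentId))

  res.json({ ok: true })
})

export default router
```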
## Agent Work Loop
```bash
#!/bin/bash
# scripts/agent-loop.sh

set -e

echo "🔄 Starting agent work loop..."

while true; do
  echo "📋 Checking for tasks..."

  # Get next task via MCP
  TASK=$(curl -s -X POST "$MCP_SERVER_URL/tools/call" \
    -H "Content-Type: application/json" \
    -d "{
      \"name\": \"get_next_task\",
      \"arguments\": {
        \"agentId\": \"$AGENT_ID\"
      }
    }")

  TASK_ID=$(echo "$TASK" | jq -r '.content[0].text | fromjson | .task.id // empty')

  if [ -z "$TASK_ID" ] || [ "$TASK_ID" = "null" ]; then
    echo "💤 No tasks available, waiting..."
    sleep 10
    continue
  fi

  echo "🎯 Got task: $TASK_ID"

  # Extract task details
  TASK_TITLE=$(echo "$TASK" | jq -r '.content[0].text | fromjson | .task.title')
  TASK_DESC=$(echo "$TASK" | jq -r '.content[0].text | fromjson | .task.description')
  PROJECT_REPO=$(echo "$TASK" | jq -r '.content[0].text | fromjson | .task.project.giteaRepoUrl')

  echo "📝 Task: $TASK_TITLE"
  echo "📦 Repo: $PROJECT_REPO"

  # Log activity
  curl -s -X POST "$MCP_SERVER_URL/tools/call" \
    -H "Content-Type: application/json" \
    -d "{
      \"name\": \"log_activity\",
      \"arguments\": {
        \"agentId\": \"$AGENT_ID\",
        \"level\": \"info\",
        \"message\": \"Starting task: $TASK_TITLE\"
      }
    }" > /dev/null

  # Clone repository
  REPO_DIR="/workspace/task-$TASK_ID"
  if [ ! -d "$REPO_DIR" ]; then
    echo "📥 Cloning repository..."
    git clone "$PROJECT_REPO" "$REPO_DIR"
  fi
  cd "$REPO_DIR"

  # Create branch via MCP
  echo "🌿 Creating branch..."
  BRANCH_RESULT=$(curl -s -X POST "$MCP_SERVER_URL/tools/call" \
    -H "Content-Type: application/json" \
    -d "{
      \"name\": \"create_branch\",
      \"arguments\": {
        \"taskId\": \"$TASK_ID\"
      }
    }")

  BRANCH_NAME=$(echo "$BRANCH_RESULT" | jq -r '.content[0].text | fromjson | .branchName')
  echo "🌿 Branch: $BRANCH_NAME"

  # Fetch and checkout
  git fetch origin
  git checkout "$BRANCH_NAME" 2>/dev/null || git checkout -b "$BRANCH_NAME"

  # Start Claude Code session
  echo "🧠 Starting Claude Code session..."

  # Create task prompt
  TASK_PROMPT="I need you to work on the following task:

Title: $TASK_TITLE

Description:
$TASK_DESC

Instructions:
1. Analyze the codebase
2. Implement the required changes
3. Write tests if needed
4. Commit your changes with clear messages
5. When done, use the MCP tools to:
   - create_pull_request with a summary
   - trigger_preview_deploy
   - update_task_status to 'ready_to_test'

If you need clarification, use ask_user_question.

Start working on this task now."

  # Run Claude Code (with timeout of 2 hours)
  timeout 7200 claude-code chat --message "$TASK_PROMPT" || {
    STATUS=$?
    if [ $STATUS -eq 124 ]; then
      echo "⏰ Task timeout"
      curl -s -X POST "$MCP_SERVER_URL/tools/call" \
        -H "Content-Type: application/json" \
        -d "{
          \"name\": \"update_task_status\",
          \"arguments\": {
            \"taskId\": \"$TASK_ID\",
            \"status\": \"needs_input\",
            \"metadata\": {\"reason\": \"timeout\"}
          }
        }" > /dev/null
    else
      echo "❌ Claude Code exited with status $STATUS"
    fi
  }

  echo "✅ Task completed: $TASK_ID"

  # Cleanup
  cd /workspace
  rm -rf "$REPO_DIR"

  # Brief pause before next task
  sleep 5
done
```
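The jq pipelines above assume the `get_next_task` tool wraps its result as a JSON string inside `content[0].text`. A hedged sketch of that shape; field names beyond the ones the script actually reads are assumptions:
```typescript
// Response shape assumed by the agent loop's jq parsing (sketch)
interface ToolCallResult {
  content: Array<{
    type: 'text'
    // JSON-encoded string; the loop pipes it through `fromjson`
    text: string
  }>
}

// What the decoded text is expected to contain for get_next_task
interface NextTaskPayload {
  task: {
    id: string
    title: string
    description: string
    project: {
      giteaRepoUrl: string
    }
  } | null // null when no task is available
}

// Example of building such a response on the server side
function toToolResult(payload: NextTaskPayload): ToolCallResult {
  return { content: [{ type: 'text', text: JSON.stringify(payload) }] }
}
```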
## Pod Specification
```yaml
# k8s/agents/claude-agent-pod.yaml
apiVersion: v1
kind: Pod
metadata:
  name: claude-agent-{{ AGENT_ID }}
  namespace: agents
  labels:
    app: claude-agent
    agent-id: "{{ AGENT_ID }}"
    managed-by: aiworker
spec:
  restartPolicy: Never
  serviceAccountName: claude-agent
  containers:
    - name: agent
      image: aiworker/claude-agent:latest
      imagePullPolicy: Always
      env:
        - name: AGENT_ID
          value: "{{ AGENT_ID }}"
        - name: MCP_SERVER_URL
          value: "http://aiworker-backend.control-plane.svc.cluster.local:3100"
        - name: ANTHROPIC_API_KEY
          valueFrom:
            secretKeyRef:
              name: aiworker-secrets
              key: anthropic-api-key
        - name: GITEA_URL
          value: "http://gitea.gitea.svc.cluster.local:3000"
        - name: GIT_SSH_KEY
          valueFrom:
            secretKeyRef:
              name: git-ssh-keys
              key: private-key
      resources:
        requests:
          cpu: "500m"
          memory: "1Gi"
        limits:
          cpu: "2"
          memory: "4Gi"
      volumeMounts:
        - name: workspace
          mountPath: /workspace
  volumes:
    - name: workspace
      emptyDir:
        sizeLimit: 10Gi
```
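The `K8sClient.createAgentPod` method used by the Agent Manager below materializes this spec. A minimal sketch of what it could look like, assuming the `@kubernetes/client-node` package with its positional call style; method names, the trimmed-down env list, and the file path are assumptions, so adapt it to the actual client implementation:
```typescript
// services/kubernetes/client.ts (sketch under the assumptions above)
import * as k8s from '@kubernetes/client-node'

export class K8sClient {
  private core: k8s.CoreV1Api

  constructor() {
    const kubeConfig = new k8s.KubeConfig()
    // In-cluster this picks up the service account; locally it reads ~/.kube/config
    kubeConfig.loadFromDefault()
    this.core = kubeConfig.makeApiClient(k8s.CoreV1Api)
  }

  // Build and create the Pod described in the YAML above for a given agent ID
  async createAgentPod(agentId: string) {
    const namespace = 'agents'
    const podName = `claude-agent-${agentId}`

    const pod: k8s.V1Pod = {
      apiVersion: 'v1',
      kind: 'Pod',
      metadata: {
        name: podName,
        namespace,
        labels: { app: 'claude-agent', 'agent-id': agentId, 'managed-by': 'aiworker' },
      },
      spec: {
        restartPolicy: 'Never',
        serviceAccountName: 'claude-agent',
        containers: [
          {
            name: 'agent',
            image: 'aiworker/claude-agent:latest',
            // Secrets (ANTHROPIC_API_KEY, GIT_SSH_KEY, ...) omitted here for brevity
            env: [
              { name: 'AGENT_ID', value: agentId },
              { name: 'MCP_SERVER_URL', value: 'http://aiworker-backend.control-plane.svc.cluster.local:3100' },
            ],
            volumeMounts: [{ name: 'workspace', mountPath: '/workspace' }],
          },
        ],
        volumes: [{ name: 'workspace', emptyDir: { sizeLimit: '10Gi' } }],
      },
    }

    await this.core.createNamespacedPod(namespace, pod)
    return { podName, namespace }
  }

  async deletePod(namespace: string, podName: string) {
    await this.core.deleteNamespacedPod(podName, namespace)
  }
}
```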
## Agent Manager (Backend)
```typescript
// services/kubernetes/agent-manager.ts
import { K8sClient } from './client'
import { db } from '../../db/client'
import { agents, tasks } from '../../db/schema'
import { eq } from 'drizzle-orm'
import crypto from 'crypto'
import { logger } from '../../utils/logger'

export class AgentManager {
  private k8sClient: K8sClient

  constructor() {
    this.k8sClient = new K8sClient()
  }

  async createAgent(capabilities: string[] = []) {
    const agentId = crypto.randomUUID()

    // Create agent pod in K8s
    const { podName, namespace } = await this.k8sClient.createAgentPod(agentId)

    // Insert in database
    await db.insert(agents).values({
      id: agentId,
      podName,
      k8sNamespace: namespace,
      status: 'initializing',
      capabilities,
      lastHeartbeat: new Date(),
    })

    logger.info(`Created agent: ${agentId}`)

    return {
      id: agentId,
      podName,
      namespace,
    }
  }

  async deleteAgent(agentId: string) {
    const agent = await db.query.agents.findFirst({
      where: eq(agents.id, agentId),
    })

    if (!agent) {
      throw new Error('Agent not found')
    }

    // Delete pod
    await this.k8sClient.deletePod(agent.k8sNamespace, agent.podName)

    // Delete from database
    await db.delete(agents).where(eq(agents.id, agentId))

    logger.info(`Deleted agent: ${agentId}`)
  }

  async scaleAgents(targetCount: number) {
    const currentAgents = await db.query.agents.findMany()

    if (currentAgents.length < targetCount) {
      // Scale up
      const toCreate = targetCount - currentAgents.length
      logger.info(`Scaling up: creating ${toCreate} agents`)

      for (let i = 0; i < toCreate; i++) {
        await this.createAgent()
        await new Promise(resolve => setTimeout(resolve, 1000)) // Stagger creation
      }
    } else if (currentAgents.length > targetCount) {
      // Scale down
      const toDelete = currentAgents.length - targetCount
      logger.info(`Scaling down: deleting ${toDelete} agents`)

      // Delete idle agents first
      const idleAgents = currentAgents.filter(a => a.status === 'idle').slice(0, toDelete)
      for (const agent of idleAgents) {
        await this.deleteAgent(agent.id)
      }
    }
  }

  async autoScale() {
    // Get pending tasks
    const pendingTasks = await db.query.tasks.findMany({
      where: eq(tasks.state, 'backlog'),
    })

    // Get available agents
    const availableAgents = await db.query.agents.findMany({
      where: eq(agents.status, 'idle'),
    })
    const busyAgents = await db.query.agents.findMany({
      where: eq(agents.status, 'busy'),
    })

    const totalAgents = availableAgents.length + busyAgents.length

    // Simple scaling logic
    const targetAgents = Math.min(
      Math.max(2, pendingTasks.length, busyAgents.length + 1), // At least 2, max 1 per pending task
      10 // Max 10 agents
    )

    if (targetAgents !== totalAgents) {
      logger.info(`Auto-scaling agents: ${totalAgents} → ${targetAgents}`)
      await this.scaleAgents(targetAgents)
    }
  }

  async cleanupStaleAgents() {
    const staleThreshold = new Date(Date.now() - 5 * 60 * 1000) // 5 minutes

    const staleAgents = await db.query.agents.findMany({
      where: (agents, { lt }) => lt(agents.lastHeartbeat, staleThreshold),
    })

    for (const agent of staleAgents) {
      logger.warn(`Cleaning up stale agent: ${agent.id}`)
      await this.deleteAgent(agent.id)
    }
  }
}

// Start autoscaler
setInterval(async () => {
  const manager = new AgentManager()
  await manager.autoScale()
  await manager.cleanupStaleAgents()
}, 30000) // Every 30 seconds
```
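Besides the autoscaler interval, the manager could also be exposed for manual scaling from the dashboard. A hypothetical route (path, payload, and file name are assumptions, not part of the documented API):
```typescript
// api/routes/agents-scale.ts (hypothetical wiring)
import { Router } from 'express'
import { AgentManager } from '../../services/kubernetes/agent-manager'

const router = Router()
const manager = new AgentManager()

// POST /agents/scale  { "count": 4 }  — force a specific number of agents
router.post('/scale', async (req, res) => {
  const count = Number(req.body?.count)
  if (!Number.isInteger(count) || count < 0) {
    return res.status(400).json({ error: 'count must be a non-negative integer' })
  }

  await manager.scaleAgents(count)
  res.json({ ok: true, target: count })
})

export default router
```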
## Agent Logs Streaming
```typescript
// api/routes/agents.ts
import { Router } from 'express'
import { K8sClient } from '../../services/kubernetes/client'
import { db } from '../../db/client'
import { agents } from '../../db/schema'
import { eq } from 'drizzle-orm'

const router = Router()
const k8sClient = new K8sClient()

router.get('/:agentId/logs/stream', async (req, res) => {
  const { agentId } = req.params

  const agent = await db.query.agents.findFirst({
    where: eq(agents.id, agentId),
  })

  if (!agent) {
    return res.status(404).json({ error: 'Agent not found' })
  }

  res.setHeader('Content-Type', 'text/event-stream')
  res.setHeader('Cache-Control', 'no-cache')
  res.setHeader('Connection', 'keep-alive')

  try {
    const logStream = await k8sClient.streamPodLogs(agent.k8sNamespace, agent.podName)

    logStream.on('data', (chunk) => {
      res.write(`data: ${chunk.toString()}\n\n`)
    })

    logStream.on('end', () => {
      res.end()
    })

    req.on('close', () => {
      logStream.destroy()
    })
  } catch (error) {
    res.status(500).json({ error: 'Failed to stream logs' })
  }
})

export default router
```
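On the frontend, this SSE endpoint can be consumed with a plain `EventSource`. A minimal sketch; the `/api` base path is an assumption:
```typescript
// Frontend consumption of the log stream (sketch)
function followAgentLogs(agentId: string, onChunk: (chunk: string) => void) {
  const source = new EventSource(`/api/agents/${agentId}/logs/stream`)

  source.onmessage = (event) => {
    // Each SSE "data:" frame carries one chunk of pod log output
    onChunk(event.data)
  }

  source.onerror = () => {
    // Stop retrying once the stream ends (e.g. the pod is gone)
    source.close()
  }

  // Return a cleanup handle for the caller
  return () => source.close()
}

// Usage:
// const stop = followAgentLogs('abc123', (chunk) => console.log(chunk))
```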
## Monitoring Agents
```bash
# List all agent pods
kubectl get pods -n agents -l app=claude-agent

# Follow the logs of one agent
kubectl logs -n agents claude-agent-abc123 -f

# Open a shell inside an agent
kubectl exec -it -n agents claude-agent-abc123 -- /bin/bash

# Check resource usage
kubectl top pods -n agents
```