Complete documentation for future sessions
- CLAUDE.md for AI agents to understand the codebase - GITEA-GUIDE.md centralizes all Gitea operations (API, Registry, Auth) - DEVELOPMENT-WORKFLOW.md explains complete dev process - ROADMAP.md, NEXT-SESSION.md for planning - QUICK-REFERENCE.md, TROUBLESHOOTING.md for daily use - 40+ detailed docs in /docs folder - Backend as submodule from Gitea Everything documented for autonomous operation. Co-Authored-By: Claude Sonnet 4.5 (1M context) <noreply@anthropic.com>
This commit is contained in:
499
docs/05-agents/claude-code-pods.md
Normal file
499
docs/05-agents/claude-code-pods.md
Normal file
@@ -0,0 +1,499 @@
|
||||
# Claude Code Agents - Pods en Kubernetes
|
||||
|
||||
## Dockerfile del Agente
|
||||
|
||||
```dockerfile
|
||||
# Dockerfile
# Image for an AiWorker Claude Code agent pod: Node 20 on Alpine with Git,
# SSH, Bun, the Claude Code CLI and the two agent scripts.
FROM node:20-alpine

# Install dependencies
# jq is required by agent-loop.sh, which parses every MCP response with it.
RUN apk add --no-cache \
    git \
    openssh-client \
    curl \
    bash \
    jq \
    vim

# Install Bun
RUN curl -fsSL https://bun.sh/install | bash
ENV PATH="/root/.bun/bin:$PATH"

# Install Claude Code CLI
RUN npm install -g @anthropic-ai/claude-code

# Create workspace
WORKDIR /workspace

# Copy agent scripts
COPY scripts/agent-entrypoint.sh /usr/local/bin/
COPY scripts/agent-loop.sh /usr/local/bin/
RUN chmod +x /usr/local/bin/agent-*.sh

# Git config
RUN git config --global user.name "AiWorker Agent" && \
    git config --global user.email "agent@aiworker.dev" && \
    git config --global init.defaultBranch main

# Setup SSH: pre-trust the Git host and keep the directory private.
# /root/.claude-code is created here because agent-entrypoint.sh writes its
# config.json into it and would otherwise fail on a missing directory.
RUN mkdir -p /root/.ssh /root/.claude-code && \
    chmod 700 /root/.ssh && \
    ssh-keyscan -H git.aiworker.dev >> /root/.ssh/known_hosts

ENTRYPOINT ["/usr/local/bin/agent-entrypoint.sh"]
|
||||
```
|
||||
|
||||
## Agent Entrypoint Script
|
||||
|
||||
```bash
|
||||
#!/bin/bash
# scripts/agent-entrypoint.sh
#
# Container entrypoint for an agent pod. It installs the Git SSH key, writes
# the Claude Code MCP configuration, posts heartbeats in the background and
# runs the work loop until the loop exits or the container is asked to stop.
#
# Environment:
#   AGENT_ID        identifier sent with heartbeats and in the MCP config
#   MCP_SERVER_URL  base URL used for /rpc and /heartbeat
#   GIT_SSH_KEY     optional private key written to /root/.ssh/id_ed25519

set -e

echo "🤖 Starting AiWorker Agent..."
echo "Agent ID: $AGENT_ID"

# Setup SSH key
if [ -n "$GIT_SSH_KEY" ]; then
  mkdir -p /root/.ssh
  chmod 700 /root/.ssh
  # Write under a restrictive umask so the key is never world-readable.
  ( umask 077 && printf '%s\n' "$GIT_SSH_KEY" > /root/.ssh/id_ed25519 )
  chmod 600 /root/.ssh/id_ed25519
fi

# Configure Claude Code with MCP Server
# The target directory must exist; otherwise the redirect fails and `set -e`
# aborts the container before the agent ever starts.
mkdir -p /root/.claude-code
cat > /root/.claude-code/config.json <<EOF
{
  "mcpServers": {
    "aiworker": {
      "command": "curl",
      "args": [
        "-X", "POST",
        "-H", "Content-Type: application/json",
        "-H", "X-Agent-ID: $AGENT_ID",
        "$MCP_SERVER_URL/rpc"
      ]
    }
  }
}
EOF

# Send heartbeat ($1 = status). Best effort: failures are ignored on purpose
# so a temporarily unreachable server never stops the agent.
send_heartbeat() {
  curl -s -X POST "$MCP_SERVER_URL/heartbeat" \
    -H "Content-Type: application/json" \
    -d "{\"agentId\":\"$AGENT_ID\",\"status\":\"$1\"}" > /dev/null 2>&1 || true
}

# Start heartbeat loop in background
while true; do
  send_heartbeat "idle"
  sleep 30
done &
HEARTBEAT_PID=$!

# Start agent work loop as a child process. Using `exec` here would replace
# this shell and silently discard the trap below, so the "offline" heartbeat
# would never be sent on shutdown.
/usr/local/bin/agent-loop.sh &
LOOP_PID=$!

# Trap signals for graceful shutdown
shutdown() {
  kill "$LOOP_PID" "$HEARTBEAT_PID" 2>/dev/null || true
  send_heartbeat 'offline'
  exit 0
}
trap shutdown SIGTERM SIGINT

# `wait` returns as soon as a trapped signal arrives, after which the trap
# runs. If the loop ends by itself, stop the heartbeat, report offline and
# propagate the loop's exit status as the container's exit status.
LOOP_STATUS=0
wait "$LOOP_PID" || LOOP_STATUS=$?

kill "$HEARTBEAT_PID" 2>/dev/null || true
send_heartbeat 'offline'
exit "$LOOP_STATUS"
|
||||
```
|
||||
|
||||
## Agent Work Loop
|
||||
|
||||
```bash
|
||||
#!/bin/bash
# scripts/agent-loop.sh
#
# Work loop of an agent: polls the MCP server for the next task, clones the
# task's repository, checks out the task branch, runs Claude Code on the task
# prompt and cleans up the checkout afterwards. A failure while handling one
# task is reported and the loop moves on instead of terminating the agent.
#
# Environment:
#   AGENT_ID        identifier of this agent
#   MCP_SERVER_URL  base URL of the MCP server (/tools/call is appended)

set -e

echo "🔄 Starting agent work loop..."

# Call an MCP tool and print the raw response.
#   $1 = tool name
#   $2 = JSON object holding the tool arguments
# The payload is assembled by jq so that quotes, backslashes and newlines in
# values (for example a task title) cannot produce invalid JSON.
mcp_call() {
  local payload
  payload=$(jq -n --arg name "$1" --argjson arguments "$2" \
    '{name: $name, arguments: $arguments}')
  curl -s -X POST "$MCP_SERVER_URL/tools/call" \
    -H "Content-Type: application/json" \
    -d "$payload"
}

# Print one field of the JSON document embedded in .content[0].text.
#   $1 = raw MCP response
#   $2 = jq path inside the embedded document (e.g. '.task.id')
# Prints nothing when the response is malformed or the field is missing.
mcp_field() {
  printf '%s' "$1" \
    | jq -r ".content[0].text | fromjson | $2 // empty" 2>/dev/null || true
}

# Record an activity entry for this agent (best effort).
#   $1 = level, $2 = message
log_activity() {
  local arguments
  arguments=$(jq -n --arg agentId "$AGENT_ID" --arg level "$1" --arg message "$2" \
    '{agentId: $agentId, level: $level, message: $message}')
  mcp_call "log_activity" "$arguments" > /dev/null || true
}

# Give up on the current task: report why, remove the checkout and pause.
#   $1 = reason
abort_task() {
  echo "❌ $1"
  log_activity "error" "$1"
  cd /workspace
  rm -rf "$REPO_DIR"
  sleep 5
}

while true; do
  echo "📋 Checking for tasks..."

  # Get next task via MCP
  TASK=$(mcp_call "get_next_task" \
    "$(jq -n --arg agentId "$AGENT_ID" '{agentId: $agentId}')" || true)

  TASK_ID=$(mcp_field "$TASK" '.task.id')

  if [ -z "$TASK_ID" ] || [ "$TASK_ID" = "null" ]; then
    echo "💤 No tasks available, waiting..."
    sleep 10
    continue
  fi

  echo "🎯 Got task: $TASK_ID"

  # Extract task details
  TASK_TITLE=$(mcp_field "$TASK" '.task.title')
  TASK_DESC=$(mcp_field "$TASK" '.task.description')
  PROJECT_REPO=$(mcp_field "$TASK" '.task.project.giteaRepoUrl')

  echo "📝 Task: $TASK_TITLE"
  echo "📦 Repo: $PROJECT_REPO"

  # Log activity
  log_activity "info" "Starting task: $TASK_TITLE"

  # Clone repository
  REPO_DIR="/workspace/task-$TASK_ID"
  if [ ! -d "$REPO_DIR" ]; then
    echo "📥 Cloning repository..."
    if ! git clone "$PROJECT_REPO" "$REPO_DIR"; then
      abort_task "Failed to clone repository for task $TASK_ID"
      continue
    fi
  fi

  cd "$REPO_DIR"

  # Create branch via MCP
  echo "🌿 Creating branch..."
  BRANCH_RESULT=$(mcp_call "create_branch" \
    "$(jq -n --arg taskId "$TASK_ID" '{taskId: $taskId}')" || true)

  BRANCH_NAME=$(mcp_field "$BRANCH_RESULT" '.branchName')
  if [ -z "$BRANCH_NAME" ]; then
    # Without this guard the script would check out a branch literally
    # named "null" (or fail on an empty name).
    abort_task "No branch name returned for task $TASK_ID"
    continue
  fi
  echo "🌿 Branch: $BRANCH_NAME"

  # Fetch and checkout
  if ! git fetch origin; then
    abort_task "Failed to fetch origin for task $TASK_ID"
    continue
  fi
  if ! { git checkout "$BRANCH_NAME" 2>/dev/null || git checkout -b "$BRANCH_NAME"; }; then
    abort_task "Failed to check out branch $BRANCH_NAME for task $TASK_ID"
    continue
  fi

  # Start Claude Code session
  echo "🧠 Starting Claude Code session..."

  # Create task prompt
  TASK_PROMPT="I need you to work on the following task:

Title: $TASK_TITLE

Description:
$TASK_DESC

Instructions:
1. Analyze the codebase
2. Implement the required changes
3. Write tests if needed
4. Commit your changes with clear messages
5. When done, use the MCP tools to:
   - create_pull_request with a summary
   - trigger_preview_deploy
   - update_task_status to 'ready_to_test'

If you need clarification, use ask_user_question.

Start working on this task now."

  # Run Claude Code (with timeout of 2 hours)
  # NOTE(review): the @anthropic-ai/claude-code package appears to install its
  # CLI under the name `claude` (non-interactive mode: `claude -p`); confirm
  # the command name and flags below against the installed version.
  STATUS=0
  timeout 7200 claude-code chat --message "$TASK_PROMPT" || STATUS=$?

  if [ "$STATUS" -eq 0 ]; then
    echo "✅ Task completed: $TASK_ID"
  elif [ "$STATUS" -eq 124 ]; then
    # 124 is the exit status `timeout` uses when the time limit was hit.
    echo "⏰ Task timeout"
    mcp_call "update_task_status" \
      "$(jq -n --arg taskId "$TASK_ID" \
        '{taskId: $taskId, status: "needs_input", metadata: {reason: "timeout"}}')" \
      > /dev/null || true
  else
    echo "❌ Claude Code exited with status $STATUS"
  fi

  # Cleanup
  cd /workspace
  rm -rf "$REPO_DIR"

  # Brief pause before next task
  sleep 5
done
|
||||
```
|
||||
|
||||
## Pod Specification
|
||||
|
||||
```yaml
|
||||
# k8s/agents/claude-agent-pod.yaml
# Pod template for a single Claude Code agent. "{{ AGENT_ID }}" is a
# placeholder that must be substituted before the manifest is applied.
apiVersion: v1
kind: Pod
metadata:
  name: claude-agent-{{ AGENT_ID }}
  namespace: agents
  labels:
    app: claude-agent
    agent-id: "{{ AGENT_ID }}"
    managed-by: aiworker
spec:
  # The pod is not restarted by Kubernetes when the agent process exits.
  restartPolicy: Never
  serviceAccountName: claude-agent

  containers:
    - name: agent
      image: aiworker/claude-agent:latest
      imagePullPolicy: Always

      env:
        # Identifier the agent scripts send with heartbeats and MCP calls.
        - name: AGENT_ID
          value: "{{ AGENT_ID }}"

        # Base URL the agent scripts use for /rpc, /heartbeat and /tools/call.
        - name: MCP_SERVER_URL
          value: "http://aiworker-backend.control-plane.svc.cluster.local:3100"

        - name: ANTHROPIC_API_KEY
          valueFrom:
            secretKeyRef:
              name: aiworker-secrets
              key: anthropic-api-key

        - name: GITEA_URL
          value: "http://gitea.gitea.svc.cluster.local:3000"

        # Private key that agent-entrypoint.sh writes to /root/.ssh/id_ed25519.
        - name: GIT_SSH_KEY
          valueFrom:
            secretKeyRef:
              name: git-ssh-keys
              key: private-key

      resources:
        requests:
          cpu: "500m"
          memory: "1Gi"
        limits:
          cpu: "2"
          memory: "4Gi"

      volumeMounts:
        - name: workspace
          mountPath: /workspace

  volumes:
    # Scratch space for task checkouts (agent-loop.sh clones into /workspace).
    - name: workspace
      emptyDir:
        sizeLimit: 10Gi
|
||||
```
|
||||
|
||||
## Agent Manager (Backend)
|
||||
|
||||
```typescript
|
||||
// services/kubernetes/agent-manager.ts
|
||||
import crypto from 'crypto'

import { eq } from 'drizzle-orm'

import { db } from '../../db/client'
import { agents, tasks } from '../../db/schema'
import { logger } from '../../utils/logger'
import { K8sClient } from './client'
|
||||
|
||||
/**
 * Manages the lifecycle of agent pods: creation, deletion, manual and
 * automatic scaling, and removal of agents whose heartbeat went stale.
 * Every agent is represented by a Kubernetes pod plus a row in `agents`.
 */
export class AgentManager {
  /** Smallest fleet size autoScale() will target. */
  private static readonly MIN_AGENTS = 2
  /** Largest fleet size autoScale() will target. */
  private static readonly MAX_AGENTS = 10
  /** Pause between two pod creations while scaling up. */
  private static readonly CREATE_STAGGER_MS = 1000
  /** An agent whose last heartbeat is older than this is considered stale. */
  private static readonly STALE_AFTER_MS = 5 * 60 * 1000

  private k8sClient: K8sClient

  constructor() {
    this.k8sClient = new K8sClient()
  }

  /**
   * Creates a new agent pod and records it in the database.
   *
   * @param capabilities capability tags stored with the agent row
   * @returns the generated agent id together with the pod name and namespace
   * @throws whatever pod creation or the database insert throws; when the
   *         insert fails the freshly created pod is removed again first
   */
  async createAgent(capabilities: string[] = []) {
    const agentId = crypto.randomUUID()

    // Create agent pod in K8s
    const { podName, namespace } = await this.k8sClient.createAgentPod(agentId)

    // Insert in database
    try {
      await db.insert(agents).values({
        id: agentId,
        podName,
        k8sNamespace: namespace,
        status: 'initializing',
        capabilities,
        lastHeartbeat: new Date(),
      })
    } catch (error) {
      // Without a row nothing would ever find and delete this pod again,
      // so roll the pod back before reporting the failure.
      try {
        await this.k8sClient.deletePod(namespace, podName)
      } catch {
        logger.warn(`Failed to remove pod ${podName} after database insert failure`)
      }
      throw error
    }

    logger.info(`Created agent: ${agentId}`)

    return {
      id: agentId,
      podName,
      namespace,
    }
  }

  /**
   * Deletes an agent's pod and then its database row.
   *
   * @param agentId id of the agent to remove
   * @throws Error('Agent not found') when no row exists for the id
   */
  async deleteAgent(agentId: string) {
    const agent = await db.query.agents.findFirst({
      where: eq(agents.id, agentId),
    })

    if (!agent) {
      throw new Error('Agent not found')
    }

    // Delete pod
    await this.k8sClient.deletePod(agent.k8sNamespace, agent.podName)

    // Delete from database
    await db.delete(agents).where(eq(agents.id, agentId))

    logger.info(`Deleted agent: ${agentId}`)
  }

  /**
   * Moves the fleet towards `targetCount` agents. Scaling up creates agents
   * one after another with a short pause in between; scaling down removes
   * idle agents only, so the fleet can stay above the target while agents
   * are in any other state.
   *
   * @param targetCount desired number of agents (all statuses counted)
   */
  async scaleAgents(targetCount: number) {
    const currentAgents = await db.query.agents.findMany()

    if (currentAgents.length < targetCount) {
      // Scale up
      const toCreate = targetCount - currentAgents.length
      logger.info(`Scaling up: creating ${toCreate} agents`)

      for (let i = 0; i < toCreate; i++) {
        await this.createAgent()
        if (i < toCreate - 1) {
          // Stagger creation; no need to wait after the last one.
          await new Promise(resolve => setTimeout(resolve, AgentManager.CREATE_STAGGER_MS))
        }
      }
    } else if (currentAgents.length > targetCount) {
      // Scale down
      const toDelete = currentAgents.length - targetCount
      logger.info(`Scaling down: deleting ${toDelete} agents`)

      // Only idle agents are eligible for removal.
      const idleAgents = currentAgents.filter(a => a.status === 'idle').slice(0, toDelete)

      for (const agent of idleAgents) {
        await this.deleteAgent(agent.id)
      }
    }
  }

  /**
   * Derives a target fleet size from the backlog and the number of busy
   * agents, then delegates to scaleAgents() when the fleet size differs.
   */
  async autoScale() {
    const [pendingTasks, allAgents] = await Promise.all([
      // Get pending tasks
      db.query.tasks.findMany({
        where: eq(tasks.state, 'backlog'),
      }),
      // Every agent counts towards the fleet size, whatever its status,
      // because scaleAgents() compares the target against all rows too.
      db.query.agents.findMany(),
    ])

    const busyCount = allAgents.filter(a => a.status === 'busy').length
    const totalAgents = allAgents.length

    // Simple scaling logic: one agent per pending task and one spare beyond
    // the busy ones, never below MIN_AGENTS and never above MAX_AGENTS.
    const targetAgents = Math.min(
      Math.max(AgentManager.MIN_AGENTS, pendingTasks.length, busyCount + 1),
      AgentManager.MAX_AGENTS
    )

    if (targetAgents !== totalAgents) {
      logger.info(`Auto-scaling agents: ${totalAgents} → ${targetAgents}`)
      await this.scaleAgents(targetAgents)
    }
  }

  /**
   * Deletes every agent whose last heartbeat is older than the stale
   * threshold. A failure to delete one agent is logged and does not stop
   * the cleanup of the remaining ones.
   */
  async cleanupStaleAgents() {
    const staleThreshold = new Date(Date.now() - AgentManager.STALE_AFTER_MS)

    const staleAgents = await db.query.agents.findMany({
      where: (agents, { lt }) => lt(agents.lastHeartbeat, staleThreshold),
    })

    for (const agent of staleAgents) {
      logger.warn(`Cleaning up stale agent: ${agent.id}`)
      try {
        await this.deleteAgent(agent.id)
      } catch (error) {
        const reason = error instanceof Error ? error.message : String(error)
        logger.warn(`Failed to clean up stale agent ${agent.id}: ${reason}`)
      }
    }
  }
}
|
||||
|
||||
// Start autoscaler
// One manager is created on the first tick and reused afterwards, so a new
// Kubernetes client is not constructed every 30 seconds.
let autoscalerManager: AgentManager | undefined
// Set while a tick is in progress so that a slow run is never overlapped by
// the next one (two concurrent runs could both create agents).
let autoscalerRunning = false

setInterval(async () => {
  if (autoscalerRunning) {
    return
  }
  autoscalerRunning = true

  try {
    if (!autoscalerManager) {
      autoscalerManager = new AgentManager()
    }
    await autoscalerManager.autoScale()
    await autoscalerManager.cleanupStaleAgents()
  } catch (error) {
    // An async timer callback has no caller to reject to; without this the
    // failure would surface as an unhandled promise rejection.
    const reason = error instanceof Error ? error.message : String(error)
    logger.warn(`Autoscaler run failed: ${reason}`)
  } finally {
    autoscalerRunning = false
  }
}, 30000) // Every 30 seconds
|
||||
```
|
||||
|
||||
## Agent Logs Streaming
|
||||
|
||||
```typescript
|
||||
// api/routes/agents.ts
|
||||
import { Router } from 'express'
|
||||
import { K8sClient } from '../../services/kubernetes/client'
|
||||
import { db } from '../../db/client'
|
||||
import { agents } from '../../db/schema'
|
||||
import { eq } from 'drizzle-orm'
|
||||
|
||||
const router = Router()
const k8sClient = new K8sClient()

/**
 * GET /:agentId/logs/stream
 *
 * Streams the logs of an agent's pod to the client as Server-Sent Events.
 * Responds 404 when the agent is unknown and 500 when the lookup or the
 * opening of the log stream fails. Once streaming has begun, a stream
 * failure is reported as an `error` event and the response is closed.
 */
router.get('/:agentId/logs/stream', async (req, res) => {
  const { agentId } = req.params

  try {
    const agent = await db.query.agents.findFirst({
      where: eq(agents.id, agentId),
    })

    if (!agent) {
      return res.status(404).json({ error: 'Agent not found' })
    }

    const logStream = await k8sClient.streamPodLogs(agent.k8sNamespace, agent.podName)

    // Set the SSE headers only after the stream is open, so an error
    // response above or below is a plain JSON response.
    res.setHeader('Content-Type', 'text/event-stream')
    res.setHeader('Cache-Control', 'no-cache')
    res.setHeader('Connection', 'keep-alive')

    logStream.on('data', (chunk) => {
      // A chunk may contain several lines. In the event-stream format every
      // line of a payload needs its own "data:" prefix; unprefixed lines
      // would be dropped or misread by the client.
      const framed = chunk
        .toString()
        .split(/\r\n|\r|\n/)
        .map((line: string) => `data: ${line}`)
        .join('\n')
      res.write(`${framed}\n\n`)
    })

    logStream.on('end', () => {
      res.end()
    })

    // A stream without an 'error' listener throws on error and would take
    // the whole process down.
    logStream.on('error', () => {
      if (!res.writableEnded) {
        res.write('event: error\ndata: Log stream failed\n\n')
        res.end()
      }
    })

    req.on('close', () => {
      logStream.destroy()
    })
  } catch (error) {
    if (res.headersSent) {
      res.end()
    } else {
      res.status(500).json({ error: 'Failed to stream logs' })
    }
  }
})

export default router
|
||||
```
|
||||
|
||||
## Monitoring Agents
|
||||
|
||||
```bash
|
||||
# List all agents
|
||||
kubectl get pods -n agents -l app=claude-agent
|
||||
|
||||
# Follow the logs of one agent
|
||||
kubectl logs -n agents claude-agent-abc123 -f
|
||||
|
||||
# Open a shell inside an agent
|
||||
kubectl exec -it -n agents claude-agent-abc123 -- /bin/bash
|
||||
|
||||
# Show resource usage
|
||||
kubectl top pods -n agents
|
||||
```
|
||||
Reference in New Issue
Block a user