- CLAUDE.md for AI agents to understand the codebase - GITEA-GUIDE.md centralizes all Gitea operations (API, Registry, Auth) - DEVELOPMENT-WORKFLOW.md explains complete dev process - ROADMAP.md, NEXT-SESSION.md for planning - QUICK-REFERENCE.md, TROUBLESHOOTING.md for daily use - 40+ detailed docs in /docs folder - Backend as submodule from Gitea Everything documented for autonomous operation. Co-Authored-By: Claude Sonnet 4.5 (1M context) <noreply@anthropic.com>
453 lines
9.8 KiB
Markdown
453 lines
9.8 KiB
Markdown
# Ciclo de Vida de los Agentes
|
|
|
|
## Estados del Agente
|
|
|
|
```
|
|
┌──────────────┐
│ Initializing │
└──────┬───────┘
       │
       ▼
   ┌──────┐     ┌──────┐
   │ Idle │◄───►│ Busy │
   └───┬──┘     └──┬───┘
       │           │
       │           │
       ▼           ▼
   ┌───────┐   ┌───────┐
   │ Error │   │Offline│
   └───────┘   └───────┘
|
|
```
|
|
|
|
## Inicialización
|
|
|
|
### 1. Creación del Pod
|
|
|
|
```typescript
|
|
// Backend crea el pod
|
|
const agentManager = new AgentManager()
|
|
const agent = await agentManager.createAgent(['javascript', 'react'])
|
|
|
|
// Resultado
|
|
{
|
|
id: 'agent-abc123',
|
|
podName: 'claude-agent-abc123',
|
|
namespace: 'agents',
|
|
status: 'initializing'
|
|
}
|
|
```
|
|
|
|
### 2. Arranque del Contenedor
|
|
|
|
```bash
|
|
# Inside the pod (entrypoint.sh)
echo "🤖 Starting agent: $AGENT_ID"

# 1. Setup SSH
# Create the directory first (the redirect below fails if it is missing) and
# write the key under a restrictive umask so it is never readable by others,
# not even for the instant between creation and chmod.
mkdir -p /root/.ssh
chmod 700 /root/.ssh
(umask 077 && printf '%s\n' "$GIT_SSH_KEY" > /root/.ssh/id_ed25519)
chmod 600 /root/.ssh/id_ed25519

# 2. Configure Claude Code MCP
# The heredoc is unquoted on purpose: $MCP_SERVER_URL must be expanded here.
mkdir -p /root/.claude-code
cat > /root/.claude-code/config.json <<EOF
{
  "mcpServers": {
    "aiworker": {
      "url": "$MCP_SERVER_URL"
    }
  }
}
EOF

# 3. Send initial heartbeat
curl -X POST "$MCP_SERVER_URL/heartbeat" \
  -H "Content-Type: application/json" \
  -H "X-Agent-ID: $AGENT_ID" \
  -d '{"status":"idle"}'

# 4. Start work loop
# exec replaces this shell with the loop process.
exec /usr/local/bin/agent-loop.sh
|
|
```
|
|
|
|
### 3. Registro en el Sistema
|
|
|
|
```typescript
|
|
// The backend sees the heartbeat and records the agent as available.
const heartbeatPatch = {
  status: 'idle' as const,
  lastHeartbeat: new Date(),
}
const isThisAgent = eq(agents.id, agentId)

await db.update(agents).set(heartbeatPatch).where(isThisAgent)

logger.info(`Agent ${agentId} is now active`)
|
|
```
|
|
|
|
## Asignación de Tarea
|
|
|
|
### 1. Agent Polling
|
|
|
|
```bash
|
|
# agent-loop.sh
# Polls the MCP server for work forever; sleeps between polls when idle.
while :; do
  echo "📋 Checking for tasks..."

  # Ask the backend for the next task for this agent.
  TASK=$(
    curl -s -X POST "$MCP_SERVER_URL/tools/call" \
      -H "Content-Type: application/json" \
      -d "{
\"name\": \"get_next_task\",
\"arguments\": {\"agentId\": \"$AGENT_ID\"}
}"
  )

  # The tool result carries a JSON document inside content[0].text;
  # "// empty" makes jq print nothing when there is no task id.
  TASK_ID=$(echo "$TASK" | jq -r '.content[0].text | fromjson | .task.id // empty')

  # Nothing assigned: wait before polling again.
  if [ -z "$TASK_ID" ]; then
    sleep 10
    continue
  fi

  echo "🎯 Got task: $TASK_ID"
  process_task "$TASK_ID"
done
|
|
```
|
|
|
|
### 2. Backend Asigna Tarea
|
|
|
|
```typescript
|
|
// services/mcp/handlers.ts - getNextTask()
|
|
/**
 * Picks the next backlog task, assigns it to the requesting agent and marks
 * that agent as busy.
 *
 * @param args.agentId - id of the agent asking for work.
 * @returns An MCP-style result whose single text item is a JSON string:
 *   `{ task }` when a task was assigned, `{ message: 'No tasks' }` otherwise.
 *
 * NOTE(review): the lookup and the assignment are separate statements, so two
 * agents polling at the same time could presumably receive the same task —
 * confirm whether callers are serialized or make the claim atomic.
 */
async function getNextTask(args: { agentId: string }) {
  const { agentId } = args

  // Wraps any payload in the text-content envelope returned to the agent.
  const asTextResult = (payload: unknown) => ({
    content: [{ type: 'text', text: JSON.stringify(payload) }],
  })

  // 1. Highest priority first, oldest first among equal priorities.
  const nextTask = await db.query.tasks.findFirst({
    where: eq(tasks.state, 'backlog'),
    orderBy: [desc(tasks.priority), asc(tasks.createdAt)],
  })

  if (!nextTask) {
    return asTextResult({ message: 'No tasks' })
  }

  // 2. Assign the task to the agent and mark it as started.
  const taskPatch = {
    state: 'in_progress' as const,
    assignedAgentId: agentId,
    assignedAt: new Date(),
    startedAt: new Date(),
  }
  await db.update(tasks).set(taskPatch).where(eq(tasks.id, nextTask.id))

  // 3. Reflect the assignment on the agent record.
  const agentPatch = {
    status: 'busy' as const,
    currentTaskId: nextTask.id,
  }
  await db.update(agents).set(agentPatch).where(eq(agents.id, agentId))

  // 4. Hand the task (as fetched in step 1) back to the agent.
  return asTextResult({ task: nextTask })
}
|
|
```
|
|
|
|
## Trabajo en Tarea
|
|
|
|
### Fase 1: Setup
|
|
|
|
```bash
|
|
# Clone repo into a per-task workspace
git clone "$PROJECT_REPO" "/workspace/task-$TASK_ID"
cd "/workspace/task-$TASK_ID"

# Create branch (via MCP)
# The body is JSON, so declare it; without the header curl sends
# application/x-www-form-urlencoded.
curl -X POST "$MCP_SERVER_URL/tools/call" \
  -H "Content-Type: application/json" \
  -d "{\"name\": \"create_branch\", \"arguments\": {\"taskId\": \"$TASK_ID\"}}"

# Checkout branch
# NOTE(review): BRANCH_NAME is not assigned anywhere in this snippet;
# presumably it comes from the task data or the create_branch response — confirm.
git fetch origin
git checkout "$BRANCH_NAME"
|
|
```
|
|
|
|
### Fase 2: Implementación
|
|
|
|
```bash
|
|
# Start Claude Code session
|
|
claude-code chat --message "
|
|
I need you to work on this task:
|
|
|
|
Title: $TASK_TITLE
|
|
Description: $TASK_DESC
|
|
|
|
Instructions:
|
|
1. Analyze the codebase
|
|
2. Implement the changes
|
|
3. Write tests
|
|
4. Commit with clear messages
|
|
5. Use MCP tools when done
|
|
|
|
Start working now.
|
|
"
|
|
```
|
|
|
|
### Fase 3: Preguntas (opcional)
|
|
|
|
```typescript
|
|
// When the agent needs more information, post a question for the user.
await mcp.callTool('ask_user_question', {
  taskId,
  question: 'Should I add TypeScript types?',
  context: 'The codebase is in JavaScript...',
})

// Move the task to needs_input while waiting.
await mcp.callTool('update_task_status', {
  taskId,
  status: 'needs_input',
})

// Poll every 5s until an answer arrives.
// Exit on the explicit hasResponse flag rather than on the truthiness of the
// answer: an empty or otherwise falsy answer would keep a `while (!response)`
// loop spinning forever.
let response
for (;;) {
  await sleep(5000)
  const check = await mcp.callTool('check_question_response', { taskId })
  if (check.hasResponse) {
    response = check.response
    break
  }
}

// Resume work with the answer in `response`.
await mcp.callTool('update_task_status', {
  taskId,
  status: 'in_progress',
})
|
|
```
|
|
|
|
### Fase 4: Finalización
|
|
|
|
```bash
|
|
# Create PR
# Build the payload with jq instead of string interpolation: a task title
# containing quotes, backslashes or newlines would otherwise yield invalid JSON.
PR_PAYLOAD=$(jq -nc \
  --arg taskId "$TASK_ID" \
  --arg title "$TASK_TITLE" \
  --arg description "Implemented feature X..." \
  '{name: "create_pull_request", arguments: {taskId: $taskId, title: $title, description: $description}}')

curl -X POST "$MCP_SERVER_URL/tools/call" \
  -H "Content-Type: application/json" \
  -d "$PR_PAYLOAD"

# Deploy preview
DEPLOY_PAYLOAD=$(jq -nc \
  --arg taskId "$TASK_ID" \
  '{name: "trigger_preview_deploy", arguments: {taskId: $taskId}}')

curl -X POST "$MCP_SERVER_URL/tools/call" \
  -H "Content-Type: application/json" \
  -d "$DEPLOY_PAYLOAD"

# Update status
STATUS_PAYLOAD=$(jq -nc \
  --arg taskId "$TASK_ID" \
  '{name: "update_task_status", arguments: {taskId: $taskId, status: "ready_to_test"}}')

curl -X POST "$MCP_SERVER_URL/tools/call" \
  -H "Content-Type: application/json" \
  -d "$STATUS_PAYLOAD"
|
|
```
|
|
|
|
## Liberación del Agente
|
|
|
|
```typescript
|
|
// Once the task is finished (ready_to_test or completed), free the agent.
const releasePatch = {
  status: 'idle' as const,
  currentTaskId: null,
  // Incremented by the database via a raw SQL expression.
  tasksCompleted: sql`tasks_completed + 1`,
}
const isThisAgent = eq(agents.id, agentId)

await db.update(agents).set(releasePatch).where(isThisAgent)

logger.info(`Agent ${agentId} completed task ${taskId}, now idle`)
|
|
```
|
|
|
|
## Manejo de Errores
|
|
|
|
### Timeout de Tarea
|
|
|
|
```bash
|
|
# agent-loop.sh with a timeout
# timeout(1) exits with 124 when it had to kill the command after 7200s (2h).
timeout 7200 claude-code chat --message "$TASK_PROMPT" || {
  STATUS=$?
  if [ "$STATUS" -eq 124 ]; then
    echo "⏰ Task timeout after 2 hours"

    # Notify backend
    curl -X POST "$MCP_SERVER_URL/tools/call" \
      -H "Content-Type: application/json" \
      -d "{
        \"name\": \"update_task_status\",
        \"arguments\": {
          \"taskId\": \"$TASK_ID\",
          \"status\": \"needs_input\",
          \"metadata\": {\"reason\": \"timeout\"}
        }
      }"

    # Log error
    curl -X POST "$MCP_SERVER_URL/tools/call" \
      -H "Content-Type: application/json" \
      -d "{
        \"name\": \"log_activity\",
        \"arguments\": {
          \"agentId\": \"$AGENT_ID\",
          \"level\": \"error\",
          \"message\": \"Task timeout: $TASK_ID\"
        }
      }"
  else
    # Any other non-zero exit is not a timeout; surface it instead of
    # dropping it silently.
    echo "❌ claude-code exited with status $STATUS for task $TASK_ID" >&2
  fi
}
|
|
```
|
|
|
|
### Crash del Agente
|
|
|
|
```typescript
|
|
// The backend detects agents that stopped sending heartbeats.

/**
 * Finds agents whose last heartbeat is older than five minutes and recycles
 * each one: its current task (if any) goes back to the backlog, its pod is
 * deleted, its DB row is removed and a replacement agent is created.
 *
 * A failure while recycling one agent is logged and does not stop the sweep,
 * so a single bad pod cannot block recovery of the others.
 */
async function checkStaleAgents() {
  const staleThreshold = new Date(Date.now() - 5 * 60 * 1000) // 5 min

  const staleAgents = await db.query.agents.findMany({
    where: lt(agents.lastHeartbeat, staleThreshold),
  })

  for (const agent of staleAgents) {
    logger.warn(`Agent ${agent.id} is stale`)

    try {
      // Return the in-flight task to the backlog so another agent can take it
      if (agent.currentTaskId) {
        await db.update(tasks)
          .set({
            state: 'backlog',
            assignedAgentId: null,
          })
          .where(eq(tasks.id, agent.currentTaskId))
      }

      // Delete agent pod
      await k8sClient.deletePod(agent.k8sNamespace, agent.podName)

      // Remove from DB
      await db.delete(agents).where(eq(agents.id, agent.id))

      // Create replacement
      await agentManager.createAgent()
    } catch (err) {
      const reason = err instanceof Error ? err.message : String(err)
      logger.error(`Failed to recycle stale agent ${agent.id}: ${reason}`)
    }
  }
}

// Run every minute. setInterval ignores the returned promise, so attach a
// catch here to avoid an unhandled rejection if the sweep itself fails.
setInterval(() => {
  checkStaleAgents().catch((err) => {
    const reason = err instanceof Error ? err.message : String(err)
    logger.error(`Stale agent check failed: ${reason}`)
  })
}, 60000)
|
|
```
|
|
|
|
## Terminación Ordenada (Graceful Shutdown)
|
|
|
|
```bash
|
|
# agent-entrypoint.sh

# cleanup: signal handler that reports the agent as offline, stops the
# background heartbeat job and exits with status 0.
cleanup() {
  echo "🛑 Shutting down agent..."

  # Send offline status. The agent id header tells the backend which agent is
  # going away; --max-time keeps an unreachable backend from stalling shutdown.
  # Best effort: failures are ignored on purpose.
  curl --max-time 5 -X POST "$MCP_SERVER_URL/heartbeat" \
    -H "Content-Type: application/json" \
    -H "X-Agent-ID: $AGENT_ID" \
    -d '{"status":"offline"}' 2>/dev/null || true

  # Kill background jobs (only if a heartbeat job was actually started)
  if [ -n "${HEARTBEAT_PID:-}" ]; then
    kill "$HEARTBEAT_PID" 2>/dev/null || true
  fi

  echo "👋 Goodbye"
  exit 0
}

trap cleanup SIGTERM SIGINT

# Wait for signals
wait
|
|
```
|
|
|
|
## Auto-Scaling
|
|
|
|
```typescript
|
|
// Auto-scaler that runs every 30s

/**
 * Compares the backlog size with the number of idle/busy agents and asks the
 * agent manager for a new target size when needed.
 *
 * - Scale up by 2 (capped at 10) when more than 3 tasks are pending and no
 *   agent is idle.
 * - Scale down by 1 (floor of 2) when nothing is pending and more than 2
 *   agents are idle.
 *
 * Only agents in status `idle` or `busy` are counted towards the total.
 */
async function autoScale() {
  const MAX_AGENTS = 10
  const MIN_AGENTS = 2
  const PENDING_TASKS_THRESHOLD = 3
  const IDLE_AGENTS_THRESHOLD = 2

  // Get metrics. The three queries are independent, so run them concurrently.
  const [pendingTasks, idleAgents, busyAgents] = await Promise.all([
    db.query.tasks.findMany({
      where: eq(tasks.state, 'backlog'),
    }),
    db.query.agents.findMany({
      where: eq(agents.status, 'idle'),
    }),
    db.query.agents.findMany({
      where: eq(agents.status, 'busy'),
    }),
  ])

  const totalAgents = idleAgents.length + busyAgents.length

  // Decision logic
  let targetAgents = totalAgents

  // Scale up if there is a backlog and nobody is free to take it
  if (pendingTasks.length > PENDING_TASKS_THRESHOLD && idleAgents.length === 0) {
    targetAgents = Math.min(totalAgents + 2, MAX_AGENTS)
  }

  // Scale down if there is no work and there are spare idle agents
  if (pendingTasks.length === 0 && idleAgents.length > IDLE_AGENTS_THRESHOLD) {
    targetAgents = Math.max(totalAgents - 1, MIN_AGENTS)
  }

  if (targetAgents !== totalAgents) {
    logger.info(`Auto-scaling: ${totalAgents} → ${targetAgents}`)
    await agentManager.scaleAgents(targetAgents)
  }
}

// setInterval ignores the returned promise, so attach a catch here to avoid an
// unhandled rejection when a query or the scaling call fails.
setInterval(() => {
  autoScale().catch((err) => {
    const reason = err instanceof Error ? err.message : String(err)
    logger.error(`Auto-scaling failed: ${reason}`)
  })
}, 30000)
|
|
```
|
|
|
|
## Métricas del Ciclo de Vida
|
|
|
|
```typescript
|
|
// Endpoint exposing aggregate agent metrics
router.get('/agents/metrics', async (req, res) => {
  const allAgents = await db.query.agents.findMany()

  // Number of agents currently in the given status.
  const countWithStatus = (status: string) =>
    allAgents.filter((agent) => agent.status === status).length

  let totalTasksCompleted = 0
  let totalRuntime = 0
  for (const agent of allAgents) {
    totalTasksCompleted += agent.tasksCompleted
    totalRuntime += agent.totalRuntimeMinutes
  }

  res.json({
    total: allAgents.length,
    byStatus: {
      idle: countWithStatus('idle'),
      busy: countWithStatus('busy'),
      error: countWithStatus('error'),
      offline: countWithStatus('offline'),
    },
    totalTasksCompleted,
    // With no agents the division yields NaN; `|| 0` turns that into 0.
    avgTasksPerAgent: totalTasksCompleted / allAgents.length || 0,
    totalRuntime,
  })
})
|
|
```
|
|
|
|
## Dashboard Visualization
|
|
|
|
En el frontend, mostrar:
|
|
- **Estado actual** de cada agente (idle/busy/error/offline)
|
|
- **Tarea actual** si está busy
|
|
- **Historial** de tareas completadas
|
|
- **Métricas** (tareas/hora, uptime, etc.)
|
|
- **Botones** para restart/delete agente
|
|
- **Logs en tiempo real** de cada agente
|