Compare commits

...

11 Commits

Author SHA1 Message Date
Hector Ros
db09616a69 Create K8s Service per agent for terminal access
All checks were successful
Build and Push Backend / build (push) Successful in 5s
- Create ClusterIP service for each agent pod
- Service exposes port 7681 (ttyd terminal)
- Service DNS: {podName}-terminal.agents.svc.cluster.local
- Backend proxy uses service DNS instead of pod IP
- Fixes WebSocket proxy issues
- Services are created/deleted with pods
2026-01-20 20:18:22 +01:00
Hector Ros
6864258810 Fix getPodIP: handle different K8s API response structures
All checks were successful
Build and Push Backend / build (push) Successful in 6s
- Add debug logging to getPodIP
- Handle both response.body and direct response
- Apply same fix to getPodStatus for consistency
- Fixes 500 error when accessing agent terminal
2026-01-20 19:56:04 +01:00
Hector Ros
209b439d26 Fix terminal proxy: use pod IP instead of non-existent DNS
All checks were successful
Build and Push Backend / build (push) Successful in 6s
- Add getPodIP() function to get pod IP from K8s API
- Update terminal proxy to use pod IP directly
- Add logging for proxy requests
- Fixes terminal showing black screen issue
2026-01-20 18:50:08 +01:00
Hector Ros
3fef6030ea Fix: Handle undefined result.body.metadata gracefully
All checks were successful
Build and Push Backend / build (push) Successful in 5s
Pod creation succeeds but response structure may not have metadata.
Add safe navigation to prevent error.

Co-Authored-By: Claude Sonnet 4.5 (1M context) <noreply@anthropic.com>
2026-01-20 18:35:21 +01:00
Hector Ros
65b18d13b5 Fix: Pass HTTPS agent as request option directly
All checks were successful
Build and Push Backend / build (push) Successful in 11s
applyToHTTPSOptions doesn't work reliably. Instead, pass the
httpsAgent as the last parameter to createNamespacedPod.

Co-Authored-By: Claude Sonnet 4.5 (1M context) <noreply@anthropic.com>
2026-01-20 18:17:30 +01:00
Hector Ros
9eb9def85c Add extensive debug logging for K8s client
All checks were successful
Build and Push Backend / build (push) Successful in 4s
- Log cluster config details
- Log HTTPS agent setup
- Log detailed error information on pod creation failure
- Track applyToHTTPSOptions execution

Co-Authored-By: Claude Sonnet 4.5 (1M context) <noreply@anthropic.com>
2026-01-20 18:13:59 +01:00
Hector Ros
697ee1b426 Fix: Use custom HTTPS agent to skip SSL verification
All checks were successful
Build and Push Backend / build (push) Successful in 4s
skipTLSVerify flag is not respected by @kubernetes/client-node.
Solution: Create custom https.Agent with rejectUnauthorized: false

Co-Authored-By: Claude Sonnet 4.5 (1M context) <noreply@anthropic.com>
2026-01-20 18:10:17 +01:00
Hector Ros
3bc59dc964 Fix: Apply skipTLSVerify in loadFromCluster mode
All checks were successful
Build and Push Backend / build (push) Successful in 5s
When K8S_IN_CLUSTER=true, backend uses loadFromCluster() which needs
skipTLSVerify to work with self-signed cluster certificates.

Co-Authored-By: Claude Sonnet 4.5 (1M context) <noreply@anthropic.com>
2026-01-20 17:57:23 +01:00
Hector Ros
14ae28f13c Add TLS skip for K8s client when in-cluster
All checks were successful
Build and Push Backend / build (push) Successful in 4s
- Configure skipTLSVerify for cluster certificates
- Better handling of in-cluster vs out-of-cluster modes

Co-Authored-By: Claude Sonnet 4.5 (1M context) <noreply@anthropic.com>
2026-01-20 17:53:04 +01:00
Hector Ros
e0c6884a7b Fix K8s API calls: use param object format
All checks were successful
Build and Push Backend / build (push) Successful in 5s
The @kubernetes/client-node API expects parameters as an object:
{ namespace: 'ns', body: pod } instead of positional params.

Co-Authored-By: Claude Sonnet 4.5 (1M context) <noreply@anthropic.com>
2026-01-20 17:48:56 +01:00
Hector Ros
6f1c51bfd8 Fix K8s pod creation: use correct V1Pod format
All checks were successful
Build and Push Backend / build (push) Successful in 4s
- Remove apiVersion and kind from pod spec (not needed for client-node)
- Remove namespace from metadata (passed as parameter)
- Use proper V1Pod type from @kubernetes/client-node

Co-Authored-By: Claude Sonnet 4.5 (1M context) <noreply@anthropic.com>
2026-01-20 17:37:38 +01:00
3 changed files with 195 additions and 27 deletions

View File

@@ -8,7 +8,7 @@ import { agents, tasks } from '../../db/schema'
import { eq, and } from 'drizzle-orm' import { eq, and } from 'drizzle-orm'
import { randomUUID } from 'crypto' import { randomUUID } from 'crypto'
import { authenticateRequest } from '../middleware/auth' import { authenticateRequest } from '../middleware/auth'
import { createAgentPod, deleteAgentPod } from '../../lib/k8s' import { createAgentPod, deleteAgentPod, createAgentService, deleteAgentService } from '../../lib/k8s'
/** /**
* Handle all agent routes * Handle all agent routes
@@ -332,12 +332,13 @@ async function unregisterAgent(agentId: string, userId: string): Promise<Respons
.where(eq(tasks.id, existing[0].currentTaskId)) .where(eq(tasks.id, existing[0].currentTaskId))
} }
// Delete K8s pod // Delete K8s pod and service
try { try {
await deleteAgentPod(existing[0].podName) await deleteAgentPod(existing[0].podName)
await deleteAgentService(existing[0].podName)
} catch (k8sError) { } catch (k8sError) {
console.error('Failed to delete pod, continuing...', k8sError) console.error('Failed to delete pod/service, continuing...', k8sError)
// Continue even if pod deletion fails // Continue even if deletion fails
} }
// Delete agent from DB // Delete agent from DB
@@ -402,11 +403,12 @@ async function launchAgent(userId: string, req: Request): Promise<Response> {
await db.insert(agents).values(newAgent) await db.insert(agents).values(newAgent)
// Create K8s pod // Create K8s pod and service
try { try {
await createAgentPod(podName, userId) await createAgentPod(podName, userId, agentId)
await createAgentService(podName, agentId)
} catch (k8sError: any) { } catch (k8sError: any) {
// If pod creation fails, rollback DB entry // If pod/service creation fails, rollback DB entry
await db.delete(agents).where(eq(agents.id, agentId)) await db.delete(agents).where(eq(agents.id, agentId))
throw new Error(`Failed to create pod: ${k8sError.message}`) throw new Error(`Failed to create pod: ${k8sError.message}`)
} }

View File

@@ -110,19 +110,23 @@ const server = Bun.serve({
) )
} }
// Proxy to agent terminal // Proxy to agent terminal via service DNS
const agentUrl = `http://${agent.podName}.agents.svc.cluster.local:7681${url.pathname.replace(`/agent-terminal/${agentId}`, '')}${url.search}` // Service name: {podName}-terminal.agents.svc.cluster.local:7681
const agentPath = url.pathname.replace(`/agent-terminal/${agentId}`, '') || '/'
const serviceUrl = `http://${agent.podName}-terminal.agents.svc.cluster.local:7681${agentPath}${url.search}`
console.log(`🔄 Proxying terminal request to ${serviceUrl}`)
try { try {
const response = await fetch(agentUrl, { const response = await fetch(serviceUrl, {
method: req.method, method: req.method,
headers: req.headers, headers: req.headers,
body: req.body, body: req.body,
}) })
return response return response
} catch (error) { } catch (error: any) {
console.error('Terminal proxy error:', error) console.error('Terminal proxy error:', error.message)
return Response.json( return Response.json(
{ success: false, message: 'Failed to connect to agent terminal' }, { success: false, message: 'Failed to connect to agent terminal' },
{ status: 502 } { status: 502 }

View File

@@ -3,6 +3,7 @@
*/ */
import * as k8s from '@kubernetes/client-node' import * as k8s from '@kubernetes/client-node'
import * as https from 'https'
let k8sClient: k8s.CoreV1Api | null = null let k8sClient: k8s.CoreV1Api | null = null
let k8sConfig: k8s.KubeConfig | null = null let k8sConfig: k8s.KubeConfig | null = null
@@ -20,6 +21,33 @@ export function initK8sClient() {
if (inCluster) { if (inCluster) {
k8sConfig.loadFromCluster() k8sConfig.loadFromCluster()
console.log('📦 Loaded K8s config from cluster')
// Skip TLS verification when in cluster
// This is needed because the cluster uses self-signed certificates
const cluster = k8sConfig.getCurrentCluster()
console.log('📦 Current cluster:', cluster)
if (cluster) {
cluster.skipTLSVerify = true
console.log('🔓 Set skipTLSVerify = true')
}
// Create custom HTTPS agent that ignores certificate errors
const httpsAgent = new https.Agent({
rejectUnauthorized: false
})
console.log('🔓 Created HTTPS agent with rejectUnauthorized: false')
// Apply custom agent to the config
try {
k8sConfig.applyToHTTPSOptions({
httpsAgent: httpsAgent
} as any)
console.log('✅ Applied custom HTTPS agent to K8s config')
} catch (applyError: any) {
console.error('❌ Failed to apply HTTPS options:', applyError.message)
}
} else { } else {
// Load from kubeconfig file // Load from kubeconfig file
const configPath = process.env.K8S_CONFIG_PATH || process.env.KUBECONFIG || '~/.kube/config' const configPath = process.env.K8S_CONFIG_PATH || process.env.KUBECONFIG || '~/.kube/config'
@@ -40,19 +68,34 @@ export function getK8sClient(): k8s.CoreV1Api {
return k8sClient return k8sClient
} }
/**
 * HTTPS agent handed out by getK8sClientWithOptions().
 * Created lazily on first use and then reused, so callers share one agent
 * instead of allocating a fresh agent on every call.
 */
let insecureHttpsAgent: https.Agent | null = null

/**
 * Get the Kubernetes client together with request options carrying a custom HTTPS agent.
 *
 * SECURITY: the agent is built with `rejectUnauthorized: false`, which disables
 * verification of the API server certificate for any request that uses it.
 * NOTE(review): prefer trusting the cluster CA over disabling verification — confirm
 * whether the in-cluster config already provides a CA file before keeping this.
 *
 * NOTE(review): whether the client library actually honours an `httpsAgent` field in
 * the options it receives depends on the installed @kubernetes/client-node version —
 * TODO confirm.
 *
 * @returns `client` — whatever getK8sClient() returns; `options` — an object whose
 *          `httpsAgent` property is the shared agent described above.
 */
export function getK8sClientWithOptions(): { client: k8s.CoreV1Api, options: any } {
  const client = getK8sClient()

  // Build the agent once; later calls reuse it.
  if (insecureHttpsAgent === null) {
    insecureHttpsAgent = new https.Agent({
      rejectUnauthorized: false
    })
  }

  return { client, options: { httpsAgent: insecureHttpsAgent } }
}
/** /**
* Create pod spec for agent * Create pod spec for agent
*/ */
export function createAgentPodSpec(podName: string, userId: string) { export function createAgentPodSpec(podName: string, userId: string): k8s.V1Pod {
return { return {
apiVersion: 'v1',
kind: 'Pod',
metadata: { metadata: {
name: podName, name: podName,
namespace: 'agents',
labels: { labels: {
app: 'claude-agent', app: 'claude-agent',
userId: userId, userId: userId,
podName: podName,
'aiworker.io/agent': 'true', 'aiworker.io/agent': 'true',
}, },
}, },
@@ -146,17 +189,96 @@ export function createAgentPodSpec(podName: string, userId: string) {
} }
/** /**
* Create agent pod in Kubernetes * Create service for agent pod (for terminal access)
*/ */
export async function createAgentPod(podName: string, userId: string): Promise<void> { export async function createAgentService(podName: string, agentId: string): Promise<void> {
const client = getK8sClient() const client = getK8sClient()
const podSpec = createAgentPodSpec(podName, userId)
const serviceSpec: k8s.V1Service = {
metadata: {
name: `${podName}-terminal`,
namespace: 'agents',
labels: {
app: 'claude-agent-terminal',
agentId: agentId,
}
},
spec: {
selector: {
app: 'claude-agent',
podName: podName,
},
ports: [{
name: 'terminal',
port: 7681,
targetPort: 7681 as any,
protocol: 'TCP'
}],
type: 'ClusterIP'
}
}
try { try {
await client.createNamespacedPod('agents', podSpec) await client.createNamespacedService({
console.log(`✅ Pod ${podName} created successfully`) namespace: 'agents',
body: serviceSpec
})
console.log(`✅ Service ${podName}-terminal created`)
} catch (error: any) { } catch (error: any) {
console.error(`❌ Failed to create pod ${podName}:`, error.message) console.error(`❌ Failed to create service:`, error.message)
throw error
}
}
/**
 * Delete the `{podName}-terminal` service from the `agents` namespace.
 *
 * Best-effort: a 404 from the API is treated as "already deleted", and any other
 * failure of the delete call is logged but not rethrown.
 *
 * @param podName name of the agent pod; the service name is derived as `${podName}-terminal`
 */
export async function deleteAgentService(podName: string): Promise<void> {
  const client = getK8sClient()
  const serviceName = `${podName}-terminal`

  try {
    await client.deleteNamespacedService({
      name: serviceName,
      namespace: 'agents'
    })
    console.log(`✅ Service ${serviceName} deleted`)
  } catch (error: any) {
    // The HTTP status lives in different places depending on the client version:
    // `code` (ApiException in client-node >= 1.0), `statusCode`, or `response.statusCode`.
    // Each candidate is compared individually because `code` may also be a non-HTTP
    // string such as a socket error code.
    const statusCandidates = [error?.code, error?.statusCode, error?.response?.statusCode]
    if (statusCandidates.includes(404)) {
      console.log(`⚠️ Service ${serviceName} not found`)
      return
    }
    // Deliberately swallowed: service cleanup must not abort the caller's teardown.
    console.error(`❌ Error deleting service:`, error?.message)
  }
}
/**
* Create agent pod in Kubernetes
*/
export async function createAgentPod(podName: string, userId: string, agentId: string): Promise<void> {
const { client, options } = getK8sClientWithOptions()
const podSpec = createAgentPodSpec(podName, userId)
console.log(`🔧 Creating pod ${podName} for user ${userId}`)
console.log(`🔧 Using custom HTTPS agent with rejectUnauthorized: false`)
try {
const result = await client.createNamespacedPod({
namespace: 'agents',
body: podSpec
}, undefined, undefined, undefined, undefined, options)
console.log(`✅ Pod ${podName} created successfully`)
if (result?.body?.metadata?.uid) {
console.log(`✅ Pod UID: ${result.body.metadata.uid}`)
}
} catch (error: any) {
console.error(`❌ Failed to create pod ${podName}`)
console.error(`❌ Error message:`, error.message)
console.error(`❌ Error code:`, error.code)
if (error.response) {
console.error(`❌ Response status:`, error.response.statusCode)
console.error(`❌ Response body:`, error.response.body)
}
throw error throw error
} }
} }
@@ -168,11 +290,14 @@ export async function deleteAgentPod(podName: string): Promise<void> {
const client = getK8sClient() const client = getK8sClient()
try { try {
await client.deleteNamespacedPod(podName, 'agents') await client.deleteNamespacedPod({
name: podName,
namespace: 'agents'
})
console.log(`✅ Pod ${podName} deleted successfully`) console.log(`✅ Pod ${podName} deleted successfully`)
} catch (error: any) { } catch (error: any) {
// Ignore 404 errors (pod already deleted) // Ignore 404 errors (pod already deleted)
if (error.statusCode === 404) { if (error.statusCode === 404 || error.response?.statusCode === 404) {
console.log(`⚠️ Pod ${podName} not found (already deleted)`) console.log(`⚠️ Pod ${podName} not found (already deleted)`)
return return
} }
@@ -188,10 +313,47 @@ export async function getPodStatus(podName: string): Promise<string | null> {
const client = getK8sClient() const client = getK8sClient()
try { try {
const response = await client.readNamespacedPod(podName, 'agents') const response = await client.readNamespacedPod({
return response.body.status?.phase || null name: podName,
namespace: 'agents'
})
// Handle different response structures
const pod = response.body || response
return pod?.status?.phase || null
} catch (error: any) { } catch (error: any) {
if (error.statusCode === 404) { if (error.statusCode === 404 || error.response?.statusCode === 404) {
return null
}
throw error
}
}
/**
* Get pod IP address
*/
export async function getPodIP(podName: string): Promise<string | null> {
const client = getK8sClient()
try {
console.log(`🔍 Getting IP for pod: ${podName}`)
const response = await client.readNamespacedPod({
name: podName,
namespace: 'agents'
})
console.log(`🔍 Response type: ${typeof response}`)
console.log(`🔍 Has body: ${'body' in response}`)
// Handle different response structures
const pod = response.body || response
const podIP = pod?.status?.podIP
console.log(`🔍 Pod IP: ${podIP}`)
return podIP || null
} catch (error: any) {
console.error(`❌ Error getting pod IP for ${podName}:`, error.message)
if (error.statusCode === 404 || error.response?.statusCode === 404) {
return null return null
} }
throw error throw error