Complete documentation for future sessions
- CLAUDE.md for AI agents to understand the codebase - GITEA-GUIDE.md centralizes all Gitea operations (API, Registry, Auth) - DEVELOPMENT-WORKFLOW.md explains complete dev process - ROADMAP.md, NEXT-SESSION.md for planning - QUICK-REFERENCE.md, TROUBLESHOOTING.md for daily use - 40+ detailed docs in /docs folder - Backend as submodule from Gitea Everything documented for autonomous operation. Co-Authored-By: Claude Sonnet 4.5 (1M context) <noreply@anthropic.com>
This commit is contained in:
456
docs/04-kubernetes/cluster-setup.md
Normal file
456
docs/04-kubernetes/cluster-setup.md
Normal file
@@ -0,0 +1,456 @@
# Setup del Cluster Kubernetes

## Requisitos

- Kubernetes 1.28+
- kubectl CLI
- helm 3.x
- 4 GB RAM mínimo
- 20 GB storage

## Instalación Local (Kind/Minikube)

### Con Kind (recomendado para desarrollo)

```bash
# Instalar kind
brew install kind  # macOS
# o
curl -Lo ./kind https://kind.sigs.k8s.io/dl/latest/kind-linux-amd64
chmod +x ./kind
sudo mv ./kind /usr/local/bin/kind

# Crear cluster con configuración personalizada
cat <<EOF | kind create cluster --name aiworker --config=-
kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
nodes:
- role: control-plane
  kubeadmConfigPatches:
  - |
    kind: InitConfiguration
    nodeRegistration:
      kubeletExtraArgs:
        node-labels: "ingress-ready=true"
  extraPortMappings:
  - containerPort: 80
    hostPort: 80
    protocol: TCP
  - containerPort: 443
    hostPort: 443
    protocol: TCP
- role: worker
- role: worker
EOF

# Verificar
kubectl cluster-info --context kind-aiworker
kubectl get nodes
```

### Con Minikube

```bash
# Instalar minikube
brew install minikube  # macOS

# Iniciar cluster
minikube start --cpus=4 --memory=8192 --disk-size=40g --driver=docker

# Habilitar addons
minikube addons enable ingress
minikube addons enable metrics-server
minikube addons enable storage-provisioner

# Verificar
kubectl get nodes
```

## Instalación en Cloud

### Google Kubernetes Engine (GKE)

```bash
# Instalar gcloud CLI
brew install --cask google-cloud-sdk

# Autenticar
gcloud auth login
gcloud config set project YOUR_PROJECT_ID

# Crear cluster
gcloud container clusters create aiworker \
  --zone us-central1-a \
  --num-nodes 3 \
  --machine-type n1-standard-2 \
  --disk-size 30 \
  --enable-autoscaling \
  --min-nodes 2 \
  --max-nodes 5 \
  --enable-autorepair \
  --enable-autoupgrade

# Obtener credenciales
gcloud container clusters get-credentials aiworker --zone us-central1-a

# Verificar
kubectl get nodes
```

### Amazon EKS

```bash
# Instalar eksctl
brew install eksctl

# Crear cluster
eksctl create cluster \
  --name aiworker \
  --region us-west-2 \
  --nodegroup-name workers \
  --node-type t3.medium \
  --nodes 3 \
  --nodes-min 2 \
  --nodes-max 5 \
  --managed

# Verificar
kubectl get nodes
```

### Azure AKS

```bash
# Instalar Azure CLI
brew install azure-cli

# Login
az login

# Crear resource group
az group create --name aiworker-rg --location eastus

# Crear cluster
az aks create \
  --resource-group aiworker-rg \
  --name aiworker \
  --node-count 3 \
  --node-vm-size Standard_D2s_v3 \
  --enable-cluster-autoscaler \
  --min-count 2 \
  --max-count 5 \
  --generate-ssh-keys

# Obtener credenciales
az aks get-credentials --resource-group aiworker-rg --name aiworker

# Verificar
kubectl get nodes
```

## Instalación de Componentes Base

### Nginx Ingress Controller

```bash
# Instalar con Helm
helm repo add ingress-nginx https://kubernetes.github.io/ingress-nginx
helm repo update

helm install ingress-nginx ingress-nginx/ingress-nginx \
  --namespace ingress-nginx \
  --create-namespace \
  --set controller.replicaCount=2 \
  --set controller.nodeSelector."kubernetes\.io/os"=linux \
  --set controller.admissionWebhooks.patch.nodeSelector."kubernetes\.io/os"=linux

# Verificar
kubectl get pods -n ingress-nginx
kubectl get svc -n ingress-nginx
```

### Cert-Manager (TLS)

```bash
# Instalar cert-manager
kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.13.0/cert-manager.yaml

# Verificar
kubectl get pods -n cert-manager

# Crear ClusterIssuer para Let's Encrypt
cat <<EOF | kubectl apply -f -
apiVersion: cert-manager.io/v1
kind: ClusterIssuer
metadata:
  name: letsencrypt-prod
spec:
  acme:
    server: https://acme-v02.api.letsencrypt.org/directory
    email: your-email@example.com
    privateKeySecretRef:
      name: letsencrypt-prod
    solvers:
    - http01:
        ingress:
          class: nginx
EOF
```

### Metrics Server

```bash
# Instalar metrics-server
kubectl apply -f https://github.com/kubernetes-sigs/metrics-server/releases/latest/download/components.yaml

# Verificar
kubectl get deployment metrics-server -n kube-system
kubectl top nodes
```

### Prometheus & Grafana (opcional)

```bash
# Añadir repo
helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
helm repo update

# Instalar kube-prometheus-stack
helm install prometheus prometheus-community/kube-prometheus-stack \
  --namespace monitoring \
  --create-namespace \
  --set prometheus.prometheusSpec.retention=30d \
  --set grafana.adminPassword=admin

# Verificar
kubectl get pods -n monitoring

# Port-forward para acceder a Grafana
kubectl port-forward -n monitoring svc/prometheus-grafana 3001:80
# http://localhost:3001 (admin/admin)
```

## Creación de Namespaces

```bash
# Script de creación de namespaces
cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: Namespace
metadata:
  name: control-plane
  labels:
    name: control-plane
    environment: production
---
apiVersion: v1
kind: Namespace
metadata:
  name: agents
  labels:
    name: agents
    environment: production
---
apiVersion: v1
kind: Namespace
metadata:
  name: gitea
  labels:
    name: gitea
    environment: production
EOF

# Verificar
kubectl get namespaces
```

## Configuración de RBAC

```bash
# ServiceAccount para backend
cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: ServiceAccount
metadata:
  name: aiworker-backend
  namespace: control-plane
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: aiworker-backend
rules:
- apiGroups: [""]
  resources: ["pods", "pods/log", "pods/exec"]
  verbs: ["get", "list", "create", "delete"]
- apiGroups: [""]
  resources: ["namespaces"]
  verbs: ["get", "list", "create", "delete"]
- apiGroups: ["apps"]
  resources: ["deployments", "replicasets"]
  verbs: ["get", "list", "create", "update", "patch", "delete"]
- apiGroups: [""]
  resources: ["services"]
  verbs: ["get", "list", "create", "update", "delete"]
- apiGroups: ["networking.k8s.io"]
  resources: ["ingresses"]
  verbs: ["get", "list", "create", "update", "delete"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: aiworker-backend
subjects:
- kind: ServiceAccount
  name: aiworker-backend
  namespace: control-plane
roleRef:
  kind: ClusterRole
  name: aiworker-backend
  apiGroup: rbac.authorization.k8s.io
EOF
```

## Secrets y ConfigMaps

```bash
# Crear secret para credentials
kubectl create secret generic aiworker-secrets \
  --namespace=control-plane \
  --from-literal=db-password='your-db-password' \
  --from-literal=gitea-token='your-gitea-token' \
  --from-literal=anthropic-api-key='your-anthropic-key'

# ConfigMap para configuración
cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: ConfigMap
metadata:
  name: aiworker-config
  namespace: control-plane
data:
  GITEA_URL: "http://gitea.gitea.svc.cluster.local:3000"
  K8S_DEFAULT_NAMESPACE: "aiworker"
  NODE_ENV: "production"
EOF
```

## Storage Classes

```bash
# Crear StorageClass para preview environments (fast SSD)
cat <<EOF | kubectl apply -f -
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: fast-ssd
provisioner: kubernetes.io/gce-pd  # Cambiar según cloud provider
parameters:
  type: pd-ssd
  replication-type: none
reclaimPolicy: Delete
volumeBindingMode: WaitForFirstConsumer
EOF
```

## Network Policies

```bash
# Aislar namespaces de preview
cat <<EOF | kubectl apply -f -
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: preview-isolation
  namespace: agents
spec:
  podSelector:
    matchLabels:
      env: preview
  policyTypes:
  - Ingress
  - Egress
  ingress:
  - from:
    - namespaceSelector:
        matchLabels:
          name: control-plane
  egress:
  - to:
    - namespaceSelector:
        matchLabels:
          name: gitea
  - to:
    - namespaceSelector: {}
      podSelector:
        matchLabels:
          k8s-app: kube-dns
EOF
```

## Verificación Final

```bash
# Script de verificación
cat > verify-cluster.sh <<'EOF'
#!/bin/bash

echo "🔍 Verificando cluster..."

echo "✓ Nodes:"
kubectl get nodes

echo "✓ Namespaces:"
kubectl get namespaces

echo "✓ Ingress Controller:"
kubectl get pods -n ingress-nginx

echo "✓ Cert-Manager:"
kubectl get pods -n cert-manager

echo "✓ Metrics Server:"
kubectl top nodes 2>/dev/null || echo "⚠️ Metrics not available yet"

echo "✓ Storage Classes:"
kubectl get storageclass

echo "✅ Cluster setup complete!"
EOF

chmod +x verify-cluster.sh
./verify-cluster.sh
```

## Mantenimiento

```bash
# Actualizar componentes
helm repo update
helm upgrade ingress-nginx ingress-nginx/ingress-nginx -n ingress-nginx

# Limpiar recursos viejos
kubectl delete pods --field-selector=status.phase=Failed -A
kubectl delete pods --field-selector=status.phase=Succeeded -A

# Backup de configuración
kubectl get all --all-namespaces -o yaml > cluster-backup.yaml
```

## Troubleshooting

```bash
# Ver logs de componentes
kubectl logs -n ingress-nginx deployment/ingress-nginx-controller
kubectl logs -n cert-manager deployment/cert-manager

# Describir recursos con problemas
kubectl describe pod <pod-name> -n <namespace>

# Eventos del cluster
kubectl get events --all-namespaces --sort-by='.lastTimestamp'

# Recursos consumidos
kubectl top nodes
kubectl top pods -A
```
Reference in New Issue
Block a user