Complete documentation for future sessions

- CLAUDE.md for AI agents to understand the codebase
- GITEA-GUIDE.md centralizes all Gitea operations (API, Registry, Auth)
- DEVELOPMENT-WORKFLOW.md explains the complete dev process
- ROADMAP.md, NEXT-SESSION.md for planning
- QUICK-REFERENCE.md, TROUBLESHOOTING.md for daily use
- 40+ detailed docs in /docs folder
- Backend as submodule from Gitea

Everything documented for autonomous operation.

Co-Authored-By: Claude Sonnet 4.5 (1M context) <noreply@anthropic.com>
Author: Hector Ros
Date: 2026-01-20 00:36:53 +01:00
Commit: db71705842
49 changed files with 19162 additions and 0 deletions

scripts/install-k3s-cluster.sh (new executable file, 221 lines)

@@ -0,0 +1,221 @@
#!/bin/bash
# AiWorker K3s HA Cluster Installation Script
# Location: Houston, Texas (us-hou-1)
# K3s Version: v1.35.0+k3s1
set -e
# Colors
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color
echo -e "${GREEN}🚀 AiWorker K3s HA Cluster Installation${NC}"
echo "========================================"
# Configuration
K3S_VERSION="v1.35.0+k3s1"
CONTROL_PLANE_IPS=("108.165.47.233" "108.165.47.235" "108.165.47.215")
CONTROL_PLANE_PRIVATE=("10.100.0.2" "10.100.0.3" "10.100.0.4")
WORKER_IPS=("108.165.47.225" "108.165.47.224" "108.165.47.222")
WORKER_PRIVATE=("10.100.0.5" "10.100.0.6" "10.100.0.7")
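# Topology: three control planes (HA with embedded etcd via --cluster-init)
# plus three workers. Public IPs are used for SSH; the 10.100.0.x addresses
# on eth1 carry cluster traffic (flannel, etcd, kube-apiserver).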
# Step 1: Install first control plane with cluster-init
echo -e "\n${YELLOW}Step 1/5: Installing first control plane (HA mode)${NC}"
ssh -o StrictHostKeyChecking=no root@${CONTROL_PLANE_IPS[0]} "curl -sfL https://get.k3s.io | \
  INSTALL_K3S_VERSION=${K3S_VERSION} \
  INSTALL_K3S_EXEC='server \
    --cluster-init \
    --disable traefik \
    --disable servicelb \
    --node-name k8s-cp-01 \
    --node-ip ${CONTROL_PLANE_PRIVATE[0]} \
    --flannel-iface eth1 \
    --tls-san ${CONTROL_PLANE_IPS[0]} \
    --tls-san ${CONTROL_PLANE_IPS[1]} \
    --tls-san ${CONTROL_PLANE_IPS[2]} \
    --tls-san ${CONTROL_PLANE_PRIVATE[0]} \
    --tls-san ${CONTROL_PLANE_PRIVATE[1]} \
    --tls-san ${CONTROL_PLANE_PRIVATE[2]}' \
  sh -"
echo -e "${GREEN}✓ First control plane installed${NC}"
# Get K3s token
echo -e "\n${YELLOW}Retrieving K3s token...${NC}"
K3S_TOKEN=$(ssh root@${CONTROL_PLANE_IPS[0]} "cat /var/lib/rancher/k3s/server/node-token")
echo -e "${GREEN}✓ Token retrieved${NC}"
# Download kubeconfig
echo -e "\n${YELLOW}Downloading kubeconfig...${NC}"
mkdir -p ~/.kube
ssh root@${CONTROL_PLANE_IPS[0]} "cat /etc/rancher/k3s/k3s.yaml" | \
  sed "s/127.0.0.1/${CONTROL_PLANE_IPS[0]}/g" > ~/.kube/aiworker-config
chmod 600 ~/.kube/aiworker-config
echo -e "${GREEN}✓ Kubeconfig saved to ~/.kube/aiworker-config${NC}"
# Wait for first node to be ready
echo -e "\n${YELLOW}Waiting for first node to be ready...${NC}"
sleep 10
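# Note: a fixed sleep is a heuristic. Polling
# 'kubectl --kubeconfig ~/.kube/aiworker-config get nodes' until k8s-cp-01
# reports Ready would be a more robust wait.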
# Step 2: Join additional control planes
echo -e "\n${YELLOW}Step 2/5: Joining additional control planes${NC}"
for i in 1 2; do
  echo " Installing k8s-cp-0$((i+1))..."
  ssh -o StrictHostKeyChecking=no root@${CONTROL_PLANE_IPS[$i]} "curl -sfL https://get.k3s.io | \
    INSTALL_K3S_VERSION=${K3S_VERSION} \
    K3S_TOKEN='${K3S_TOKEN}' \
    INSTALL_K3S_EXEC='server \
      --server https://${CONTROL_PLANE_PRIVATE[0]}:6443 \
      --disable traefik \
      --disable servicelb \
      --node-name k8s-cp-0$((i+1)) \
      --node-ip ${CONTROL_PLANE_PRIVATE[$i]} \
      --flannel-iface eth1 \
      --tls-san ${CONTROL_PLANE_IPS[0]} \
      --tls-san ${CONTROL_PLANE_IPS[1]} \
      --tls-san ${CONTROL_PLANE_IPS[2]}' \
    sh -" &
done
wait
echo -e "${GREEN}✓ All control planes installed${NC}"
sleep 15
# Step 3: Join worker nodes
echo -e "\n${YELLOW}Step 3/5: Joining worker nodes${NC}"
for i in 0 1 2; do
  echo " Installing k8s-worker-0$((i+1))..."
  ssh -o StrictHostKeyChecking=no root@${WORKER_IPS[$i]} "curl -sfL https://get.k3s.io | \
    INSTALL_K3S_VERSION=${K3S_VERSION} \
    K3S_TOKEN='${K3S_TOKEN}' \
    K3S_URL='https://${CONTROL_PLANE_PRIVATE[0]}:6443' \
    INSTALL_K3S_EXEC='--node-name k8s-worker-0$((i+1)) \
      --node-ip ${WORKER_PRIVATE[$i]} \
      --flannel-iface eth1' \
    sh -" &
done
wait
echo -e "${GREEN}✓ All workers joined${NC}"
sleep 15
# Step 4: Verify cluster
echo -e "\n${YELLOW}Step 4/5: Verifying cluster${NC}"
export KUBECONFIG=~/.kube/aiworker-config
kubectl get nodes -o wide
# Step 5: Install core components
echo -e "\n${YELLOW}Step 5/5: Installing core components${NC}"
# Nginx Ingress
echo " Installing Nginx Ingress Controller..."
kubectl apply -f https://raw.githubusercontent.com/kubernetes/ingress-nginx/main/deploy/static/provider/baremetal/deploy.yaml
kubectl wait --for=condition=ready pod -l app.kubernetes.io/component=controller -n ingress-nginx --timeout=300s
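# Notes: 'kubectl wait' errors out if the controller pod has not been created
# yet; 'kubectl rollout status deployment/ingress-nginx-controller -n ingress-nginx'
# is a more forgiving check. Pinning the manifest to a release tag instead of
# 'main' also makes reruns reproducible.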
echo -e "${GREEN}✓ Nginx Ingress installed${NC}"
# Cert-Manager
echo " Installing Cert-Manager..."
kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.16.2/cert-manager.yaml
kubectl wait --for=condition=ready pod -l app.kubernetes.io/component=controller -n cert-manager --timeout=300s
echo -e "${GREEN}✓ Cert-Manager installed${NC}"
# Create Let's Encrypt issuers
cat <<EOL | kubectl apply -f -
apiVersion: cert-manager.io/v1
kind: ClusterIssuer
metadata:
  name: letsencrypt-prod
spec:
  acme:
    server: https://acme-v02.api.letsencrypt.org/directory
    email: hector+aiworker@teamsuqad.io
    privateKeySecretRef:
      name: letsencrypt-prod
    solvers:
    - http01:
        ingress:
          class: nginx
---
apiVersion: cert-manager.io/v1
kind: ClusterIssuer
metadata:
  name: letsencrypt-staging
spec:
  acme:
    server: https://acme-staging-v02.api.letsencrypt.org/directory
    email: hector+aiworker@teamsuqad.io
    privateKeySecretRef:
      name: letsencrypt-staging
    solvers:
    - http01:
        ingress:
          class: nginx
EOL
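# The staging issuer lets certificate issuance be tested without consuming
# Let's Encrypt production rate limits; point Ingress annotations at
# letsencrypt-prod once staging certificates validate.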
echo -e "${GREEN}✓ Let's Encrypt issuers created${NC}"
# Create namespaces
cat <<EOL | kubectl apply -f -
apiVersion: v1
kind: Namespace
metadata:
  name: control-plane
  labels:
    name: control-plane
    environment: production
---
apiVersion: v1
kind: Namespace
metadata:
  name: agents
  labels:
    name: agents
    environment: production
---
apiVersion: v1
kind: Namespace
metadata:
  name: gitea
  labels:
    name: gitea
    environment: production
---
apiVersion: v1
kind: Namespace
metadata:
  name: monitoring
  labels:
    name: monitoring
    environment: production
EOL
echo -e "${GREEN}✓ Project namespaces created${NC}"
# ArgoCD
echo " Installing ArgoCD..."
kubectl create namespace argocd
kubectl apply -n argocd -f https://raw.githubusercontent.com/argoproj/argo-cd/stable/manifests/install.yaml
kubectl wait --for=condition=ready pod -l app.kubernetes.io/name=argocd-server -n argocd --timeout=300s
echo -e "${GREEN}✓ ArgoCD installed${NC}"
# Get ArgoCD password
ARGOCD_PASSWORD=$(kubectl get secret argocd-initial-admin-secret -n argocd -o jsonpath="{.data.password}" | base64 -d)
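# The initial admin secret is generated once at install time; after setting a
# permanent password (argocd account update-password) it can safely be deleted.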
echo -e "\n${GREEN}========================================${NC}"
echo -e "${GREEN}✅ Cluster installation complete!${NC}"
echo -e "${GREEN}========================================${NC}"
echo ""
echo -e "📊 Cluster Status:"
kubectl get nodes
echo ""
echo -e "🔐 Access Information:"
echo -e " Kubeconfig: ~/.kube/aiworker-config"
echo -e " ArgoCD: https://argocd.fuq.tv"
echo -e " Username: admin"
echo -e " Password: ${ARGOCD_PASSWORD}"
echo ""
echo -e "💡 Next steps:"
echo -e " 1. Configure DNS: *.fuq.tv → 108.165.47.221, 108.165.47.203"
echo -e " 2. Deploy applications via ArgoCD"
echo -e " 3. Access ArgoCD at https://argocd.fuq.tv"

scripts/setup-load-balancers.sh (new executable file, 129 lines)

@@ -0,0 +1,129 @@
#!/bin/bash
# Configure HAProxy Load Balancers for AiWorker K3s Cluster
set -e
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m'
echo -e "${GREEN}🔧 Configuring Load Balancers${NC}"
LB_IPS=("108.165.47.221" "108.165.47.203")
LB_NAMES=("k8s-lb-01" "k8s-lb-02")
# Get Nginx Ingress NodePort ports
echo -e "\n${YELLOW}Getting Nginx Ingress NodePorts...${NC}"
HTTP_PORT=$(kubectl --kubeconfig ~/.kube/aiworker-config get svc -n ingress-nginx ingress-nginx-controller -o jsonpath='{.spec.ports[?(@.port==80)].nodePort}')
HTTPS_PORT=$(kubectl --kubeconfig ~/.kube/aiworker-config get svc -n ingress-nginx ingress-nginx-controller -o jsonpath='{.spec.ports[?(@.port==443)].nodePort}')
echo " HTTP NodePort: ${HTTP_PORT}"
echo " HTTPS NodePort: ${HTTPS_PORT}"
# Create HAProxy configuration
cat > /tmp/haproxy.cfg <<EOF
global
    log /dev/log local0
    log /dev/log local1 notice
    chroot /var/lib/haproxy
    stats socket /run/haproxy/admin.sock mode 660 level admin
    stats timeout 30s
    user haproxy
    group haproxy
    daemon
    maxconn 4000

defaults
    log     global
    mode    http
    option  httplog
    option  dontlognull
    timeout connect 5000
    timeout client  50000
    timeout server  50000

# Frontend HTTP (port 80)
frontend http_frontend
    bind *:80
    mode http
    option httplog
    option forwardfor
    default_backend http_backend

# Backend HTTP - Workers NodePort ${HTTP_PORT}
backend http_backend
    mode http
    balance roundrobin
    option httpchk GET /healthz
    http-check expect status 200
    server k8s-worker-01 10.100.0.5:${HTTP_PORT} check
    server k8s-worker-02 10.100.0.6:${HTTP_PORT} check
    server k8s-worker-03 10.100.0.7:${HTTP_PORT} check

# Frontend HTTPS (port 443)
frontend https_frontend
    bind *:443
    mode tcp
    option tcplog
    default_backend https_backend

# Backend HTTPS - Workers NodePort ${HTTPS_PORT} (TCP passthrough)
backend https_backend
    mode tcp
    balance roundrobin
    option tcp-check
    server k8s-worker-01 10.100.0.5:${HTTPS_PORT} check
    server k8s-worker-02 10.100.0.6:${HTTPS_PORT} check
    server k8s-worker-03 10.100.0.7:${HTTPS_PORT} check

# Stats interface
frontend stats
    bind *:8404
    mode http
    stats enable
    stats uri /stats
    stats refresh 10s
    stats auth admin:aiworker2026
EOF
# Deploy to both load balancers
for i in 0 1; do
  echo -e "\n${YELLOW}Configuring ${LB_NAMES[$i]}...${NC}"
  # Install HAProxy if not already present
  ssh root@${LB_IPS[$i]} "which haproxy || (apt update && apt install -y haproxy)"
  # Deploy configuration
  scp /tmp/haproxy.cfg root@${LB_IPS[$i]}:/etc/haproxy/haproxy.cfg
  # Validate before restarting so a bad config never takes the LB down
  ssh root@${LB_IPS[$i]} "haproxy -c -f /etc/haproxy/haproxy.cfg"
  # Restart HAProxy
  ssh root@${LB_IPS[$i]} "systemctl restart haproxy && systemctl enable haproxy"
  # Verify
  if ssh root@${LB_IPS[$i]} "systemctl is-active haproxy" | grep -q "active"; then
    echo -e "${GREEN}✓ ${LB_NAMES[$i]} configured and running${NC}"
  else
    echo -e "${RED}✗ ${LB_NAMES[$i]} failed to start${NC}"
    exit 1
  fi
done
echo -e "\n${GREEN}========================================${NC}"
echo -e "${GREEN}✅ Load Balancers configured!${NC}"
echo -e "${GREEN}========================================${NC}"
echo ""
echo -e "📊 HAProxy Stats:"
echo -e " LB-01: http://108.165.47.221:8404/stats"
echo -e " LB-02: http://108.165.47.203:8404/stats"
echo -e " Credentials: admin / aiworker2026"
echo ""
echo -e "🌐 DNS Configuration:"
echo -e " *.fuq.tv A 108.165.47.221"
echo -e " *.fuq.tv A 108.165.47.203"
echo -e " *.r.fuq.tv A 108.165.47.221"
echo -e " *.r.fuq.tv A 108.165.47.203"
echo ""
echo -e "🧪 Test access:"
echo -e " curl https://test.fuq.tv"
rm /tmp/haproxy.cfg
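A quick end-to-end check for both load balancers, sketched on the assumption that DNS for test.fuq.tv may not exist yet (hence --resolve):

#!/bin/bash
# Hit each HAProxy instance directly; --resolve pins the hostname to one LB
# at a time so DNS round-robin cannot hide a dead node.
for LB in 108.165.47.221 108.165.47.203; do
  echo "Testing ${LB}..."
  curl -sk --resolve test.fuq.tv:443:${LB} https://test.fuq.tv \
    -o /dev/null -w "  HTTPS via ${LB}: %{http_code}\n"
  # First lines of the HAProxy stats export (CSV)
  curl -s "http://admin:aiworker2026@${LB}:8404/stats;csv" | head -n 3
done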