Complete documentation for future sessions
- CLAUDE.md for AI agents to understand the codebase
- GITEA-GUIDE.md centralizes all Gitea operations (API, Registry, Auth)
- DEVELOPMENT-WORKFLOW.md explains the complete dev process
- ROADMAP.md and NEXT-SESSION.md for planning
- QUICK-REFERENCE.md and TROUBLESHOOTING.md for daily use
- 40+ detailed docs in the /docs folder
- Backend as a submodule from Gitea

Everything documented for autonomous operation.

Co-Authored-By: Claude Sonnet 4.5 (1M context) <noreply@anthropic.com>
This commit is contained in:
221
scripts/install-k3s-cluster.sh
Executable file
221
scripts/install-k3s-cluster.sh
Executable file
@@ -0,0 +1,221 @@
|
||||
#!/bin/bash
|
||||
# AiWorker K3s HA Cluster Installation Script
|
||||
# Location: Houston, Texas (us-hou-1)
|
||||
# K3s Version: v1.35.0+k3s1
|
||||
|
||||
set -e
|
||||
|
||||
# Colors
|
||||
RED='\033[0;31m'
|
||||
GREEN='\033[0;32m'
|
||||
YELLOW='\033[1;33m'
|
||||
NC='\033[0m' # No Color
|
||||
|
||||
echo -e "${GREEN}🚀 AiWorker K3s HA Cluster Installation${NC}"
|
||||
echo "========================================"
|
||||
|
||||
# Configuration
|
||||
K3S_VERSION="v1.35.0+k3s1"
|
||||
CONTROL_PLANE_IPS=("108.165.47.233" "108.165.47.235" "108.165.47.215")
|
||||
CONTROL_PLANE_PRIVATE=("10.100.0.2" "10.100.0.3" "10.100.0.4")
|
||||
WORKER_IPS=("108.165.47.225" "108.165.47.224" "108.165.47.222")
|
||||
WORKER_PRIVATE=("10.100.0.5" "10.100.0.6" "10.100.0.7")
|
||||
|
||||
# Step 1: Install first control plane with cluster-init
|
||||
echo -e "\n${YELLOW}Step 1/5: Installing first control plane (HA mode)${NC}"
|
||||
ssh -o StrictHostKeyChecking=no root@${CONTROL_PLANE_IPS[0]} "curl -sfL https://get.k3s.io | \
|
||||
INSTALL_K3S_VERSION=${K3S_VERSION} \
|
||||
INSTALL_K3S_EXEC='server \
|
||||
--cluster-init \
|
||||
--disable traefik \
|
||||
--disable servicelb \
|
||||
--node-name k8s-cp-01 \
|
||||
--node-ip ${CONTROL_PLANE_PRIVATE[0]} \
|
||||
--flannel-iface eth1 \
|
||||
--tls-san ${CONTROL_PLANE_IPS[0]} \
|
||||
--tls-san ${CONTROL_PLANE_IPS[1]} \
|
||||
--tls-san ${CONTROL_PLANE_IPS[2]} \
|
||||
--tls-san ${CONTROL_PLANE_PRIVATE[0]} \
|
||||
--tls-san ${CONTROL_PLANE_PRIVATE[1]} \
|
||||
--tls-san ${CONTROL_PLANE_PRIVATE[2]}' \
|
||||
sh -"
|
||||
|
||||
echo -e "${GREEN}✓ First control plane installed${NC}"
|
||||
|
||||
# Get K3s token
|
||||
echo -e "\n${YELLOW}Retrieving K3s token...${NC}"
|
||||
K3S_TOKEN=$(ssh root@${CONTROL_PLANE_IPS[0]} "cat /var/lib/rancher/k3s/server/node-token")
|
||||
echo -e "${GREEN}✓ Token retrieved${NC}"
|
||||
|
||||
# Download kubeconfig
|
||||
echo -e "\n${YELLOW}Downloading kubeconfig...${NC}"
|
||||
ssh root@${CONTROL_PLANE_IPS[0]} "cat /etc/rancher/k3s/k3s.yaml" | \
|
||||
sed "s/127.0.0.1/${CONTROL_PLANE_IPS[0]}/g" > ~/.kube/aiworker-config
|
||||
chmod 600 ~/.kube/aiworker-config
|
||||
echo -e "${GREEN}✓ Kubeconfig saved to ~/.kube/aiworker-config${NC}"
|
||||
|
||||
# Wait for first node to be ready
|
||||
echo -e "\n${YELLOW}Waiting for first node to be ready...${NC}"
|
||||
sleep 10
|
||||
|
||||
# Step 2: Join additional control planes
|
||||
echo -e "\n${YELLOW}Step 2/5: Joining additional control planes${NC}"
|
||||
for i in 1 2; do
|
||||
echo " Installing k8s-cp-0$((i+1))..."
|
||||
ssh -o StrictHostKeyChecking=no root@${CONTROL_PLANE_IPS[$i]} "curl -sfL https://get.k3s.io | \
|
||||
INSTALL_K3S_VERSION=${K3S_VERSION} \
|
||||
K3S_TOKEN='${K3S_TOKEN}' \
|
||||
INSTALL_K3S_EXEC='server \
|
||||
--server https://${CONTROL_PLANE_PRIVATE[0]}:6443 \
|
||||
--disable traefik \
|
||||
--disable servicelb \
|
||||
--node-name k8s-cp-0$((i+1)) \
|
||||
--node-ip ${CONTROL_PLANE_PRIVATE[$i]} \
|
||||
--flannel-iface eth1 \
|
||||
--tls-san ${CONTROL_PLANE_IPS[0]} \
|
||||
--tls-san ${CONTROL_PLANE_IPS[1]} \
|
||||
--tls-san ${CONTROL_PLANE_IPS[2]}' \
|
||||
sh -" &
|
||||
done
|
||||
|
||||
wait
|
||||
echo -e "${GREEN}✓ All control planes installed${NC}"
|
||||
|
||||
sleep 15
|
||||
|
||||
# Step 3: Join worker nodes
|
||||
echo -e "\n${YELLOW}Step 3/5: Joining worker nodes${NC}"
|
||||
for i in 0 1 2; do
|
||||
echo " Installing k8s-worker-0$((i+1))..."
|
||||
ssh -o StrictHostKeyChecking=no root@${WORKER_IPS[$i]} "curl -sfL https://get.k3s.io | \
|
||||
INSTALL_K3S_VERSION=${K3S_VERSION} \
|
||||
K3S_TOKEN='${K3S_TOKEN}' \
|
||||
K3S_URL='https://${CONTROL_PLANE_PRIVATE[0]}:6443' \
|
||||
INSTALL_K3S_EXEC='--node-name k8s-worker-0$((i+1)) \
|
||||
--node-ip ${WORKER_PRIVATE[$i]} \
|
||||
--flannel-iface eth1' \
|
||||
sh -" &
|
||||
done
|
||||
|
||||
wait
|
||||
echo -e "${GREEN}✓ All workers joined${NC}"
|
||||
|
||||
sleep 15
|
||||
|
||||
# Step 4: Verify cluster
|
||||
echo -e "\n${YELLOW}Step 4/5: Verifying cluster${NC}"
|
||||
export KUBECONFIG=~/.kube/aiworker-config
|
||||
kubectl get nodes -o wide
|
||||
|
||||
# Step 5: Install core components
|
||||
echo -e "\n${YELLOW}Step 5/5: Installing core components${NC}"
|
||||
|
||||
# Nginx Ingress
|
||||
echo " Installing Nginx Ingress Controller..."
|
||||
kubectl apply -f https://raw.githubusercontent.com/kubernetes/ingress-nginx/main/deploy/static/provider/baremetal/deploy.yaml
|
||||
kubectl wait --for=condition=ready pod -l app.kubernetes.io/component=controller -n ingress-nginx --timeout=300s
|
||||
echo -e "${GREEN}✓ Nginx Ingress installed${NC}"
|
||||
|
||||
# Cert-Manager
|
||||
echo " Installing Cert-Manager..."
|
||||
kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.16.2/cert-manager.yaml
|
||||
kubectl wait --for=condition=ready pod -l app.kubernetes.io/component=controller -n cert-manager --timeout=300s
|
||||
echo -e "${GREEN}✓ Cert-Manager installed${NC}"
|
||||
|
||||
# Create Let's Encrypt issuers
|
||||
cat <<EOL | kubectl apply -f -
|
||||
apiVersion: cert-manager.io/v1
|
||||
kind: ClusterIssuer
|
||||
metadata:
|
||||
name: letsencrypt-prod
|
||||
spec:
|
||||
acme:
|
||||
server: https://acme-v02.api.letsencrypt.org/directory
|
||||
email: hector+aiworker@teamsuqad.io
|
||||
privateKeySecretRef:
|
||||
name: letsencrypt-prod
|
||||
solvers:
|
||||
- http01:
|
||||
ingress:
|
||||
class: nginx
|
||||
---
|
||||
apiVersion: cert-manager.io/v1
|
||||
kind: ClusterIssuer
|
||||
metadata:
|
||||
name: letsencrypt-staging
|
||||
spec:
|
||||
acme:
|
||||
server: https://acme-staging-v02.api.letsencrypt.org/directory
|
||||
email: hector+aiworker@teamsuqad.io
|
||||
privateKeySecretRef:
|
||||
name: letsencrypt-staging
|
||||
solvers:
|
||||
- http01:
|
||||
ingress:
|
||||
class: nginx
|
||||
EOL
|
||||
echo -e "${GREEN}✓ Let's Encrypt issuers created${NC}"
|
||||
|
||||
# Create namespaces
|
||||
cat <<EOL | kubectl apply -f -
|
||||
apiVersion: v1
|
||||
kind: Namespace
|
||||
metadata:
|
||||
name: control-plane
|
||||
labels:
|
||||
name: control-plane
|
||||
environment: production
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Namespace
|
||||
metadata:
|
||||
name: agents
|
||||
labels:
|
||||
name: agents
|
||||
environment: production
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Namespace
|
||||
metadata:
|
||||
name: gitea
|
||||
labels:
|
||||
name: gitea
|
||||
environment: production
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Namespace
|
||||
metadata:
|
||||
name: monitoring
|
||||
labels:
|
||||
name: monitoring
|
||||
environment: production
|
||||
EOL
|
||||
echo -e "${GREEN}✓ Project namespaces created${NC}"
|
||||
|
||||
# ArgoCD
|
||||
echo " Installing ArgoCD..."
|
||||
kubectl create namespace argocd
|
||||
kubectl apply -n argocd -f https://raw.githubusercontent.com/argoproj/argo-cd/stable/manifests/install.yaml
|
||||
kubectl wait --for=condition=ready pod -l app.kubernetes.io/name=argocd-server -n argocd --timeout=300s
|
||||
echo -e "${GREEN}✓ ArgoCD installed${NC}"
|
||||
|
||||
# Get ArgoCD password
|
||||
ARGOCD_PASSWORD=$(kubectl get secret argocd-initial-admin-secret -n argocd -o jsonpath="{.data.password}" | base64 -d)
|
||||
|
||||
echo -e "\n${GREEN}========================================${NC}"
|
||||
echo -e "${GREEN}✅ Cluster installation complete!${NC}"
|
||||
echo -e "${GREEN}========================================${NC}"
|
||||
echo ""
|
||||
echo -e "📊 Cluster Status:"
|
||||
kubectl get nodes
|
||||
echo ""
|
||||
echo -e "🔐 Access Information:"
|
||||
echo -e " Kubeconfig: ~/.kube/aiworker-config"
|
||||
echo -e " ArgoCD: https://argocd.fuq.tv"
|
||||
echo -e " Username: admin"
|
||||
echo -e " Password: ${ARGOCD_PASSWORD}"
|
||||
echo ""
|
||||
echo -e "💡 Next steps:"
|
||||
echo -e " 1. Configure DNS: *.fuq.tv → 108.165.47.221, 108.165.47.203"
|
||||
echo -e " 2. Deploy applications via ArgoCD"
|
||||
echo -e " 3. Access ArgoCD at https://argocd.fuq.tv"
|
||||
129
scripts/setup-load-balancers.sh
Executable file
129
scripts/setup-load-balancers.sh
Executable file
@@ -0,0 +1,129 @@
|
||||
#!/bin/bash
|
||||
# Configure HAProxy Load Balancers for AiWorker K3s Cluster
|
||||
|
||||
set -e
|
||||
|
||||
GREEN='\033[0;32m'
|
||||
YELLOW='\033[1;33m'
|
||||
NC='\033[0m'
|
||||
|
||||
echo -e "${GREEN}🔧 Configuring Load Balancers${NC}"
|
||||
|
||||
LB_IPS=("108.165.47.221" "108.165.47.203")
|
||||
LB_NAMES=("k8s-lb-01" "k8s-lb-02")
|
||||
|
||||
# Get Nginx Ingress NodePort ports
|
||||
echo -e "\n${YELLOW}Getting Nginx Ingress NodePorts...${NC}"
|
||||
HTTP_PORT=$(kubectl --kubeconfig ~/.kube/aiworker-config get svc -n ingress-nginx ingress-nginx-controller -o jsonpath='{.spec.ports[?(@.port==80)].nodePort}')
|
||||
HTTPS_PORT=$(kubectl --kubeconfig ~/.kube/aiworker-config get svc -n ingress-nginx ingress-nginx-controller -o jsonpath='{.spec.ports[?(@.port==443)].nodePort}')
|
||||
|
||||
echo " HTTP NodePort: ${HTTP_PORT}"
|
||||
echo " HTTPS NodePort: ${HTTPS_PORT}"
|
||||
|
||||
# Create HAProxy configuration
|
||||
cat > /tmp/haproxy.cfg <<EOF
|
||||
global
|
||||
log /dev/log local0
|
||||
log /dev/log local1 notice
|
||||
chroot /var/lib/haproxy
|
||||
stats socket /run/haproxy/admin.sock mode 660 level admin
|
||||
stats timeout 30s
|
||||
user haproxy
|
||||
group haproxy
|
||||
daemon
|
||||
maxconn 4000
|
||||
|
||||
defaults
|
||||
log global
|
||||
mode http
|
||||
option httplog
|
||||
option dontlognull
|
||||
timeout connect 5000
|
||||
timeout client 50000
|
||||
timeout server 50000
|
||||
|
||||
# Frontend HTTP (port 80)
|
||||
frontend http_frontend
|
||||
bind *:80
|
||||
mode http
|
||||
option httplog
|
||||
option forwardfor
|
||||
default_backend http_backend
|
||||
|
||||
# Backend HTTP - Workers NodePort ${HTTP_PORT}
|
||||
backend http_backend
|
||||
mode http
|
||||
balance roundrobin
|
||||
option httpchk GET /healthz
|
||||
http-check expect status 200
|
||||
server k8s-worker-01 10.100.0.5:${HTTP_PORT} check
|
||||
server k8s-worker-02 10.100.0.6:${HTTP_PORT} check
|
||||
server k8s-worker-03 10.100.0.7:${HTTP_PORT} check
|
||||
|
||||
# Frontend HTTPS (port 443)
|
||||
frontend https_frontend
|
||||
bind *:443
|
||||
mode tcp
|
||||
option tcplog
|
||||
default_backend https_backend
|
||||
|
||||
# Backend HTTPS - Workers NodePort ${HTTPS_PORT} (TCP passthrough)
|
||||
backend https_backend
|
||||
mode tcp
|
||||
balance roundrobin
|
||||
option tcp-check
|
||||
server k8s-worker-01 10.100.0.5:${HTTPS_PORT} check
|
||||
server k8s-worker-02 10.100.0.6:${HTTPS_PORT} check
|
||||
server k8s-worker-03 10.100.0.7:${HTTPS_PORT} check
|
||||
|
||||
# Stats interface
|
||||
frontend stats
|
||||
bind *:8404
|
||||
mode http
|
||||
stats enable
|
||||
stats uri /stats
|
||||
stats refresh 10s
|
||||
stats auth admin:aiworker2026
|
||||
EOF
|
||||
|
||||
# Deploy to both load balancers
|
||||
for i in 0 1; do
|
||||
echo -e "\n${YELLOW}Configuring ${LB_NAMES[$i]}...${NC}"
|
||||
|
||||
# Install HAProxy if not installed
|
||||
ssh root@${LB_IPS[$i]} "which haproxy || (apt update && apt install -y haproxy)"
|
||||
|
||||
# Deploy configuration
|
||||
scp /tmp/haproxy.cfg root@${LB_IPS[$i]}:/etc/haproxy/haproxy.cfg
|
||||
|
||||
# Restart HAProxy
|
||||
ssh root@${LB_IPS[$i]} "systemctl restart haproxy && systemctl enable haproxy"
|
||||
|
||||
# Verify
|
||||
if ssh root@${LB_IPS[$i]} "systemctl is-active haproxy" | grep -q "active"; then
|
||||
echo -e "${GREEN}✓ ${LB_NAMES[$i]} configured and running${NC}"
|
||||
else
|
||||
echo -e "${RED}✗ ${LB_NAMES[$i]} failed to start${NC}"
|
||||
exit 1
|
||||
fi
|
||||
done
|
||||
|
||||
echo -e "\n${GREEN}========================================${NC}"
|
||||
echo -e "${GREEN}✅ Load Balancers configured!${NC}"
|
||||
echo -e "${GREEN}========================================${NC}"
|
||||
echo ""
|
||||
echo -e "📊 HAProxy Stats:"
|
||||
echo -e " LB-01: http://108.165.47.221:8404/stats"
|
||||
echo -e " LB-02: http://108.165.47.203:8404/stats"
|
||||
echo -e " Credentials: admin / aiworker2026"
|
||||
echo ""
|
||||
echo -e "🌐 DNS Configuration:"
|
||||
echo -e " *.fuq.tv A 108.165.47.221"
|
||||
echo -e " *.fuq.tv A 108.165.47.203"
|
||||
echo -e " *.r.fuq.tv A 108.165.47.221"
|
||||
echo -e " *.r.fuq.tv A 108.165.47.203"
|
||||
echo ""
|
||||
echo -e "🧪 Test access:"
|
||||
echo -e " curl https://test.fuq.tv"
|
||||
|
||||
rm /tmp/haproxy.cfg
|
||||
Reference in New Issue
Block a user