Complete documentation for future sessions
- CLAUDE.md for AI agents to understand the codebase
- GITEA-GUIDE.md centralizes all Gitea operations (API, Registry, Auth)
- DEVELOPMENT-WORKFLOW.md explains complete dev process
- ROADMAP.md, NEXT-SESSION.md for planning
- QUICK-REFERENCE.md, TROUBLESHOOTING.md for daily use
- 40+ detailed docs in /docs folder
- Backend as submodule from Gitea

Everything documented for autonomous operation.

Co-Authored-By: Claude Sonnet 4.5 (1M context) <noreply@anthropic.com>
This commit is contained in:
221
scripts/install-k3s-cluster.sh
Executable file
221
scripts/install-k3s-cluster.sh
Executable file
@@ -0,0 +1,221 @@
|
||||
#!/bin/bash
# AiWorker K3s HA Cluster Installation Script
# Location: Houston, Texas (us-hou-1)
# K3s Version: v1.35.0+k3s1
#
# Installs K3s over SSH (as root) on three control-plane nodes and three
# worker nodes, then deploys the core add-ons with kubectl.

# -e: abort on the first failing command.
# -u: treat use of an unset variable as an error.
# -o pipefail: a pipeline fails if ANY stage fails, not only the last one.
set -euo pipefail

# Colors (ANSI escape sequences; rendered by `echo -e`)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

echo -e "${GREEN}🚀 AiWorker K3s HA Cluster Installation${NC}"
echo "========================================"

# Configuration
# Index N of each *_IPS array and of the matching *_PRIVATE array are used
# together for the same node: the first as SSH target, the second as --node-ip.
K3S_VERSION="v1.35.0+k3s1"
CONTROL_PLANE_IPS=("108.165.47.233" "108.165.47.235" "108.165.47.215")
CONTROL_PLANE_PRIVATE=("10.100.0.2" "10.100.0.3" "10.100.0.4")
WORKER_IPS=("108.165.47.225" "108.165.47.224" "108.165.47.222")
WORKER_PRIVATE=("10.100.0.5" "10.100.0.6" "10.100.0.7")

# Step 1: Bootstrap the first control plane (--cluster-init)
echo -e "\n${YELLOW}Step 1/5: Installing first control plane (HA mode)${NC}"

# Arguments handed to the K3s installer via INSTALL_K3S_EXEC. They are joined
# with single spaces below, giving one flat command line for the remote shell.
bootstrap_exec_args=(
  server
  --cluster-init
  --disable traefik
  --disable servicelb
  --node-name k8s-cp-01
  --node-ip "${CONTROL_PLANE_PRIVATE[0]}"
  --flannel-iface eth1
  --tls-san "${CONTROL_PLANE_IPS[0]}"
  --tls-san "${CONTROL_PLANE_IPS[1]}"
  --tls-san "${CONTROL_PLANE_IPS[2]}"
  --tls-san "${CONTROL_PLANE_PRIVATE[0]}"
  --tls-san "${CONTROL_PLANE_PRIVATE[1]}"
  --tls-san "${CONTROL_PLANE_PRIVATE[2]}"
)

# The whole pipeline (download installer, run it with the env vars set) is
# executed on the remote host.
ssh -o StrictHostKeyChecking=no "root@${CONTROL_PLANE_IPS[0]}" \
  "curl -sfL https://get.k3s.io | INSTALL_K3S_VERSION=${K3S_VERSION} INSTALL_K3S_EXEC='${bootstrap_exec_args[*]}' sh -"

echo -e "${GREEN}✓ First control plane installed${NC}"

# Get K3s token
# The join token is read from the first control plane; later steps pass it to
# the installer on every other node.
echo -e "\n${YELLOW}Retrieving K3s token...${NC}"
K3S_TOKEN=$(ssh "root@${CONTROL_PLANE_IPS[0]}" "cat /var/lib/rancher/k3s/server/node-token")
if [[ -z "${K3S_TOKEN}" ]]; then
  # An empty token would only surface later as confusing join failures.
  echo -e "${RED}✗ K3s token is empty${NC}" >&2
  exit 1
fi
echo -e "${GREEN}✓ Token retrieved${NC}"

# Download kubeconfig
echo -e "\n${YELLOW}Downloading kubeconfig...${NC}"
# Fetch into a variable first so an ssh failure aborts the script instead of
# being hidden behind sed's exit status and leaving an empty kubeconfig.
kubeconfig_raw=$(ssh "root@${CONTROL_PLANE_IPS[0]}" "cat /etc/rancher/k3s/k3s.yaml")
mkdir -p ~/.kube
(
  # Create the file private from the start rather than tightening it afterwards.
  umask 077
  # Point the kubeconfig at the node's public address instead of loopback.
  # The dots are escaped so only the literal address 127.0.0.1 is rewritten.
  sed "s/127\.0\.0\.1/${CONTROL_PLANE_IPS[0]}/g" <<<"${kubeconfig_raw}" > ~/.kube/aiworker-config
)
chmod 600 ~/.kube/aiworker-config
echo -e "${GREEN}✓ Kubeconfig saved to ~/.kube/aiworker-config${NC}"

# Wait for first node to be ready
# Poll the node's Ready condition on the server itself instead of sleeping a
# fixed time. The node object may not exist yet right after install, in which
# case `kubectl wait` fails immediately; hence the retry loop.
echo -e "\n${YELLOW}Waiting for first node to be ready...${NC}"
first_node_ready=false
for _ in {1..30}; do
  if ssh "root@${CONTROL_PLANE_IPS[0]}" \
    "k3s kubectl wait --for=condition=Ready node/k8s-cp-01 --timeout=10s" \
    >/dev/null 2>&1; then
    first_node_ready=true
    break
  fi
  sleep 5
done
if [[ "${first_node_ready}" != true ]]; then
  echo -e "${RED}✗ First control plane did not become Ready${NC}" >&2
  exit 1
fi

# Step 2: Join additional control planes
# Both installs run in parallel as background jobs. Their PIDs are collected
# so each exit status can be checked: a bare `wait` returns 0 even when a job
# failed, which would report success for a broken join.
echo -e "\n${YELLOW}Step 2/5: Joining additional control planes${NC}"

cp_join_pids=()
for i in 1 2; do
  cp_node_name="k8s-cp-0$((i+1))"
  echo " Installing ${cp_node_name}..."

  # Installer arguments, joined with single spaces into INSTALL_K3S_EXEC.
  cp_exec_args=(
    server
    --server "https://${CONTROL_PLANE_PRIVATE[0]}:6443"
    --disable traefik
    --disable servicelb
    --node-name "${cp_node_name}"
    --node-ip "${CONTROL_PLANE_PRIVATE[$i]}"
    --flannel-iface eth1
    --tls-san "${CONTROL_PLANE_IPS[0]}"
    --tls-san "${CONTROL_PLANE_IPS[1]}"
    --tls-san "${CONTROL_PLANE_IPS[2]}"
  )

  ssh -o StrictHostKeyChecking=no "root@${CONTROL_PLANE_IPS[$i]}" \
    "curl -sfL https://get.k3s.io | INSTALL_K3S_VERSION=${K3S_VERSION} K3S_TOKEN='${K3S_TOKEN}' INSTALL_K3S_EXEC='${cp_exec_args[*]}' sh -" &
  cp_join_pids+=("$!")
done

# Reap every job (so none is left running) and remember whether any failed.
cp_join_failed=0
for pid in "${cp_join_pids[@]}"; do
  wait "${pid}" || cp_join_failed=1
done
if (( cp_join_failed )); then
  echo -e "${RED}✗ At least one control plane failed to install${NC}" >&2
  exit 1
fi
echo -e "${GREEN}✓ All control planes installed${NC}"

sleep 15

# Step 3: Join worker nodes
# The three installs run in parallel as background jobs. Their PIDs are
# collected so each exit status can be checked: a bare `wait` returns 0 even
# when a job failed, which would report success for a broken join.
echo -e "\n${YELLOW}Step 3/5: Joining worker nodes${NC}"

worker_join_pids=()
for i in 0 1 2; do
  worker_node_name="k8s-worker-0$((i+1))"
  echo " Installing ${worker_node_name}..."

  # Installer arguments, joined with single spaces into INSTALL_K3S_EXEC.
  worker_exec_args=(
    --node-name "${worker_node_name}"
    --node-ip "${WORKER_PRIVATE[$i]}"
    --flannel-iface eth1
  )

  # K3S_URL makes the installer set the node up as an agent of that server.
  ssh -o StrictHostKeyChecking=no "root@${WORKER_IPS[$i]}" \
    "curl -sfL https://get.k3s.io | INSTALL_K3S_VERSION=${K3S_VERSION} K3S_TOKEN='${K3S_TOKEN}' K3S_URL='https://${CONTROL_PLANE_PRIVATE[0]}:6443' INSTALL_K3S_EXEC='${worker_exec_args[*]}' sh -" &
  worker_join_pids+=("$!")
done

# Reap every job (so none is left running) and remember whether any failed.
worker_join_failed=0
for pid in "${worker_join_pids[@]}"; do
  wait "${pid}" || worker_join_failed=1
done
if (( worker_join_failed )); then
  echo -e "${RED}✗ At least one worker failed to join${NC}" >&2
  exit 1
fi
echo -e "${GREEN}✓ All workers joined${NC}"

sleep 15

# Step 4: Verify cluster
echo -e "\n${YELLOW}Step 4/5: Verifying cluster${NC}"

# Every kubectl call from here on uses the kubeconfig downloaded earlier.
KUBECONFIG=~/.kube/aiworker-config
export KUBECONFIG

# Print the node list; this is informational only, nothing is asserted.
kubectl get nodes -o wide

# Step 5: Install core components
echo -e "\n${YELLOW}Step 5/5: Installing core components${NC}"

# Nginx Ingress
# NOTE(review): the manifest is fetched from the `main` branch, so the
# installed version can change between runs; consider pinning a release tag.
echo " Installing Nginx Ingress Controller..."
kubectl apply -f https://raw.githubusercontent.com/kubernetes/ingress-nginx/main/deploy/static/provider/baremetal/deploy.yaml
# Wait on the Deployment, not on pods: the Deployment exists as soon as
# `apply` returns, whereas a pod selector can match nothing yet and make
# `kubectl wait` fail immediately.
kubectl wait --for=condition=Available deployment -l app.kubernetes.io/component=controller -n ingress-nginx --timeout=300s
echo -e "${GREEN}✓ Nginx Ingress installed${NC}"

# Cert-Manager
echo " Installing Cert-Manager..."
kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.16.2/cert-manager.yaml
# Wait for every Deployment in the namespace, not only the controller:
# the webhook must be serving before cert-manager resources can be created.
kubectl wait --for=condition=Available deployment --all -n cert-manager --timeout=300s
echo -e "${GREEN}✓ Cert-Manager installed${NC}"

# Create Let's Encrypt issuers
# Two ClusterIssuers are created, one against the Let's Encrypt production
# endpoint and one against staging; both solve HTTP-01 challenges through the
# nginx ingress class.
#
# ClusterIssuer objects are validated by the cert-manager webhook, so make
# sure it is available first; otherwise the apply can be rejected.
kubectl wait --for=condition=Available deployment/cert-manager-webhook -n cert-manager --timeout=300s

# NOTE(review): the contact domain "teamsuqad.io" looks like a possible typo;
# confirm the address before relying on expiry notices.
# The delimiter is quoted so the manifest is passed through literally.
kubectl apply -f - <<'EOL'
apiVersion: cert-manager.io/v1
kind: ClusterIssuer
metadata:
  name: letsencrypt-prod
spec:
  acme:
    server: https://acme-v02.api.letsencrypt.org/directory
    email: hector+aiworker@teamsuqad.io
    privateKeySecretRef:
      name: letsencrypt-prod
    solvers:
    - http01:
        ingress:
          class: nginx
---
apiVersion: cert-manager.io/v1
kind: ClusterIssuer
metadata:
  name: letsencrypt-staging
spec:
  acme:
    server: https://acme-staging-v02.api.letsencrypt.org/directory
    email: hector+aiworker@teamsuqad.io
    privateKeySecretRef:
      name: letsencrypt-staging
    solvers:
    - http01:
        ingress:
          class: nginx
EOL
echo -e "${GREEN}✓ Let's Encrypt issuers created${NC}"

# Create namespaces
# All four project namespaces share one manifest shape (a `name` label equal
# to the namespace name plus `environment: production`), so the documents are
# generated in a loop and sent to a single `kubectl apply` as one YAML stream.
doc_separator=""
for ns in control-plane agents gitea monitoring; do
  # Emit `---` between documents, but not before the first one.
  printf '%s' "${doc_separator}"
  cat <<EOL
apiVersion: v1
kind: Namespace
metadata:
  name: ${ns}
  labels:
    name: ${ns}
    environment: production
EOL
  doc_separator=$'---\n'
done | kubectl apply -f -
echo -e "${GREEN}✓ Project namespaces created${NC}"

# ArgoCD
# NOTE(review): the manifest is fetched from the `stable` ref, so the
# installed version can change between runs; consider pinning a release tag.
echo " Installing ArgoCD..."
# Render the namespace and apply it so a re-run does not abort with
# "AlreadyExists", which a plain `kubectl create namespace` would do.
kubectl create namespace argocd --dry-run=client -o yaml | kubectl apply -f -
kubectl apply -n argocd -f https://raw.githubusercontent.com/argoproj/argo-cd/stable/manifests/install.yaml
# Wait on the Deployment, not on pods: the Deployment exists as soon as
# `apply` returns, whereas a pod selector can match nothing yet and make
# `kubectl wait` fail immediately.
kubectl wait --for=condition=Available deployment -l app.kubernetes.io/name=argocd-server -n argocd --timeout=300s
echo -e "${GREEN}✓ ArgoCD installed${NC}"

# Get ArgoCD password
# Read the secret and decode it in two steps so a failing `kubectl get` aborts
# the script instead of being hidden behind base64's exit status.
argocd_password_b64=$(kubectl get secret argocd-initial-admin-secret -n argocd -o jsonpath="{.data.password}")
ARGOCD_PASSWORD=$(base64 -d <<<"${argocd_password_b64}")

# Final summary: cluster status, access details and follow-up steps.
echo -e "\n${GREEN}========================================${NC}"
echo -e "${GREEN}✅ Cluster installation complete!${NC}"
echo -e "${GREEN}========================================${NC}"
echo ""
echo -e "📊 Cluster Status:"
kubectl get nodes
echo ""
echo -e "🔐 Access Information:"
echo -e " Kubeconfig: ~/.kube/aiworker-config"
echo -e " ArgoCD: https://argocd.fuq.tv"
echo -e " Username: admin"
# printf '%s' prints the password verbatim; `echo -e` would interpret any
# backslash sequence it happened to contain.
printf ' Password: %s\n' "${ARGOCD_PASSWORD}"
echo ""
echo -e "💡 Next steps:"
# NOTE(review): these two addresses do not appear in the node lists configured
# at the top of the script; confirm they are the intended DNS targets.
echo -e " 1. Configure DNS: *.fuq.tv → 108.165.47.221, 108.165.47.203"
echo -e " 2. Deploy applications via ArgoCD"
echo -e " 3. Access ArgoCD at https://argocd.fuq.tv"
Reference in New Issue
Block a user