#!/bin/bash
#
# Node health check script for a K3s cluster.
# Usage: bash check-node-health.sh
#
# Prints node, pod, storage, ingress and certificate listings obtained from
# kubectl, followed by summary counts and a coarse health rating.
# The exit status is non-zero only when kubectl cannot be found; the health
# rating itself does not influence the exit status.
#

# Colored output (escape sequences are interpreted by `echo -e`).
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly BLUE='\033[0;34m'
readonly NC='\033[0m' # No Color

#######################################
# Print a three-line blue banner.
# Arguments: $1 - banner title
# Outputs:   the banner on stdout
#######################################
print_banner() {
  echo -e "${BLUE}================================${NC}"
  echo -e "${BLUE}$1${NC}"
  echo -e "${BLUE}================================${NC}"
}

#######################################
# Print the yellow heading of one check step.
# Arguments: $1 - step number (out of 8)
#            $2 - step description
# Outputs:   the heading on stdout
#######################################
print_step() {
  echo -e "${YELLOW}[$1/8] $2${NC}"
}

#######################################
# Count rows of a `kubectl get ... --no-headers` listing.
# Blank lines are ignored, so an empty listing yields 0.
# Arguments: $1 - listing text (may be empty)
#            $2 - optional 1-based column index to test (0/omitted = no test)
#            $3 - optional value the column must equal exactly
# Outputs:   the number of matching rows on stdout
#######################################
count_rows() {
  local listing=$1 col=${2:-0} want=${3:-}
  awk -v col="$col" -v want="$want" \
    'NF && (col == 0 || $col == want) { n++ } END { print n + 0 }' \
    <<<"$listing"
}

#######################################
# Print the overall health rating:
#   "良好" when every node is Ready and more than 80% of pods are Running,
#   "一般" when more than half of the nodes are Ready,
#   "异常" otherwise.
# Arguments: $1 - total nodes    $2 - ready nodes
#            $3 - total pods     $4 - running pods
# Outputs:   one colored rating line on stdout
#######################################
print_health_rating() {
  local total_nodes=$1 ready_nodes=$2 total_pods=$3 running_pods=$4
  # NOTE(review): pods in status Completed count toward the total but not
  # toward Running, which lowers the ratio — confirm this is intended.
  if (( ready_nodes == total_nodes && running_pods > total_pods * 80 / 100 )); then
    echo -e "${GREEN}✓ 集群健康状态: 良好${NC}"
  elif (( ready_nodes > total_nodes / 2 )); then
    echo -e "${YELLOW}⚠ 集群健康状态: 一般${NC}"
  else
    echo -e "${RED}✗ 集群健康状态: 异常${NC}"
  fi
}

#######################################
# Run all eight checks, then print statistics and the health rating.
# Returns: 1 when kubectl is not available, 0 otherwise
#######################################
main() {
  # Fail fast with a single clear message instead of a cascade of
  # "command not found" errors followed by a misleading rating.
  if ! command -v kubectl >/dev/null 2>&1; then
    echo -e "${RED}✗ 未找到 kubectl 命令${NC}" >&2
    return 1
  fi

  print_banner "K3s 集群健康检查"
  echo ""

  # 1. Node status
  print_step 1 "检查节点状态..."
  kubectl get nodes -o wide
  echo ""

  # 2. Node resource usage (errors from `kubectl top` are hidden on purpose
  #    and replaced by a single warning line)
  print_step 2 "检查节点资源使用..."
  kubectl top nodes 2>/dev/null \
    || echo -e "${YELLOW}⚠ metrics-server 未就绪${NC}"
  echo ""

  # 3. System pods
  print_step 3 "检查系统组件..."
  kubectl get pods -n kube-system
  echo ""

  # 4. Longhorn storage (first 10 lines of output only)
  print_step 4 "检查 Longhorn 存储..."
  kubectl get pods -n longhorn-system | head -10
  echo ""

  # 5. Persistent volume claims
  print_step 5 "检查持久化存储卷..."
  kubectl get pvc -A
  echo ""

  # 6. Application pods: drop rows whose NAMESPACE column is one of the
  #    infrastructure namespaces. Matching the column (rather than the whole
  #    line) keeps application pods whose name merely contains such a string.
  #    The header row is kept; output is limited to 20 lines.
  print_step 6 "检查应用 Pods..."
  kubectl get pods -A \
    | awk '$1 != "kube-system" && $1 != "longhorn-system" && $1 != "cert-manager"' \
    | head -20
  echo ""

  # 7. Ingress
  print_step 7 "检查 Ingress 配置..."
  kubectl get ingress -A
  echo ""

  # 8. Certificates
  print_step 8 "检查 SSL 证书..."
  kubectl get certificate -A
  echo ""

  # Statistics. Each listing is fetched once so that the total and the
  # filtered count are derived from the same snapshot.
  print_banner "集群统计信息"

  local nodes pods pvcs
  nodes=$(kubectl get nodes --no-headers)
  pods=$(kubectl get pods -A --no-headers)
  pvcs=$(kubectl get pvc -A --no-headers)

  local total_nodes ready_nodes total_pods running_pods total_pvc bound_pvc
  total_nodes=$(count_rows "$nodes")
  ready_nodes=$(count_rows "$nodes" 2 "Ready")   # column 2 = STATUS
  total_pods=$(count_rows "$pods")
  running_pods=$(count_rows "$pods" 4 "Running") # column 4 = STATUS (with -A)
  total_pvc=$(count_rows "$pvcs")
  bound_pvc=$(count_rows "$pvcs" 3 "Bound")      # column 3 = STATUS (with -A)

  echo -e "节点总数: ${GREEN}${total_nodes}${NC} (就绪: ${GREEN}${ready_nodes}${NC})"
  echo -e "Pod 总数: ${GREEN}${total_pods}${NC} (运行中: ${GREEN}${running_pods}${NC})"
  echo -e "PVC 总数: ${GREEN}${total_pvc}${NC} (已绑定: ${GREEN}${bound_pvc}${NC})"
  echo ""

  # Health rating
  print_health_rating "$total_nodes" "$ready_nodes" "$total_pods" "$running_pods"
  echo ""
}

main "$@"