K8S Deployment HA

文章目录

- K8S Deployment HA

1.机器规划

IP	主机名	角色
10.83.195.6	master1	master
10.83.195.7	master2	master
10.83.195.8	master3	master
10.83.195.9	node1	node
10.83.195.10	node2	node
10.83.195.250		VIP

2.前期准备

2.1 安装ansible

# master1节点
yum install -y ansible

2.2 修改 hostname

# 修改hostname
hostnamectl set-hostname xxx

# 配置hosts
# 127.0.0.1 localhost xxx ::1 localhost6xxx 需要保留，否则calico pod会报错
ansible -i /opt/ansible/nodes all -m shell -a "cat >> /etc/hosts<<EOF
127.0.0.1 localhost localhost.localdomain localhost4 localhost4.localdomain4
::1 localhost6 localhost6.localdomain6 localhost6.localdomain
10.83.195.6 master1
10.83.195.7 master2
10.83.195.8 master3
10.83.195.9 node1
10.83.195.10 node2
EOF
"

2.3 配置免密

# 生成ssh密钥对
ssh-keygen 

# root免密
ansible -i /opt/ansible/nodes all -m shell -a "sudo sed -i 's/PermitRootLogin no/PermitRootLogin yes/' /etc/ssh/sshd_config && sudo grep PermitRootLogin /etc/ssh/sshd_config && sudo systemctl restart sshd"

# master1 ssh-copy-id 
ssh-copy-id  10.83.195.6
# 可以把 maste1的公私钥 拷贝到 master2、3节点，方便免密

2.4 时间同步

ansible -i /opt/ansible/nodes all -m shell -a "yum install chrony -y"
ansible -i /opt/ansible/nodes all -m shell -a "systemctl start chronyd && systemctl enable chronyd && chronyc sources"

2.5 系统参数调整

# 临时关闭；关闭swap主要是为了性能考虑
# 通过free命令查看swap是否关闭
ansible -i /opt/ansible/nodes all -m shell -a 'sudo swapoff -a && free'

# 永久关闭        
ansible -i /opt/ansible/nodes all -m shell -a "sudo sed -i 's/.*swap.*/#&/' /etc/fstab"

# 禁用SELinux 
# 临时关闭
ansible -i /opt/ansible/nodes all -m shell -a "setenforce 0"
# 永久禁用
ansible -i /opt/ansible/nodes all -m shell -a "sed -i 's/^SELINUX=enforcing$/SELINUX=disabled/' /etc/selinux/config"

# 关闭防火墙
ansible -i /opt/ansible/nodes all -m shell -a "systemctl stop firewalld && systemctl disable firewalld"

# 允许 iptables 检查桥接流量
ansible -i /opt/ansible/nodes all -m shell -a "sudo modprobe br_netfilter && lsmod | grep br_netfilter"

ansible -i /opt/ansible/nodes all -m shell -a "sudo cat <<EOF | sudo tee /etc/modules-load.d/k8s.conf
overlay
br_netfilter
EOF"

ansible -i /opt/ansible/nodes all -m shell -a "sudo modprobe overlay && sudo modprobe br_netfilter"

# 设置所需的 sysctl 参数，参数在重新启动后保持不变
ansible -i /opt/ansible/nodes all -m shell -a "sudo cat <<EOF | sudo tee /etc/sysctl.d/k8s.conf
net.bridge.bridge-nf-call-iptables  = 1
net.bridge.bridge-nf-call-ip6tables = 1
net.ipv4.ip_forward                 = 1
EOF"

ansible -i /opt/ansible/nodes all -m shell -a "echo 1|sudo tee /proc/sys/net/ipv4/ip_forward"

# 应用 sysctl 参数而不重新启动
ansible -i /opt/ansible/nodes all -m shell -a "sudo sysctl --system"

2.6 安装 Docker

# centos7
ansible -i /opt/ansible/nodes all -m shell -a "wget -O /etc/yum.repos.d/CentOS-Base.repo http://mirrors.aliyun.com/repo/Centos-7.repo"

# centos8
# wget -O /etc/yum.repos.d/CentOS-Base.repo http://mirrors.aliyun.com/repo/Centos-8.repo

# 安装yum-config-manager配置工具
ansible -i /opt/ansible/nodes all -m shell -a "sudo yum -y install yum-utils"
# 设置yum源
ansible -i /opt/ansible/nodes all -m shell -a "sudo yum-config-manager --add-repo http://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo"

# 软链，修改docker镜像存储目录
ansible -i /opt/ansible/nodes all -m shell -a "sudo mkdir /data/docker && sudo ln -s /data/docker /var/lib/docker"

# 安装docker-ce版本
ansible -i /opt/ansible/nodes all -m shell -a "sudo yum install -y docker-ce"
# 自启、启动
ansible -i /opt/ansible/nodes all -m shell -a "sudo systemctl start docker && sudo systemctl enable docker && sudo docker --version"

# 查看版本号
# sudo docker --version
# 查看版本具体信息
# sudo docker version

# 修改Docker镜像源设置
# 修改文件 /etc/docker/daemon.json，没有这个文件就创建
ansible -i /opt/ansible/nodes all -m shell -a 'sudo cat <<EOF | sudo tee /etc/docker/daemon.json
{
  "registry-mirrors": ["https://ogeydad1.mirror.aliyuncs.com"],
   "exec-opts": ["native.cgroupdriver=systemd"]
}
EOF
'

# 重载、重启 docker
ansible -i /opt/ansible/nodes all -m shell -a "sudo systemctl reload docker &&sudo systemctl restart docker && sudo systemctl status docker"

2.7 部署 Haproxy+Keepalived

K8S Master HA 通过 Haproxy+Keepalived 实现

# 3个master节点上执行
ansible -i /opt/ansible/nodes master -m shell -a "yum install  keepalived haproxy  -y"

修改 haproxy.cfg配置

# vim /etc/haproxy/haproxy.cfg 追加如下配置
frontend k8s-master
  bind 0.0.0.0:16443
  mode tcp
  option tcplog
  tcp-request inspect-delay 5s
  default_backend k8s-master

backend k8s-master
  mode tcp
  option tcplog
  option tcp-check
  balance roundrobin
  default-server inter 10s downinter 5s rise 2 fall 2 slowstart 60s maxconn 250 maxqueue 256 weight 100
  server master1 10.83.195.6:6443  check inter 10000 fall 2 rise 2 weight 100
  server master2 10.83.195.7:6443  check inter 10000 fall 2 rise 2 weight 100
  server master3 10.83.195.8:6443  check inter 10000 fall 2 rise 2 weight 100

# 分发到其他master
ansible -i /opt/ansible/nodes master -m copy -a "src=/etc/haproxy/haproxy.cfg dest=/etc/haproxy/haproxy.cfg"

修改keepalived.conf配置

# vim /etc/keepalived/keepalived.conf 替换内容
# state: 主节点为MASTER，从节点为BACKUP
# interface: ifconfig 查看网卡名
# priority: MASTER使用101，BACKUP使用100

# master
! Configuration File for keepalived
global_defs {
    script_user root
    enable_script_security
    router_id LVS_DEVEL
}
vrrp_script check_apiserver {
  script "/etc/keepalived/check_k8s.sh"
  interval 3
  weight -2
  fall 2
  rise 2
}

vrrp_instance VI_1 {
    # 主节点为MASTER，从节点为BACKUP
    state MASTER
    # 网卡名
    interface ens192
    virtual_router_id 51
    # MASTER当中使用101，BACKUP当中使用100
    priority 101
    authentication {
        auth_type PASS
        auth_pass admin
    }
    virtual_ipaddress {
        # VIP
        10.83.195.250
    }
    track_script {
        check_k8s
    }
}

# backup
! Configuration File for keepalived
global_defs {
    router_id LVS_DEVEL
}
vrrp_script check_apiserver {
  script "/etc/keepalived/check_k8s.sh"
  interval 3
  weight -2
  fall 2
  rise 2
}

vrrp_instance VI_1 {
    # 主节点为MASTER，从节点为BACKUP
    state BACKUP
    # 网卡名
    interface ens192
    virtual_router_id 51
    # MASTER当中使用101，BACKUP当中使用100
    priority 100
    authentication {
        auth_type PASS
        auth_pass admin
    }
    virtual_ipaddress {
        # VIP
        10.83.195.250
    }
    track_script {
        check_k8s
    }
}

检测脚本 check_k8s.sh

#!/bin/bash

function check_k8s() {
 for ((i=0;i<5;i++));do
  apiserver_pid_id=$(pgrep kube-apiserver)
  if [[ ! -z $apiserver_pid_id ]];then
   return
  else
   sleep 2
  fi
  apiserver_pid_id=0
 done
}

# 1:running  0:stopped
check_k8s
if [[ $apiserver_pid_id -eq 0 ]];then
 /usr/bin/systemctl stop keepalived
 exit 1
else
 exit 0
fi

# 分发
ansible -i /opt/ansible/nodes master -m copy -a "src=/etc/keepalived/check_k8s.sh dest=/etc/keepalived/"
ansible -i /opt/ansible/nodes master -m shell -a "chmod +x /etc/keepalived/check_k8s.sh"

# 启动
ansible -i /opt/ansible/nodes master -m shell -a "systemctl enable --now keepalived haproxy"

# 查看VIP
ip a

3. 部署 K8S

3.1 安装 k8s命令

# 所有节点
ansible -i /opt/ansible/nodes all -m shell -a "sudo cat <<EOF | sudo tee /etc/yum.repos.d/kubernetes.repo
[k8s]
name=k8s
enabled=1
gpgcheck=0
baseurl=https://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64/
EOF
"

# disableexcludes=kubernetes：禁掉除了这个kubernetes之外的别的仓库
ansible -i /opt/ansible/nodes all -m shell -a "yum install -y kubelet-1.23.6 kubeadm-1.23.6 kubectl-1.23.6 --disableexcludes=kubernetes"

# 查看k8s版本 
# sudo kubectl version命令 会报错正常 Unable to connect to the server: dial tcp: lookup localhost on 10.82.26.252:53: no such host
ansible -i /opt/ansible/nodes all -m shell -a "sudo kubectl version && sudo yum info kubeadm"

# 设置为开机自启并现在立刻启动服务 --now：立刻启动服务
ansible -i /opt/ansible/nodes all -m shell -a "sudo systemctl enable --now kubelet && sudo systemctl status kubelet"

3.2 k8s初始化

# master1 节点执行
# --control-plane-endpoint VIP:16443
# --pod-network-cidr=192.168.0.0/16 需要与calico.yaml 文件中的 CALICO_IPV4POOL_CIDR 配置网段一致
kubeadm init --image-repository registry.aliyuncs.com/google_containers --kubernetes-version v1.23.6 --pod-network-cidr=192.168.0.0/16 --control-plane-endpoint 10.83.195.250:16443 --upload-cert

# Your Kubernetes control-plane has initialized successfully!

# To start using your cluster, you need to run the following as a regular user:

#   mkdir -p $HOME/.kube
#   sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
#   sudo chown $(id -u):$(id -g) $HOME/.kube/config

# Alternatively, if you are the root user, you can run:

#   export KUBECONFIG=/etc/kubernetes/admin.conf

# You should now deploy a pod network to the cluster.
# Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
#   https://kubernetes.io/docs/concepts/cluster-administration/addons/

# You can now join any number of the control-plane node running the following command on each as root:

#   kubeadm join 10.83.195.250:16443 --token 6z1jge.6hue81vruwh8msdl \
# 	--discovery-token-ca-cert-hash sha256:a3db8061e0b570e897b2d0e7c243ef7342c51299d04ef649737187e50aee8ea6 \
# 	--control-plane --certificate-key 35e73eae794acd9275445902cfd8d545a0e3b8e017f8d5960bd2e6796f74c386

# Please note that the certificate-key gives access to cluster sensitive data, keep it secret!
# As a safeguard, uploaded-certs will be deleted in two hours; If necessary, you can use
# "kubeadm init phase upload-certs --upload-certs" to reload certs afterward.

# Then you can join any number of worker nodes by running the following on each as root:

# kubeadm join 10.83.195.250:16443 --token 6z1jge.6hue81vruwh8msdl \
# 	--discovery-token-ca-cert-hash sha256:a3db8061e0b570e897b2d0e7c243ef7342c51299d04ef649737187e50aee8ea6

3.3 添加其他master节点

# You can now join any number of the control-plane node running the following command on each as root:

  kubeadm join 10.83.195.250:16443 --token 6z1jge.6hue81vruwh8msdl \
	--discovery-token-ca-cert-hash sha256:a3db8061e0b570e897b2d0e7c243ef7342c51299d04ef649737187e50aee8ea6 \
	--control-plane --certificate-key 35e73eae794acd9275445902cfd8d545a0e3b8e017f8d5960bd2e6796f74c386
	
# 3个master节点
# 临时生效（退出当前窗口重连环境变量失效）
export KUBECONFIG=/etc/kubernetes/admin.conf
# 永久生效（推荐）
echo "export KUBECONFIG=/etc/kubernetes/admin.conf" >> ~/.bash_profile && source  ~/.bash_profile

# 重新部署
# kubeadm reset
# rm -rf $HOME/.kube && rm -rf /etc/cni/net.d && rm -rf /etc/kubernetes/*
# 再执行kubeadm init 命令

3.4 添加 Node节点

# Then you can join any number of worker nodes by running the following on each as root:
# kubeadm token create --print-join-command

kubeadm join 10.83.195.250:16443 --token 6z1jge.6hue81vruwh8msdl \
	--discovery-token-ca-cert-hash sha256:a3db8061e0b570e897b2d0e7c243ef7342c51299d04ef649737187e50aee8ea6

3.5 安装 CNI

# master1 节点
# kubectl apply -f https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml

# master1节点执行
# 下载 calico 配置文件，可能会网络超时
curl https://docs.projectcalico.org/manifests/calico.yaml -O # 生成重定向链接
curl https://calico-v3-25.netlify.app/archive/v3.25/manifests/calico.yaml -O
kubectl apply -f calico.yaml

# 修改 calico.yaml 文件中的 CALICO_IPV4POOL_CIDR 配置，修改为与初始化的 cidr 相同
# 修改 IP_AUTODETECTION_METHOD 下的网卡名称
# 删除镜像 docker.io/ 前缀，避免下载过慢导致失败
# sed -i 's#docker.io/##g' calico.yaml

3.6 查看pod状态

kubectl get pods -A

3.7 配置IPVS

解决集群内无法ping通ClusterIP（或ServiceName）

# 加载ip_vs相关内核模块
ansible -i /opt/ansible/nodes all -m shell -a "sudo modprobe -- ip_vs && sudo modprobe -- ip_vs_sh && sudo sudo modprobe -- ip_vs_rr && sudo modprobe -- ip_vs_wrr && sudo modprobe -- nf_conntrack_ipv4"

# 验证开启ipvs：
ansible -i /opt/ansible/nodes all -m shell -a "sudo lsmod |grep ip_vs"

# 安装ipvsadm工具
ansible -i /opt/ansible/nodes all -m shell -a "sudo yum install ipset ipvsadm -y"

# 编辑kube-proxy配置文件，mode修改成ipvs
kubectl edit  configmap -n kube-system  kube-proxy

# 先查看
kubectl get pod -n kube-system | grep kube-proxy
# delete让它自拉起
kubectl get pod -n kube-system | grep kube-proxy |awk '{system("kubectl delete pod "$1" -n kube-system")}'
# 再查看
kubectl get pod -n kube-system | grep kube-proxy

# 查看ipvs转发规则
ipvsadm -Ln