跳到主要内容

kubernetes安装

· 阅读需 4 分钟
GavinTan
DevOps Engineer

准备系统环境

关闭 swap

# kubelet refuses to run with swap enabled: turn it off now, and comment out
# the swap entry in /etc/fstab so it stays off after reboot.
swapoff -a
sed -i 's/.*swap/#&/' /etc/fstab

修改内核参数

# Kernel parameters required by Kubernetes networking (bridged traffic must
# traverse iptables) plus common tuning, written as a sysctl drop-in file.
cat <<EOF > /etc/sysctl.d/k8s.conf
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
net.ipv4.ip_nonlocal_bind = 1
net.ipv4.ip_forward = 1
vm.swappiness = 0
vm.max_map_count = 262144
net.netfilter.nf_conntrack_max = 1000000
EOF

# Load br_netfilter on every boot — the bridge-nf-call-* keys above only
# exist once this module is loaded.
cat <<EOF | sudo tee /etc/modules-load.d/k8s.conf
br_netfilter
EOF

# Raise the open-file limit for all users (effective on next login).
echo "* soft nofile 65536" >> /etc/security/limits.conf
echo "* hard nofile 65536" >> /etc/security/limits.conf

# Load the module now and apply the sysctl settings immediately.
modprobe br_netfilter
sysctl -p /etc/sysctl.d/k8s.conf

安装 docker

curl -fsSL https://get.docker.com/ | sh -s -- --mirror Aliyun

创建 docker 配置文件

/etc/docker/daemon.json
{
  "registry-mirrors": ["https://fl791z1h.mirror.aliyuncs.com"],
  "exec-opts": ["native.cgroupdriver=systemd"],
  "log-driver": "json-file",
  "log-opts": {
    "max-size": "100m"
  },
  "storage-driver": "overlay2"
}

安装 kubernetes

# Add the Kubernetes yum repository. kubelet/kubeadm/kubectl are excluded so
# a routine `yum update` cannot upgrade them by accident (install them with
# --disableexcludes=kubernetes).
# NOTE(review): the packages.cloud.google.com yum repos were deprecated in
# favor of pkgs.k8s.io — confirm this URL still serves packages.
cat <<EOF | sudo tee /etc/yum.repos.d/kubernetes.repo
[kubernetes]
name=Kubernetes
baseurl=https://packages.cloud.google.com/yum/repos/kubernetes-el7-\$basearch
enabled=1
gpgcheck=1
repo_gpgcheck=1
gpgkey=https://packages.cloud.google.com/yum/doc/yum-key.gpg https://packages.cloud.google.com/yum/doc/rpm-package-key.gpg
exclude=kubelet kubeadm kubectl
EOF
  1. 安装工具

    yum install -y kubelet kubeadm kubectl --disableexcludes=kubernetes
  2. 初始化集群

    信息

    --service-cidr service ip 范围

    --pod-network-cidr pod ip 范围

    出现 tc not found in system path 错误: yum install -y iproute-tc

    # For an HA cluster, also add: --control-plane-endpoint=mycluster:443
    # --apiserver-advertise-address: this master's own IP address.
    # --image-repository: Aliyun mirror avoids pulling from k8s.gcr.io.
    # --service-cidr / --pod-network-cidr: virtual ranges; must not overlap
    # the host network.
    kubeadm init --kubernetes-version=1.18.0 --apiserver-advertise-address=172.16.7.14 --image-repository registry.aliyuncs.com/google_containers --service-cidr=10.10.0.0/16 --pod-network-cidr=10.244.0.0/16
  3. 安装网络插件

    # Install Calico via the Tigera operator, then apply the default
    # Installation custom resource.
    # NOTE(review): the stock custom-resources.yaml defines its own pod CIDR;
    # it likely needs editing to match --pod-network-cidr (10.244.0.0/16)
    # from kubeadm init above — confirm before applying.
    kubectl create -f https://raw.githubusercontent.com/projectcalico/calico/v3.26.4/manifests/tigera-operator.yaml
    kubectl create -f https://raw.githubusercontent.com/projectcalico/calico/v3.26.4/manifests/custom-resources.yaml
    提示

    calico 节点出现 master calico-node NotReady 状态,以及出现 Connect Socket: Connection reset by peer bird: BGP: Unexpected connect from unknown address 错误

    # Configure Calico IP auto-detection so nodes route over the correct
    # address; change interface= to match the actual physical NIC pattern.
    # With the operator-based install used above, calico-node runs in the
    # calico-system namespace (not kube-system) and the tigera-operator
    # reverts env changes made directly on the daemonset — set the detection
    # method on the Installation resource instead:
    kubectl patch installation default --type=merge \
      -p '{"spec":{"calicoNetwork":{"nodeAddressAutodetectionV4":{"interface":"eth.*"}}}}'

集群设置

启用 ipvs 模式

# Enable IPVS mode: in the kube-system/kube-proxy ConfigMap, edit config.conf
# and set  mode: "ipvs"

kubectl edit cm kube-proxy -n kube-system

# Restart kube-proxy so the new mode takes effect.
kubectl rollout restart daemonset kube-proxy -n kube-system

启用 vip

提示

多 master 须在每个 master 节点上面执行

# The VIP must be an unused address on the hosts' network.
export VIP=172.16.7.18
# The NIC carrying the host's primary address.
export INTERFACE=eth0

# Generate the kube-vip static-pod manifest (ARP mode, control-plane and
# Services load balancing, with leader election) straight into the kubelet
# manifest directory so kubelet launches it automatically.
# Expansions are quoted defensively per shell best practice.
ctr image pull ghcr.io/kube-vip/kube-vip:v0.4.0
ctr run --rm --net-host ghcr.io/kube-vip/kube-vip:v0.4.0 vip /kube-vip manifest pod \
--interface "$INTERFACE" \
--vip "$VIP" \
--controlplane \
--services \
--arp \
--leaderElection | tee /etc/kubernetes/manifests/kube-vip.yaml

安装 storageclass

需要修改 deployment.yaml 文件里的 nfs 服务地址与路径

git clone https://github.com/kubernetes-sigs/nfs-subdir-external-provisioner
# kubectl apply takes one path per -f flag (or a directory); listing several
# files after a single -f is an error, so each file gets its own -f.
cd nfs-subdir-external-provisioner && kubectl apply -f deploy/rbac.yaml -f deploy/class.yaml -f deploy/deployment.yaml

设置 nfs 默认 sc

kubectl patch storageclass managed-nfs-storage -p '{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"true"}}}'

安装 ingress

kubectl apply -f https://raw.githubusercontent.com/kubernetes/ingress-nginx/controller-v1.1.3/deploy/static/provider/baremetal/deploy.yaml

设置默认 ingress 类

kubectl patch ingressclass nginx -p '{"metadata": {"annotations":{"ingressclass.kubernetes.io/is-default-class":"true"}}}'

ingress-nginx 的 path 会被原样转发到后端服务时,在 ingress 配置中添加下面的注解

annotations:
  nginx.ingress.kubernetes.io/rewrite-target: /
信息

添加一个 ingress(通过 ingressClassName 可指定 ingressclass,不指定使用默认 ingressclass)

# Example Ingress. The original snippet had lost its indentation (invalid
# YAML) and omitted the required metadata.name field — both restored here.
kind: Ingress
apiVersion: networking.k8s.io/v1
metadata:
  name: zabbix-web
  namespace: ops
spec:
  ingressClassName: nginx
  rules:
    - host: zabbix.172.16.7.14.nip.io
      http:
        paths:
          - path: /
            pathType: ImplementationSpecific
            backend:
              service:
                name: zabbix-web
                port:
                  number: 8080

安装 kubernetes-dashboard

kubectl apply -f https://raw.githubusercontent.com/kubernetes/dashboard/v2.0.0/aio/deploy/recommended.yaml

授权

# Grant the dashboard's service account cluster-admin rights.
# NOTE(review): cluster-admin is convenient but very broad; consider a
# narrower role in production.
kubectl create clusterrolebinding serviceaccount-cluster-admin --clusterrole=cluster-admin --user=system:serviceaccount:kubernetes-dashboard:kubernetes-dashboard

#kubectl create clusterrolebinding serviceaccount-cluster-admin --clusterrole=cluster-admin --group=system:serviceaccount

获取 token

kubectl describe secrets -n kubernetes-dashboard $(kubectl -n kubernetes-dashboard get secret|grep kubernetes-dashboard-token|awk '{print $1}')| grep token | awk 'NR==3{print $2}'

集群外部访问

# Expose the dashboard outside the cluster: in the Service spec change
# type from ClusterIP to NodePort (optionally pin a nodePort).
kubectl edit svc kubernetes-dashboard -n kubernetes-dashboard

安装 kubesphere

# Install the KubeSphere operator, then apply the cluster configuration.
kubectl apply -f https://github.com/kubesphere/ks-installer/releases/download/v3.1.1/kubesphere-installer.yaml


kubectl apply -f https://github.com/kubesphere/ks-installer/releases/download/v3.1.1/cluster-configuration.yaml

检查安装日志

kubectl logs -n kubesphere-system $(kubectl get pod -n kubesphere-system -l app=ks-install -o jsonpath='{.items[0].metadata.name}') -f

常用操作

触发滚动更新

kubectl rollout restart deploy myapp-deploy -n ops


# Older versions without `rollout restart`: bump a pod-template annotation to
# trigger a rolling update.
revision=$(kubectl -n ops get deploy opsinterface-v1 -ojson | jq -r '.metadata.annotations."deployment.kubernetes.io/revision"|tonumber+1')
# The annotation value must be a JSON string, and $revision cannot expand
# inside single quotes — the quotes are closed around the expansion.
kubectl patch deployment test01-app1 -p '{"spec":{"template": {"metadata": {"annotations": {"deployment.kubernetes.io/revision": "'"$revision"'"}}}}}'

kubectl patch deployment test01-app1 -p '{"spec":{"template": {"metadata": {"annotations": {"kubectl.kubernetes.io/restartedAt": "'"$(date -Iseconds)"'"}}}}}'

新节点加入集群

# Print a ready-to-run `kubeadm join` command (creates a fresh token).
kubeadm token create --print-join-command

导出当前集群配置

kubeadm config view > k8s.yaml

更新证书

kubeadm alpha certs renew all --config=k8s.yaml

修改节点 ROLES

kubectl label --overwrite nodes nodename kubernetes.io/role=node1

kubectl 自动补全

echo "source <(kubectl completion bash)" >> ~/.bashrc

疑难解答

卡住 Terminating 状态的资源无法删除

删除资源 yml 配置里的 finalizers 内容即可

kubectl patch ns/myns -p '{"metadata":{"finalizers":[]}}' --type=merge

etcd 无法启动(节点挂掉)

--force-new-cluster 添加该参数覆盖旧集群信息,正常启动后可去掉

# Add --force-new-cluster to the etcd static-pod args to discard the stale
# member list; remove the flag again once etcd starts cleanly.
vim /etc/kubernetes/manifests/etcd.yaml

# Show etcd cluster member status.
etcdctl --cacert=/etc/kubernetes/pki/etcd/ca.crt --cert=/etc/kubernetes/pki/etcd/server.crt --key=/etc/kubernetes/pki/etcd/server.key endpoint status --cluster -w table

删除所有 Evicted Pod

kubectl get pods --all-namespaces -o json | jq '.items[] | select(.status.reason!=null) | select(.status.reason | contains("Evicted")) | "kubectl delete pods \(.metadata.name) -n \(.metadata.namespace)"' | xargs -n 1 bash -c

修复/etc/kubernetes 所有文件

  1. 生成集群配置文件

    # Regenerate certificates, kubeconfigs, control-plane static-pod
    # manifests and the local etcd manifest from the saved cluster config.
    kubeadm init phase certs all --config k8s.yml
    kubeadm init phase kubeconfig all --config k8s.yml
    kubeadm init phase control-plane all --config k8s.yml
    kubeadm init phase etcd local --config k8s.yml
  2. 更新 cluster-info 配置

    kubeadm init phase bootstrap-token
  3. 重启控制平面组件

    docker ps |grep -E 'k8s_kube-apiserver|k8s_kube-controller-manager|k8s_kube-scheduler|k8s_etcd_etcd' | awk  '{print $1}'|xargs docker restart
  4. 修复 kubelet 配置

    # Recreate the kubelet client certificate and kubeconfig, then let
    # kubelet-start write the config and restart the service.
    systemctl stop kubelet
    rm -rf /var/lib/kubelet/pki/ /etc/kubernetes/kubelet.conf
    kubeadm init phase kubeconfig kubelet --config k8s.yml
    kubeadm init phase kubelet-start --config k8s.yml