Kubeadm 安装 k8s 集群指南

使用kubeadm安装k8s,附带使用外部tls加密的etcd集群连接配置

etcd tls 集群安装:[[ETCD 集群安装配置]]
tls 证书创建:[[CFSSL 创建证书]]

前置工作

前置工作需要在所有的节点上执行

配置要求

CPU 推荐两核或者更多
内存 不得小于 2G
MAC地址 保证唯一
交换分区 禁用
节点之间保持网络通畅

修改主机名

各个节点修改成自己的名字

hostnamectl set-hostname <name>

修改 hosts

配置各个节点的ip主机名映射

# vim /etc/hosts
192.168.5.128 k8s-master

关闭防火墙, 开启内核网络参数

systemctl stop firewalld
systemctl disable firewalld

# vi /etc/sysctl.conf  # 编辑配置文件
# 追加下面两行
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1

sysctl -p  # 应用配置

# Rocky Linux 9.1 arm 版本 最小化安装
# 需要额外执行下面的操作,并加入系统自动启动
modprobe br_netfilter

echo modprobe br_netfilter >> /etc/rc.d/rc.local
chmod +x /etc/rc.d/rc.local

关闭 SELinux

setenforce 0
sed -i "s/SELINUX=enforcing/SELINUX=disabled/g" /etc/selinux/config

关闭 swap

注释掉 /etc/fstab 文件中包含 swap 的那一行, 如下方文件内容示例所示

# vim /etc/fstab
# Created by anaconda on Wed Jan  6 20:22:34 2021
#
# Accessible filesystems, by reference, are maintained under '/dev/disk'
# See man pages fstab(5), findfs(8), mount(8) and/or blkid(8) for more info
#
/dev/mapper/centos-root /                       ext4    defaults        1 1
UUID=b6a81016-1920-44c6-b713-2547ccbc9adf /boot                   ext4    defaults        1 2
/dev/mapper/centos-home /home                   ext4    defaults        1 2
# /dev/mapper/centos-swap swap                    swap    defaults        0 0

重启

reboot

安装 Docker

所有的节点都必须安装docker且设置服务为开机自动启动

# 移除机器上已经安装的 docker
yum remove docker \
    docker-client \
    docker-client-latest \
    docker-common \
    docker-latest \
    docker-latest-logrotate \
    docker-logrotate \
    docker-engine
    
# 安装依赖
yum install -y yum-utils \
    device-mapper-persistent-data \
    lvm2
# 添加镜像源
yum-config-manager \
--add-repo http://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo
# 安装
yum install docker-ce docker-ce-cli containerd.io -y
# 安装指定版本 docker, 安装其他软件也是一样
yum list docker-ce --showduplicates | sort -r
# Last metadata expiration check: 0:32:36 ago on Mon 16 Aug 2021 02:15:13 PM CST.
# Installed Packages
# docker-ce.x86_64               3:20.10.8-3.el8                 docker-ce-stable
# docker-ce.x86_64               3:20.10.8-3.el8                 @docker-ce-stable
# docker-ce.x86_64               3:20.10.7-3.el8                 docker-ce-stable
# docker-ce.x86_64               3:20.10.6-3.el8                 docker-ce-stable
# docker-ce.x86_64               3:20.10.5-3.el8                 docker-ce-stable
# docker-ce.x86_64               3:20.10.4-3.el8                 docker-ce-stable
# docker-ce.x86_64               3:20.10.3-3.el8                 docker-ce-stable
# docker-ce.x86_64               3:20.10.2-3.el8                 docker-ce-stable
# docker-ce.x86_64               3:20.10.1-3.el8                 docker-ce-stable
# docker-ce.x86_64               3:20.10.0-3.el8                 docker-ce-stable
# docker-ce.x86_64               3:19.03.15-3.el8                docker-ce-stable
# docker-ce.x86_64               3:19.03.14-3.el8                docker-ce-stable
# docker-ce.x86_64               3:19.03.13-3.el8                docker-ce-stable
# Available Packages
# 选择上面 列出的版本进行安装,比如这里安装最新版的 20.10.8
yum install docker-ce-20.10.8-3.el8

# 启动服务,并设置为开机自启
systemctl start docker
systemctl enable docker

# 以下操作可选
# 更换 docker 的镜像源
# vim /etc/docker/daemon.json
{
    "registry-mirrors" : [
    "https://registry.docker-cn.com",
    "https://docker.mirrors.ustc.edu.cn",
    "http://hub-mirror.c.163.com",
    "https://cr.console.aliyun.com/"
  ]
}
# 如果当前用户非 root 用户,需要加入 docker 的用户组
# 加入 docker 组后,需要重启下系统,才能不使用 sudo 使用docker命令
sudo usermod -aG docker <your username>
# 重启docker
sudo systemctl restart docker

k8s 1.24 及以上版本使用 docker 必须执行

需要手动安装 CRI 适配器,也就是 cri-dockerd

正常的 x86 架构服务器,可以在 release 页面下载对应的安装包安装即可。arm 架构的需要手动编译安装

RockyLinux 9.1 编译安装过程,事先安装好 docker,并启动。

# 配置 go sdk
git clone https://github.com/Mirantis/cri-dockerd.git
cd cri-dockerd
mkdir bin
go build -o bin/cri-dockerd
mkdir -p /usr/local/bin
install -o root -g root -m 0755 bin/cri-dockerd /usr/local/bin/cri-dockerd
cp -a packaging/systemd/* /usr/lib/systemd/system
sed -i -e 's,/usr/bin/cri-dockerd,/usr/local/bin/cri-dockerd,' /usr/lib//systemd/system/cri-docker.service
systemctl daemon-reload
systemctl enable cri-docker.service
systemctl enable --now cri-docker.socket

安装 kubeadm,kubelet,kubectl

添加镜像源

# vim /etc/yum.repos.d/kubernetes.repo

# 内容, 注意 gpgkey 是一行, 两个 https 中间使用空格拆分
[kubernetes]
name=Kubernetes
baseurl=https://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-$basearch
enabled=1
gpgcheck=1
repo_gpgcheck=1
gpgkey=https://mirrors.aliyun.com/kubernetes/yum/doc/yum-key.gpg https://mirrors.aliyun.com/kubernetes/yum/doc/rpm-package-key.gpg

# 清理缓存,重建
yum clean all&&yum makecache

开始安装

yum install -y kubelet-1.21.4 kubeadm-1.21.4 kubectl-1.21.4

# Rocky Linux 9.1
yum install -y kubelet-1.26.1 kubeadm-1.26.1 kubectl-1.26.1

准备初始化集群<Master节点>

查看默认的初始化配置文件, 并导出成文件

kubeadm config print init-defaults > init-defaults.yaml

普通版本:按照下方示例提示文字,进行修改 如果使用 calico ,请事先查看本文章节 [[#配置 calico 网络(1.26.1 已验证)]]

apiVersion: kubeadm.k8s.io/v1beta2
bootstrapTokens:
- groups:
  - system:bootstrappers:kubeadm:default-node-token
  token: abcdef.0123456780abcdef   # token 设置
  ttl: 24h0m0s
  usages:
  - signing
  - authentication
kind: InitConfiguration
localAPIEndpoint:
  advertiseAddress: 192.168.5.128   # master 对外访问ip
  bindPort: 6443
nodeRegistration:
  criSocket: /var/run/dockershim.sock
  name: k8s-master   # master节点名称, 此名称须加入 hosts 文件解析
  taints: null
---
apiServer:
  timeoutForControlPlane: 4m0s
apiVersion: kubeadm.k8s.io/v1beta2
certificatesDir: /etc/kubernetes/pki
clusterName: kubernetes
controllerManager: {}
dns:
  type: CoreDNS
etcd:
  local:
    dataDir: /var/lib/etcd
imageRepository: registry.aliyuncs.com/google_containers  # 修改镜像地址
kind: ClusterConfiguration
kubernetesVersion: 1.26.0   # 待安装的 k8s 版本
networking:
  dnsDomain: cluster.local
  serviceSubnet: 10.244.0.0/16 # flannel 默认网段,为了方便如果不是本值,请修改为本值
scheduler: {}

如果是使用外部的etcd集群,并且是tls加密的需要把证书复制到所有节点相同的位置,修改初始化配置文件etcd部分内容,完整配置示例文件内容如下:

apiVersion: kubeadm.k8s.io/v1beta2
bootstrapTokens:
- groups:
  - system:bootstrappers:kubeadm:default-node-token
  token: abcdef.0123456789abcdef     # token 设置,其他子节点加入集群时使用
  ttl: 24h0m0s
  usages:
  - signing
  - authentication
kind: InitConfiguration
localAPIEndpoint:
  advertiseAddress: 192.168.5.200   # master 对外访问ip
  bindPort: 6443
nodeRegistration:
  criSocket: /var/run/dockershim.sock
  name: k8s-master-1     # master节点名称, 此名称须加入 hosts 文件解析
  taints: null
---
apiServer:
  timeoutForControlPlane: 4m0s
apiVersion: kubeadm.k8s.io/v1beta2
certificatesDir: /etc/kubernetes/pki
clusterName: kubernetes
controllerManager: {}
dns:
  type: CoreDNS
# 配置 etcd 集群信息
etcd:
  external:
    endpoints:
    - https://192.168.5.200:2379
    - https://192.168.5.201:2379
    - https://192.168.5.202:2379
    - https://192.168.5.203:2379
    - https://192.168.5.204:2379
    caFile: /root/etcd/cert/ca.pem
    certFile: /root/etcd/cert/etcd.pem
    keyFile: /root/etcd/cert/etcd-key.pem
# 原本的 etcd 配置内容,数据存储在本地
# etcd:
#  local:
#    dataDir: /var/lib/etcd
imageRepository: registry.aliyuncs.com/google_containers  # 修改镜像地址
kind: ClusterConfiguration
kubernetesVersion: 1.21.0   # k8s 版本
networking:
  dnsDomain: cluster.local
  serviceSubnet: 10.244.0.0/16 # flannel 默认网段,为了方便如果不是本值,请修改为本值
scheduler: {}

查看并下载镜像

可以事先下载然后导入到自己本地的docker

# 查看需要下载哪些镜像
[root@k8s-master k8s-install-file]# kubeadm config images list --config init-defaults.yaml
registry.aliyuncs.com/k8sxio/kube-apiserver:v1.22.0
registry.aliyuncs.com/k8sxio/kube-controller-manager:v1.22.0
registry.aliyuncs.com/k8sxio/kube-scheduler:v1.22.0
registry.aliyuncs.com/k8sxio/kube-proxy:v1.22.0
registry.aliyuncs.com/k8sxio/pause:3.5
registry.aliyuncs.com/k8sxio/etcd:3.5.0-0
registry.aliyuncs.com/k8sxio/coredns:v1.8.4

# 开始下载镜像,防止直接安装因为某个镜像下载失败,导致整体安装失败
kubeadm config images pull --config init-defaults.yaml

# 如果发生下面这种拉取镜像错误,尝试使用docker直接搜索镜像,然后使用 docker tag 重新打标即可
[root@k8s-master k8s-install-file]# kubeadm config images pull --config init-defaults.yaml
[config/images] Pulled registry.aliyuncs.com/k8sxio/kube-apiserver:v1.21.0
[config/images] Pulled registry.aliyuncs.com/k8sxio/kube-controller-manager:v1.21.0
[config/images] Pulled registry.aliyuncs.com/k8sxio/kube-scheduler:v1.21.0
[config/images] Pulled registry.aliyuncs.com/k8sxio/kube-proxy:v1.21.0
[config/images] Pulled registry.aliyuncs.com/k8sxio/pause:3.4.1
[config/images] Pulled registry.aliyuncs.com/k8sxio/etcd:3.4.13-0
failed to pull image "registry.aliyuncs.com/k8sxio/coredns:v1.8.0": output: Error response from daemon: manifest for registry.aliyuncs.com/k8sxio/coredns:v1.8.0 not found: manifest unknown: manifest unknown
, error: exit status 1
To see the stack trace of this error execute with --v=5 or higher

# 搜索上面报错的镜像
[root@k8s-master k8s-install-file]# docker search coredns:v1.8.0
NAME                       DESCRIPTION                              STARS     OFFICIAL   AUTOMATED
louwy001/coredns-coredns   k8s.gcr.io/coredns/coredns:v1.8.0        1
ninokop/coredns            k8s.gcr.io/coredns/coredns:v1.8.0        0
xwjh/coredns               from k8s.gcr.io/coredns/coredns:v1.8.0   0
hhhlhh/coredns-coredns     FROM k8s.gcr.io/coredns/coredns:v1.8.0   0
suxishuo/coredns           k8s.gcr.io/coredns/coredns:v1.8.0        0
fengbb/coredns             k8s.gcr.io/coredns/coredns:v1.8.0        0

# 随便挑选一个拉取镜像
[root@k8s-master k8s-install-file]# docker pull louwy001/coredns-coredns:v1.8.0
v1.8.0: Pulling from louwy001/coredns-coredns
c6568d217a00: Pull complete
5984b6d55edf: Pull complete
Digest: sha256:10ecc12177735e5a6fd6fa0127202776128d860ed7ab0341780ddaeb1f6dfe61
Status: Downloaded newer image for louwy001/coredns-coredns:v1.8.0
docker.io/louwy001/coredns-coredns:v1.8.0

# 重新打标,并取消多余的tag名称
[root@k8s-master k8s-install-file]# docker tag louwy001/coredns-coredns:v1.8.0 registry.aliyuncs.com/k8sxio/coredns:v1.8.0
[root@k8s-master k8s-install-file]#
[root@k8s-master k8s-install-file]# docker rmi louwy001/coredns-coredns:v1.8.0
Untagged: louwy001/coredns-coredns:v1.8.0
Untagged: louwy001/coredns-coredns@sha256:10ecc12177735e5a6fd6fa0127202776128d860ed7ab0341780ddaeb1f6dfe61
[root@k8s-master k8s-install-file]#

卸载集群

如果 初始化集群失败了,或者参数错误,直接执行下面的命令还原设置

kubeadm reset
iptables -F 
iptables -X
ipvsadm -C
rm -rf /etc/cni/net.d
rm -rf $HOME/.kube/config 

开始初始化

kubeadm init --config init-defaults.yaml

初始化完成后, 根据提示执行初始设置, 并记录下 加入集群的命令和参数

# 集群配置文件
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config

# 开机自启 kubelet
systemctl enable kubelet.service

# 加入集群
kubeadm join 192.168.5.128:6443 --token abcdef.0123456780abcdef \
  --discovery-token-ca-cert-hash sha256:d27cf2fd4a45c3ce8c59cdf0163edbf7cd4bc55a994a34404c0e175a47770798

# 1.24.x 加入集群需要指定 --cri-socket 参数才可以正常加入,参考本文下一节

# 如果事后忘记保存加入命令,在master上执行下面的命令重新获取一个加入token
kubeadm token create --print-join-command

# 查看已有的token
kubeadm token list

其他节点接入集群

确认安装好 kubeadm , kubelet, kubectl

在节点机器上执行上面提示的加入集群命令, 并设置 kubelet 为开机自启。在 master 节点上拷贝集群配置文件给 node, 这样 node 才能正常使用 kubectl 命令,也可以不操作这一步

systemctl enable kubelet.service
scp /etc/kubernetes/admin.conf k8s-node-1:~/.kube/config

1.24.x 以后的版本由于 cri 的问题,如果还是使用 docker 的话,加入集群需要指定 cri-socket

kubeadm join 192.168.36.200:6443 --token abcdef.0123456789abcdef         --discovery-token-ca-cert-hash sha256:9af4803dd7446649d887ebdf0da47edc5e713fa9cb4e32bf7d7f8f49e75cb8fa --cri-socket=unix:///run/cri-dockerd.sock

配置 calico 网络(1.26.1 已验证)

官网快速开始地址: https://docs.tigera.io/calico/3.25/getting-started/kubernetes/quickstart#install-calico

根据官网下载两个 yaml 配置文件

wget https://raw.githubusercontent.com/projectcalico/calico/v3.25.0/manifests/tigera-operator.yaml

wget https://raw.githubusercontent.com/projectcalico/calico/v3.25.0/manifests/custom-resources.yaml

修改第二个配置文件 custom-resources.yaml 中的 ipPools.cidr 值为你局域网中不重复的网段,并且该值必须为 kubeadm-config 中 podSubnet 配置的值。

title: 提示

关于 podSubnet 设置有以下几种方式

- podSubnet 可以在使用 kubeadm init 初始化集群的时候使用 --pod-network-cidr=192.168.0.0/16 指定
- 修改 kubeadm config 导出的集群初始化配置文件中:networking.podSubnet 的值,不存在该字段就手动添加上。
- 如果错过了初始化,可以直接修改集群中的 kubeadm-config,使用命令:kubectl edit configmap kubeadm-config -n kube-system -o yaml,找到 networking 添加字段 podSubnet 如果存在就修改该值。
title: 关于 serviceSubnet  podSubnet 区别
Kubernetes集群中`serviceSubnet`和`podSubnet`是两个不同的网络子网,用于不同的目的。

- `serviceSubnet`是用于定义Service资源的IP地址池,每个Service都会被分配一个在这个子网中的虚拟IP地址(ClusterIP),用于负载均衡到后端Pod。默认情况下,`serviceSubnet`的CIDR为`10.96.0.0/12`,可以通过在kube-apiserver启动参数中指定`--service-cluster-ip-range`选项来修改它。例如:`--service-cluster-ip-range=10.244.0.0/16`
    
- `podSubnet`是用于定义Pod网络的IP地址池。每个Pod都会被分配一个在这个子网中的IP地址。这些IP地址是Pod内部使用的,由kubelet代理在每个节点上创建的网络命名空间中分配。本文示例中,`podSubnet`的CIDR为`10.244.0.0/16`(flannel 默认网段),使用 kubeadm 时可以通过`--pod-network-cidr`选项或初始化配置文件中的`networking.podSubnet`字段来设置它。例如:`--pod-network-cidr=10.244.0.0/16`
    

请注意,如果`serviceSubnet`和`podSubnet`的CIDR相同或存在交集,它们就是重叠的。这意味着,如果您使用相同的CIDR来定义它们,可能会导致网络冲突和不可预测的行为。为了避免这种情况,请确保为它们分配不同的CIDR。

如果修改了 kubeadm-config 的配置,需要重启集群让该配置生效。其他情况等待 calico 转为正常即可。使用官方提示的命令:

# 查看状态
kubectl get pods -n calico-system

如果上述命令提示命名空间不存在,且配置完成后集群的节点状态没有 ready。使用下面的命令查看 calico 发生了什么错误。

kubectl get tigerastatus -o yaml

配置 Flannel 网络(1.26.x 未验证)

安装 flannel 保证各个节点的pod之间网络通信

修改集群 kube-controller-manager.yaml文件,追加网络参数

vim /etc/kubernetes/manifests/kube-controller-manager.yaml
# 在 command 下面追加两行
--allocate-node-cidrs=true
--cluster-cidr=10.244.0.0/16

# 重启 kubelet
systemctl restart kubelet

如果是多网卡的机器,可能需要指定下网卡, 参考这个 大佬的文章

文章 “安装 Pod Network” 中提到的 :

“另外需要注意的是如果你的节点有多个网卡的话,需要在 kube-flannel.yml 中使用--iface参数指定集群主机内网网卡的名称,否则可能会出现 dns 无法解析。”

我猜应该是在配置文件下面的位置加, 注意 kind 和 metadata 中的信息

apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: kube-flannel-ds
  namespace: kube-system
  labels:
    tier: node
    app: flannel
spec:
  selector:
    matchLabels:
      app: flannel
  template:
    metadata:
      labels:
        tier: node
        app: flannel
    spec:
      ....
      ...
      ..
      .
      containers:
      - name: kube-flannel
        image: quay.io/coreos/flannel:v0.14.0
        command:
        - /opt/bin/flanneld
        args:
        - --ip-masq
        - --kube-subnet-mgr     
        - --iface=ens33    # 这里追加参数   <----------------
        resources:
          requests:
      ...
      ....
      ......

获取flannel部署文件,并下载镜像

curl https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml > kube-flannel.yml

# 查看需要的镜像
cat kube-flannel.yml | grep image
# image: quay.io/coreos/flannel:v0.14.0
# image: quay.io/coreos/flannel:v0.14.0

# 直接下载如果失败的话,就用docker搜索下别人上传的镜像
docker search flannel:v0.14.0
# NAME           DESCRIPTION                           STARS     OFFICIAL   AUTOMATED
# xwjh/flannel   from quay.io/coreos/flannel:v0.14.0   1

# 下载镜像并重新进行打tag, 完事后删除多余的 tag
docker pull xwjh/flannel:v0.14.0
docker tag xwjh/flannel:v0.14.0 quay.io/coreos/flannel:v0.14.0
docker rmi xwjh/flannel:v0.14.0

# 应用配置
kubectl create -f kube-flannel.yml

# 创建成功示例
# [root@k8s-master k8s-install-file]# kubectl create -f kube-flannel.yml
# Warning: policy/v1beta1 PodSecurityPolicy is deprecated in v1.21+, unavailable in v1.25+
# podsecuritypolicy.policy/psp.flannel.unprivileged created
# clusterrole.rbac.authorization.k8s.io/flannel created
# clusterrolebinding.rbac.authorization.k8s.io/flannel created
# serviceaccount/flannel created
# configmap/kube-flannel-cfg created
# daemonset.apps/kube-flannel-ds created
# [root@k8s-master k8s-install-file]#

验证&其他设置

至此 k8s 简单搭建版结束, 后续多个节点,多master之类的查资料设置加入集群即可

验证节点状态

执行kubectl get node查看集群节点状态, 如果你之前没装 flannel直接执行会看到如下信息

[root@k8s-master ~]# kubectl get node
NAME         STATUS     ROLES                  AGE   VERSION
k8s-master   NotReady   control-plane,master   21h   v1.21.4
k8s-node-1   NotReady   <none>                 21h   v1.21.4
[root@k8s-master ~]#

当你flannel正确安装后,会变成如下样式, 两个节点都会变成 Ready状态

[root@k8s-master k8s-install-file]# kubectl get node
NAME         STATUS   ROLES                  AGE   VERSION
k8s-master   Ready    control-plane,master   22h   v1.21.4
k8s-node-1   Ready    <none>                 21h   v1.21.4
[root@k8s-master k8s-install-file]#

验证 coredns 状态

安装完成后查看pod状态可能会出现coredns错误,无法启动:

[root@k8s-master k8s-install-file]# kubectl  get pod --all-namespaces
NAMESPACE     NAME                                 READY   STATUS             RESTARTS   AGE
kube-system   coredns-67574f65b-fh2kq              0/1     ImagePullBackOff   0          22h
kube-system   coredns-67574f65b-qspjm              0/1     ImagePullBackOff   0          22h
kube-system   etcd-k8s-master                      1/1     Running            1          22h
kube-system   kube-apiserver-k8s-master            1/1     Running            1          22h
kube-system   kube-controller-manager-k8s-master   1/1     Running            1          5h44m
kube-system   kube-flannel-ds-h5fd6                1/1     Running            0          7m33s
kube-system   kube-flannel-ds-z945p                1/1     Running            0          7m33s
kube-system   kube-proxy-rmwcx                     1/1     Running            1          21h
kube-system   kube-proxy-vzmjw                     1/1     Running            1          22h
kube-system   kube-scheduler-k8s-master            1/1     Running            1          22h
[root@k8s-master k8s-install-file]#

我们查看下pod的错误信息

[root@k8s-master k8s-install-file]# kubectl -n kube-system describe pod coredns-67574f65b-fh2kq
Name:                 coredns-67574f65b-fh2kq
Namespace:            kube-system
Priority:             2000000000
Priority Class Name:  system-cluster-critical
Node:                 k8s-node-1/192.168.5.129
Start Time:           Tue, 17 Aug 2021 14:54:36 +0800
Labels:               k8s-app=kube-dns
                      pod-template-hash=67574f65b
Annotations:          <none>
Status:               Pending
IP:                   10.244.1.3
IPs:
  IP:           10.244.1.3
Controlled By:  ReplicaSet/coredns-67574f65b
Containers:
  coredns:
    Container ID:
    Image:         registry.aliyuncs.com/k8sxio/coredns:v1.8.0
    Image ID:
    Ports:         53/UDP, 53/TCP, 9153/TCP
    Host Ports:    0/UDP, 0/TCP, 0/TCP
    Args:
      -conf
      /etc/coredns/Corefile
    State:          Waiting
      Reason:       ImagePullBackOff
    Ready:          False
    Restart Count:  0
    Limits:
      memory:  170Mi
    Requests:
      cpu:        100m
      memory:     70Mi
    Liveness:     http-get http://:8080/health delay=60s timeout=5s period=10s #success=1 #failure=5
    Readiness:    http-get http://:8181/ready delay=0s timeout=1s period=10s #success=1 #failure=3
    Environment:  <none>
    Mounts:
      /etc/coredns from config-volume (ro)
      /var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-trjcg (ro)
Conditions:
  Type              Status
  Initialized       True
  Ready             False
  ContainersReady   False
  PodScheduled      True
Volumes:
  config-volume:
    Type:      ConfigMap (a volume populated by a ConfigMap)
    Name:      coredns
    Optional:  false
  kube-api-access-trjcg:
    Type:                    Projected (a volume that contains injected data from multiple sources)
    TokenExpirationSeconds:  3607
    ConfigMapName:           kube-root-ca.crt
    ConfigMapOptional:       <nil>
    DownwardAPI:             true
QoS Class:                   Burstable
Node-Selectors:              kubernetes.io/os=linux
Tolerations:                 CriticalAddonsOnly op=Exists
                             node-role.kubernetes.io/control-plane:NoSchedule
                             node-role.kubernetes.io/master:NoSchedule
                             node.kubernetes.io/not-ready:NoExecute op=Exists for 300s
                             node.kubernetes.io/unreachable:NoExecute op=Exists for 300s
Events:
  Type     Reason            Age                     From               Message
  ----     ------            ----                    ----               -------
  Warning  FailedScheduling  4h53m (x1020 over 21h)  default-scheduler  0/2 nodes are available: 2 node(s) had taint {node.kubernetes.io/not-ready:}, that the pod didn't tolerate.
  Warning  FailedScheduling  8m6s (x9 over 14m)      default-scheduler  0/2 nodes are available: 2 node(s) had taint {node.kubernetes.io/not-ready:}, that the pod didn't tolerate.
  Normal   Scheduled         7m56s                   default-scheduler  Successfully assigned kube-system/coredns-67574f65b-fh2kq to k8s-node-1
  Normal   Pulling           6m27s (x4 over 7m54s)   kubelet            Pulling image "registry.aliyuncs.com/k8sxio/coredns:v1.8.0"
  Warning  Failed            6m26s (x4 over 7m53s)   kubelet            Failed to pull image "registry.aliyuncs.com/k8sxio/coredns:v1.8.0": rpc error: code = Unknown desc = Error response from daemon: manifest for registry.aliyuncs.com/k8sxio/coredns:v1.8.0 not found: manifest unknown: manifest unknown
  Warning  Failed            6m26s (x4 over 7m53s)   kubelet            Error: ErrImagePull
  Warning  Failed            6m15s (x6 over 7m53s)   kubelet            Error: ImagePullBackOff
  Normal   BackOff           2m45s (x21 over 7m53s)  kubelet            Back-off pulling image "registry.aliyuncs.com/k8sxio/coredns:v1.8.0"

发现错误是拉取镜像失败, 但是master节点确实存在这个镜像, 那这个指的就是 node节点上缺少镜像,我们导出master上的registry.aliyuncs.com/k8sxio/coredns:v1.8.0拷贝给node节点导入即可

docker save -o coredns.zip registry.aliyuncs.com/k8sxio/coredns:v1.8.0
scp coredns.zip k8s-node-1:~

# node 节点
docker load -i coredns.zip

重新查看状态

[root@k8s-master k8s-install-file]# kubectl -n kube-system get pods
NAME                                 READY   STATUS    RESTARTS   AGE
coredns-67574f65b-fh2kq              1/1     Running   0          22h
coredns-67574f65b-qspjm              1/1     Running   0          22h
etcd-k8s-master                      1/1     Running   1          22h
kube-apiserver-k8s-master            1/1     Running   1          22h
kube-controller-manager-k8s-master   1/1     Running   1          5h58m
kube-flannel-ds-h5fd6                1/1     Running   0          21m
kube-flannel-ds-z945p                1/1     Running   0          21m
kube-proxy-rmwcx                     1/1     Running   1          22h
kube-proxy-vzmjw                     1/1     Running   1          22h
kube-scheduler-k8s-master            1/1     Running   1          22h
[root@k8s-master k8s-install-file]#

node 节点角色为 none

查看节点详细信息, 可以看到node节点为none角色, 我们手动指定节点为worker

[root@k8s-master k8s-install-file]# kubectl get node -o wide
NAME         STATUS   ROLES                  AGE   VERSION   INTERNAL-IP     EXTERNAL-IP   OS-IMAGE         KERNEL-VERSION                 CONTAINER-RUNTIME
k8s-master   Ready    control-plane,master   22h   v1.21.4   192.168.5.128   <none>        CentOS Linux 8   4.18.0-305.12.1.el8_4.x86_64   docker://20.10.8
k8s-node-1   Ready    <none>                 22h   v1.21.4   192.168.5.129   <none>        CentOS Linux 8   4.18.0-305.12.1.el8_4.x86_64   docker://20.10.8
[root@k8s-master k8s-install-file]#

执行下面的命令修改节点角色

kubectl label node <node name> node-role.kubernetes.io/node=
[root@k8s-master k8s-install-file]# kubectl label node k8s-node-1 node-role.kubernetes.io/node=
node/k8s-node-1 labeled
[root@k8s-master k8s-install-file]#
[root@k8s-master k8s-install-file]# kubectl get node
NAME         STATUS   ROLES                  AGE   VERSION
k8s-master   Ready    control-plane,master   22h   v1.21.4
k8s-node-1   Ready    node                   22h   v1.21.4
[root@k8s-master k8s-install-file]#

设置节点角色

# 设置节点为 master
kubectl label node <node name> node-role.kubernetes.io/master=

# 设置节点为 node 角色
kubectl label node <node name> node-role.kubernetes.io/node=

# 设置 master 一般情况下不接受负载
kubectl taint node <node name> node-role.kubernetes.io/master=true:NoSchedule

# 设置 master 不运行pod
kubectl taint node <node name> node-role.kubernetes.io/master=:NoSchedule

# 删除节点标签<角色>, 只要修改 = 号为 - 号即可
kubectl label node k8s-node-1 node-role.kubernetes.io/node-

所有节点都允许运行pod

kubectl taint nodes --all node-role.kubernetes.io/master-

# 多次运行后是这个提示, 正常可以参考官网提示
# [root@k8s-master k8s-install-file]# kubectl taint nodes --all node-role.kubernetes.io/master-
# taint "node-role.kubernetes.io/master" not found
# taint "node-role.kubernetes.io/master" not found
# [root@k8s-master k8s-install-file]#

修改 NodePort 端口范围

默认端口号范围是 30000-32767 修改后等一会儿就可以生效

# vim /etc/kubernetes/manifests/kube-apiserver.yaml
# 在 command 末尾追加下面的参数,设置范围为 0-65535 全端口
- --service-node-port-range=0-65535

安装ingress-nginx

参考官网: https://kubernetes.github.io/ingress-nginx/deploy/#docker-desktop

使用官网提供的示例文件Deployment—>Installation Guide—>Docker Desktop

# 保存官网提供的文件
kubectl apply -f ingress-nginx.yaml

镜像拉取失败使用梯子或者搜索别人上传的