I am following this article to set up an HA k8s cluster:
Guide: Kubernetes Multi-Master HA Cluster with kubeadm
I have three master nodes (3, 4, 5) and four worker nodes (2, 6, 7, 8); one of the worker nodes is the HAProxy load balancer.
In step 6 of the section "Installing and configuring Etcd on all 3 Master Nodes", I get the following error on master node 3:
{"level":"warn","ts":"2023-03-28T17:21:07.929-0700","caller":"clientv3/retry_interceptor.go:62","msg":"retrying of unary invoker failed","target":"endpoint://client-beafcd7b-fbf5-4c3e-b9ce-5c1032e26041/127.0.0.1:2379","attempt":0,"error":"rpc error: code = DeadlineExceeded desc = context deadline exceeded"}
Error: context deadline exceeded
However, on the other master nodes (4 and 5), I get the following:
162974ed2b5b12b2, started, 192.168.60.4, https://192.168.60.4:2380, https://192.168.60.4:2379, false
5642d9d9da8c08a3, started, 192.168.60.3, https://192.168.60.3:2380, https://192.168.60.3:2379, false
6ffc3bfbd773170f, started, 192.168.60.5, https://192.168.60.5:2380, https://192.168.60.5:2379, false
Here is etcd.service for node 3:
[Unit]
Description=etcd
Documentation=https://github.com/coreos
[Service]
ExecStart=/usr/local/bin/etcd \
--name 192.168.60.3 \
--cert-file=/etc/etcd/kubernetes.pem \
--key-file=/etc/etcd/kubernetes-key.pem \
--peer-cert-file=/etc/etcd/kubernetes.pem \
--peer-key-file=/etc/etcd/kubernetes-key.pem \
--trusted-ca-file=/etc/etcd/ca.pem \
--peer-trusted-ca-file=/etc/etcd/ca.pem \
--peer-client-cert-auth \
--client-cert-auth \
--initial-advertise-peer-urls https://192.168.60.3:2380 \
--listen-peer-urls https://192.168.60.3:2380 \
--listen-client-urls https://192.168.60.3:2379,http://127.0.0.1:2379 \
--advertise-client-urls https://192.168.60.3:2379 \
--initial-cluster-token etcd-cluster-0 \
--initial-cluster 192.168.60.3=https://192.168.60.3:2380,192.168.60.4=https://192.168.60.4:2380,192.168.60.5=https://192.168.60.5:2380 \
--initial-cluster-state new \
--data-dir=/var/lib/etcd
Restart=on-failure
RestartSec=5
[Install]
WantedBy=multi-user.target
Here is etcd.service for node 4:
[Unit]
Description=etcd
Documentation=https://github.com/coreos
[Service]
ExecStart=/usr/local/bin/etcd \
--name 192.168.60.4 \
--cert-file=/etc/etcd/kubernetes.pem \
--key-file=/etc/etcd/kubernetes-key.pem \
--peer-cert-file=/etc/etcd/kubernetes.pem \
--peer-key-file=/etc/etcd/kubernetes-key.pem \
--trusted-ca-file=/etc/etcd/ca.pem \
--peer-trusted-ca-file=/etc/etcd/ca.pem \
--peer-client-cert-auth \
--client-cert-auth \
--initial-advertise-peer-urls https://192.168.60.4:2380 \
--listen-peer-urls https://192.168.60.4:2380 \
--listen-client-urls https://192.168.60.4:2379,http://127.0.0.1:2379 \
--advertise-client-urls https://192.168.60.4:2379 \
--initial-cluster-token etcd-cluster-0 \
--initial-cluster 192.168.60.3=https://192.168.60.3:2380,192.168.60.4=https://192.168.60.4:2380,192.168.60.5=https://192.168.60.5:2380 \
--initial-cluster-state new \
--data-dir=/var/lib/etcd
Restart=on-failure
RestartSec=5
[Install]
WantedBy=multi-user.target
Here is etcd.service for node 5:
[Unit]
Description=etcd
Documentation=https://github.com/coreos
[Service]
ExecStart=/usr/local/bin/etcd \
--name 192.168.60.5 \
--cert-file=/etc/etcd/kubernetes.pem \
--key-file=/etc/etcd/kubernetes-key.pem \
--peer-cert-file=/etc/etcd/kubernetes.pem \
--peer-key-file=/etc/etcd/kubernetes-key.pem \
--trusted-ca-file=/etc/etcd/ca.pem \
--peer-trusted-ca-file=/etc/etcd/ca.pem \
--peer-client-cert-auth \
--client-cert-auth \
--initial-advertise-peer-urls https://192.168.60.5:2380 \
--listen-peer-urls https://192.168.60.5:2380 \
--listen-client-urls https://192.168.60.5:2379,http://127.0.0.1:2379 \
--advertise-client-urls https://192.168.60.5:2379 \
--initial-cluster-token etcd-cluster-0 \
--initial-cluster 192.168.60.3=https://192.168.60.3:2380,192.168.60.4=https://192.168.60.4:2380,192.168.60.5=https://192.168.60.5:2380 \
--initial-cluster-state new \
--data-dir=/var/lib/etcd
Restart=on-failure
RestartSec=5
[Install]
WantedBy=multi-user.target
If I run:
ETCDCTL_API=2 etcdctl member list
I get:
client: etcd cluster is unavailable or misconfigured; error #0: client: endpoint http://127.0.0.1:2379 exceeded header timeout
; error #1: dial tcp 127.0.0.1:4001: connect: connection refused