Modifying the docker0 subnet

Here, 10.172.254.254 is the gateway. Do not arbitrarily change it to something like 192.168.1.0/24 or 10.172.0.0.

yum install bridge-utils -y

service docker stop
ip link set dev docker0 down
brctl delbr docker0
iptables -t nat -F POSTROUTING


brctl addbr docker0
ip addr add 10.172.254.254/16 dev docker0
ip link set dev docker0 up


cat << EOF > /etc/docker/daemon.json
{
  "bip": "10.172.254.254/16"
}
EOF

systemctl daemon-reload
systemctl restart docker.service
reboot # a reboot is required if k8s is installed
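
To confirm the new subnet took effect, a quick check along these lines should do (this assumes the alpine image is pullable; any small image that ships the ip command works):

ip addr show docker0                          # should list 10.172.254.254/16
docker network inspect bridge | grep -E -i 'subnet|gateway'
docker run --rm alpine ip addr show eth0      # the container IP should fall inside 10.172.0.0/16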

refer: https://blog.yowko.com/docker-172-17-ip/

Installing nvidia docker on CentOS 7

# Install docker-ce
yum install -y yum-utils   # provides yum-config-manager
yum-config-manager --add-repo https://download.docker.com/linux/centos/docker-ce.repo
yum install docker-ce-18.06.0.ce -y
systemctl enable docker && systemctl start docker


# Remove the old nvidia-docker
docker volume ls -q -f driver=nvidia-docker | xargs -r -I{} -n1 docker ps -q -a -f volume={} | xargs -r docker rm -f
sudo yum remove nvidia-docker

# Add the repositories
distribution=$(. /etc/os-release;echo $ID$VERSION_ID)
curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.repo | sudo tee /etc/yum.repos.d/nvidia-docker.repo

# Install nvidia-docker2
sudo yum install -y nvidia-docker2
sudo pkill -SIGHUP dockerd

# Test
docker run --runtime=nvidia --rm nvidia/cuda:9.0-base nvidia-smi
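
If the GPUs are meant for Kubernetes (e.g. the nvidia device plugin), the usual extra step is to make nvidia the default Docker runtime. Below is a minimal /etc/docker/daemon.json sketch, assuming nvidia-docker2 installed nvidia-container-runtime under /usr/bin; keep the bip line only if you also changed the docker0 subnet as in the first section:

cat << EOF > /etc/docker/daemon.json
{
  "bip": "10.172.254.254/16",
  "default-runtime": "nvidia",
  "runtimes": {
    "nvidia": {
      "path": "/usr/bin/nvidia-container-runtime",
      "runtimeArgs": []
    }
  }
}
EOF
systemctl restart docker.service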

refer: https://github.com/NVIDIA/nvidia-docker

Installing ingress nginx on k8s

The latest ingress-nginx releases no longer include default-http-backend,
so install version 0.20.0 instead.

# Install ingress-nginx
wget https://raw.githubusercontent.com/kubernetes/ingress-nginx/nginx-0.20.0/deploy/mandatory.yaml
sed -i 's/serviceAccountName: nginx-ingress-serviceaccount/hostNetwork: true\n      serviceAccountName: nginx-ingress-serviceaccount/g' mandatory.yaml
kubectl apply -f mandatory.yaml
rm -f mandatory.yaml*
kubectl get pod --all-namespaces
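
Since the sed above injects hostNetwork: true into the controller, it is worth checking that the setting really landed in the deployed spec:

kubectl -n ingress-nginx get deployment nginx-ingress-controller \
  -o jsonpath='{.spec.template.spec.hostNetwork}'   # should print: true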

# Adjust ingress-nginx replica counts to the number of worker nodes
NODE_COUNT=$(kubectl get nodes | grep -v master | grep -v STATUS | wc -l)
echo $NODE_COUNT
if [ $NODE_COUNT -gt 1 ] ; then
kubectl -n ingress-nginx patch deployment default-http-backend --patch $(echo "{\"spec\":{\"replicas\":$NODE_COUNT}}")
kubectl -n ingress-nginx patch deployment nginx-ingress-controller --patch $(echo "{\"spec\":{\"replicas\":$NODE_COUNT}}")
fi
kubectl get pods -n ingress-nginx -o wide

# Swap in a custom http-backend image
DOMAIN=ssl.cbe.tw
kubectl -n ingress-nginx patch deployment default-http-backend --patch "{\"spec\":{\"template\":{\"spec\":{\"containers\":[{\"name\":\"default-http-backend\",\"resources\":{\"limits\":{\"cpu\":\"100m\",\"memory\":\"200Mi\"},\"requests\":{\"cpu\":\"100m\",\"memory\":\"200Mi\"}},\"image\":\"slanla/apache-defaultbackend\",\"ports\":[{\"containerPort\":8080,\"protocol\":\"TCP\"}],\"env\":[{\"name\":\"LETSENCRYPT_PROXYPASS_URL\",\"value\":\"http://$DOMAIN/.well-known/acme-challenge/ connectiontimeout=15 timeout=30\"},{\"name\":\"LETSENCRYPT_PROXYPASSREVERSE_URL\",\"value\":\"http://$DOMAIN/.well-known/acme-challenge/\"}],\"livenessProbe\":{\"httpGet\":{\"path\":\"/healthz\",\"port\":8080,\"scheme\":\"HTTP\"},\"initialDelaySeconds\":30,\"timeoutSeconds\":5,\"periodSeconds\":10,\"successThreshold\":1,\"failureThreshold\":3}}]}}}}"
kubectl get pods -n ingress-nginx -o wide
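
To make sure the controller actually routes traffic, a throwaway Ingress can be applied and hit through any node. test.example.com, my-service and <node-ip> below are placeholders; point them at a real Service and node of yours:

kubectl apply -f - << EOF
apiVersion: extensions/v1beta1
kind: Ingress
metadata:
  name: test-ingress
  namespace: default
spec:
  rules:
  - host: test.example.com
    http:
      paths:
      - path: /
        backend:
          serviceName: my-service
          servicePort: 80
EOF

curl -H 'Host: test.example.com' http://<node-ip>/   # hostNetwork: true, so any node IP answers on port 80
kubectl delete ingress test-ingress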

Deleting all pods on k8s

kubectl get pods --all-namespaces -o wide --no-headers | awk '{print $1 " " $2}' | while read AA BB; do kubectl delete pod --grace-period=0 --force -n $AA $BB; done

Deleting all non-Running pods on k8s

kubectl get pods --all-namespaces -o wide --no-headers | grep -v Running | awk '{print $1 " " $2}' | while read AA BB; do kubectl delete pod --grace-period=0 --force -n $AA $BB; done

Deploying nfs-client with helm

I used to deploy nfs-client with plain yaml files.
The drawback is that whenever the Kubernetes or nfs-client version changes,
the deployment can break in some way, e.g. permission issues.

I just reused the nfs-client yaml from Kubernetes 1.10 on a 1.12 cluster,
and sure enough it broke again.
Then I found that helm provides a chart for deploying nfs-client,
so I switched to helm without a second thought.
The command is as follows:

helm install stable/nfs-client-provisioner \
  --name nfs-client \
  --set nfs.server=xxx.xxx.xxx.xxx \
  --set nfs.path=/path \
  --set storageClass.name=managed-nfs-storage
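
To verify the provisioner works, a small throwaway PVC against the managed-nfs-storage class should reach Bound within a few seconds (test-claim is just a placeholder name):

kubectl apply -f - << EOF
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: test-claim
spec:
  storageClassName: managed-nfs-storage
  accessModes:
    - ReadWriteMany
  resources:
    requests:
      storage: 1Mi
EOF

kubectl get pvc test-claim    # STATUS should become Bound
kubectl delete pvc test-claim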

MongoDB account setup

Refer to the following commands:

# Prepare the environment
docker rm -f mongodb_mongo_1
rm -r -f $PWD/db
mkdir -p $PWD/db

# Create the MongoDB container
docker run \
  -d \
  --name mongodb_mongo_1 \
  -p 27017:27017 \
  -v $PWD/db:/data/db \
  mongo
sleep 5

# Create the super administrator
docker exec -it mongodb_mongo_1 bash
mongo admin
db.createUser({ user: "admin" , pwd: "admin1234", roles: ["userAdminAnyDatabase", "dbAdminAnyDatabase", "readWriteAnyDatabase"]})
quit()
# Log in again
mongo admin
db.auth("admin","admin1234")
use octblog
db.createUser({
    user: "gevin",
    pwd: "gevin1234",
    roles: [ { role: "readWrite", db: "octblog" },
             { role: "readWrite", db: "octblog-log" } ]
})
quit()

mongo octblog
db.auth("gevin","gevin1234")
use octblog
db.collection.insert({'test':1234});
quit()
exit

# Restart MongoDB with authentication enabled
docker rm -f mongodb_mongo_1
docker run \
  -d \
  --name mongodb_mongo_1 \
  -p 27017:27017 \
  -v $PWD/db:/data/db \
  mongo mongod --auth
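
Once mongod is running with --auth, unauthenticated queries should be rejected while the accounts created above keep working; a quick check:

sleep 5
# without credentials this should be rejected (not authorized)
docker exec mongodb_mongo_1 mongo octblog --eval 'db.collection.find()'
# with the gevin account it should return the test document inserted earlier
docker exec mongodb_mongo_1 mongo octblog -u gevin -p gevin1234 --eval 'db.collection.find()'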

Regenerating the k8s join command

Newer versions of k8s give join tokens an expiration time,
so a while after installation the old join token will no longer work when you try to add a new node.
In that case, use kubeadm token generate to create a new token and print the join command with the following:

kubeadm token create `kubeadm token generate` --print-join-command --ttl=0
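
The --ttl=0 here makes the new token non-expiring. To double-check the token (and its expiry), list the tokens on the master:

kubeadm token list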