[k8s] 建立使用者及namespace

建立namespaces

# NOTE(review): this creates namespace "slanla", but every later command in
# this document targets namespace "tw-sgis" — confirm which one is intended.
kubectl create ns slanla

建立user

# Create the ServiceAccount that the generated kubeconfig will authenticate as.
kubectl create serviceaccount slanla --namespace tw-sgis

RBAC 授權

建立規則

# Write an RBAC Role restricted to pods in the tw-sgis namespace.
# The quoted 'EOF' delimiter prevents any accidental shell expansion
# inside the manifest (it contains none today, but this is defensive).
cat <<'EOF' > slanla-user-role.yml
kind: Role
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  namespace: tw-sgis
  name: slanla-user-pod
rules:
# "" is the core API group, where pods live. The original "*" would grant
# these verbs on any resource named "pods" in every API group — broader
# than needed; least privilege is the core group only.
- apiGroups: [""]
  resources: ["pods", "pods/log"]
  verbs: ["get", "watch", "list", "update", "create", "delete"]
EOF
kubectl apply -f slanla-user-role.yml

授權對象

# Bind the pod Role to the "slanla" ServiceAccount so its token inherits
# the pod permissions in tw-sgis.
kubectl -n tw-sgis create rolebinding slanla-view-pod \
  --role=slanla-user-pod \
  --serviceaccount=tw-sgis:slanla

產生設定檔

取得 secret 資訊

# Name of the secret that holds the ServiceAccount's token and CA cert.
# NOTE(review): since Kubernetes 1.24, token secrets are no longer
# auto-created for ServiceAccounts, so .secrets may be empty — confirm the
# target cluster version (or use `kubectl create token` on newer clusters).
SECRET=$(kubectl -n tw-sgis get sa slanla -o go-template='{{range .secrets}}{{.name}}{{end}}')

設定API Server

# API server endpoint baked into the generated kubeconfig (replace the x's
# with the real cluster address).
API_SERVER="https://xxx.xxx.xxx.xxx:6443"

取得 ca

# Extract the base64-encoded cluster CA directly from the secret's data map.
# A go-template ({{index ...}} handles the dot in the "ca.crt" key) is more
# robust than scraping `-o yaml` output with awk, and ${SECRET} is quoted.
CA_CERT=$(kubectl -n tw-sgis get secret "${SECRET}" -o go-template='{{index .data "ca.crt"}}')

建立

# Write the cluster section of the kubeconfig. The heredoc delimiter is
# deliberately unquoted so the shell expands $CA_CERT and $API_SERVER.
cat <<EOF > slanla.conf
apiVersion: v1
kind: Config
clusters:
- cluster:
    certificate-authority-data: $CA_CERT
    server: $API_SERVER
  name: cluster
EOF

取得token

# Read the base64-encoded ServiceAccount token from the secret
# (quoting ${SECRET} guards against word-splitting — SC2086).
TOKEN=$(kubectl -n tw-sgis get secret "${SECRET}" -o go-template='{{.data.token}}')

設定 token

# Decode the token and store it as the user credential in the kubeconfig.
# $(...) replaces the legacy backticks, and the expansion is quoted so the
# decoded token reaches kubectl as a single word.
kubectl config set-credentials slanla-user \
  --token="$(printf '%s' "${TOKEN}" | base64 -d)" \
  --kubeconfig=slanla.conf

建立context: default

# Create a context named "default" that ties the cluster entry to the
# token-based user in the generated kubeconfig.
kubectl config set-context default \
  --kubeconfig=slanla.conf \
  --cluster=cluster \
  --user=slanla-user

指定context: default

# Make "default" the active context of the generated kubeconfig.
kubectl config use-context default --kubeconfig=slanla.conf

install docker 18.06 on raspberry pi

最近在raspberry pi上面安裝docker 18.09後,
會發生無法啟動docker的問題.
因此需要降版至18.06,語法如下:

# Downgrade Docker CE to 18.06 (18.09 fails to start on this Raspberry Pi).
sudo apt-mark unhold docker-ce
sudo apt-get purge -y docker-ce
sudo apt-get autoremove -y --purge docker-ce
sudo apt-get autoclean
sudo rm -rf /var/lib/docker  # wipes all local images/containers/volumes
# NOTE(review): piping a remote script into sh is convenient but unaudited.
# The explicit https:// URL avoids an initial cleartext redirect, and -f
# makes curl fail loudly on HTTP errors instead of feeding an error page to sh.
export VERSION=18.06 && curl -fsSL https://get.docker.com | sh
sudo apt-mark hold docker-ce  # pin so a routine apt upgrade won't bump it again

修改docker0網段

其中 10.172.254.254 是 Gateway,不要自作主張改成 192.168.1.0/24 或 10.172.0.0 之類的。

# Re-home the docker0 bridge onto 10.172.254.254/16.
yum install bridge-utils -y  # provides brctl

# Tear down the existing docker0 bridge and its NAT rules.
service docker stop
ip link set dev docker0 down
brctl delbr docker0
# NOTE(review): this flushes ALL nat POSTROUTING rules, not just Docker's —
# confirm nothing else on the host depends on that chain.
iptables -t nat -F POSTROUTING


# Recreate the bridge on the new subnet and bring it up.
brctl addbr docker0
ip addr add 10.172.254.254/16 dev docker0
ip link set dev docker0 up


# Persist the new bridge IP so dockerd keeps it across restarts.
cat << EOF > /etc/docker/daemon.json
{
  "bip": "10.172.254.254/16"
}
EOF

systemctl daemon-reload
systemctl restart docker.service
reboot # a reboot is required when kubernetes runs on this host

refer: https://blog.yowko.com/docker-172-17-ip/

在centos 7上安裝nvidia docker

# Install docker-ce, pinned to 18.06 (the version known to work here).
yum-config-manager --add-repo https://download.docker.com/linux/centos/docker-ce.repo
yum install docker-ce-18.06.0.ce -y
systemctl enable docker && systemctl start docker


# Remove legacy nvidia-docker 1.x: force-remove any container still using
# an nvidia-docker volume, then uninstall the package itself.
docker volume ls -q -f driver=nvidia-docker | xargs -r -I{} -n1 docker ps -q -a -f volume={} | xargs -r docker rm -f
sudo yum remove nvidia-docker

# Add the nvidia-docker yum repository for this distribution (e.g. "centos7").
distribution=$(. /etc/os-release;echo $ID$VERSION_ID)
curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.repo | sudo tee /etc/yum.repos.d/nvidia-docker.repo

# Install nvidia-docker2, then SIGHUP dockerd so it reloads its runtime config.
sudo yum install -y nvidia-docker2
sudo pkill -SIGHUP dockerd

# Smoke test: nvidia-smi inside a CUDA container proves GPU passthrough works.
docker run --runtime=nvidia --rm nvidia/cuda:9.0-base nvidia-smi

refer: https://github.com/NVIDIA/nvidia-docker

k8s安裝ingress nginx

ingress-nginx最新版已經不包含default-http-backend.
因此安裝0.20.0

# Install ingress-nginx 0.20.0 (the last release that still ships
# default-http-backend).
wget https://raw.githubusercontent.com/kubernetes/ingress-nginx/nginx-0.20.0/deploy/mandatory.yaml
# Inject "hostNetwork: true" immediately above the serviceAccountName line so
# the controller binds ports 80/443 directly on each node's network namespace.
sed -i 's/serviceAccountName: nginx-ingress-serviceaccount/hostNetwork: true\n      serviceAccountName: nginx-ingress-serviceaccount/g' mandatory.yaml
kubectl apply -f mandatory.yaml
rm -f mandatory.yaml*
kubectl get pod --all-namespaces

# Scale both ingress-nginx deployments to one replica per (non-master) node.
NODE_COUNT=$(kubectl get nodes | grep -v master | grep -v STATUS | wc -l)
echo "$NODE_COUNT"
if [ "$NODE_COUNT" -gt 1 ] ; then
  # Pass the patch as a single quoted argument; the original's unquoted
  # $(echo ...) was a useless echo and relied on the JSON containing no spaces.
  kubectl -n ingress-nginx patch deployment default-http-backend --patch "{\"spec\":{\"replicas\":$NODE_COUNT}}"
  kubectl -n ingress-nginx patch deployment nginx-ingress-controller --patch "{\"spec\":{\"replicas\":$NODE_COUNT}}"
fi
kubectl get pods -n ingress-nginx -o wide

# Replace the stock default-http-backend image with a custom Apache-based one
# that proxies Let's Encrypt ACME challenges (/.well-known/acme-challenge/) to
# $DOMAIN, while keeping the resource limits, container port 8080 and the
# /healthz liveness probe expected by ingress-nginx.
DOMAIN=ssl.cbe.tw
kubectl -n ingress-nginx patch deployment default-http-backend --patch "{\"spec\":{\"template\":{\"spec\":{\"containers\":[{\"name\":\"default-http-backend\",\"resources\":{\"limits\":{\"cpu\":\"100m\",\"memory\":\"200Mi\"},\"requests\":{\"cpu\":\"100m\",\"memory\":\"200Mi\"}},\"image\":\"slanla/apache-defaultbackend\",\"ports\":[{\"containerPort\":8080,\"protocol\":\"TCP\"}],\"env\":[{\"name\":\"LETSENCRYPT_PROXYPASS_URL\",\"value\":\"http://$DOMAIN/.well-known/acme-challenge/ connectiontimeout=15 timeout=30\"},{\"name\":\"LETSENCRYPT_PROXYPASSREVERSE_URL\",\"value\":\"http://$DOMAIN/.well-known/acme-challenge/\"}],\"livenessProbe\":{\"httpGet\":{\"path\":\"/healthz\",\"port\":8080,\"scheme\":\"HTTP\"},\"initialDelaySeconds\":30,\"timeoutSeconds\":5,\"periodSeconds\":10,\"successThreshold\":1,\"failureThreshold\":3}}]}}}}"
kubectl get pods -n ingress-nginx -o wide

刪除所有k8s上面的pod

刪除所有k8s上面的pod

# Force-delete every pod in the cluster. NR>1 skips the header row — the
# original piped "NAMESPACE NAME" into kubectl delete, producing a spurious
# error. read -r prevents backslash interpretation.
kubectl get pods --all-namespaces -o wide | awk 'NR>1 {print $1, $2}' | while read -r ns pod; do kubectl delete pod --grace-period=0 --force -n "$ns" "$pod"; done

刪除所有k8s上面所有非Running的pod

# Force-delete every pod that is NOT in Running state. The original's
# "grep -v Running" let the header row through (it says STATUS, not Running),
# so a bogus delete of pod "NAME" in namespace "NAMESPACE" was attempted.
# Filtering on the STATUS column ($4) with NR>1 skips the header as well.
kubectl get pods --all-namespaces -o wide | awk 'NR>1 && $4 != "Running" {print $1, $2}' | while read -r ns pod; do kubectl delete pod --grace-period=0 --force -n "$ns" "$pod"; done

使用helm部署nfs-client

之前都用yaml部署nfs-client.
但缺點是每次 kubernetes 跟 nfs-client 版本更換之後,
就可能會發生部署有問題,像是權限…等

剛剛在kubernetes 1.12上面用之前1.10所用的nfs-client之yaml檔案.
結果又出問題了.
後來發現helm有提供nfs-client部署方式.
二話不說,立刻改用helm部署.
語法如下:

# Deploy nfs-client-provisioner via Helm (Helm v2 syntax: --name).
# nfs.server / nfs.path point at the NFS export; storageClass.name is the
# StorageClass that PVCs will reference.
# NOTE(review): the "stable" chart repository is deprecated/archived — on
# current clusters use the nfs-subdir-external-provisioner chart instead.
helm install stable/nfs-client-provisioner \
  --name nfs-client \
  --set nfs.server=xxx.xxx.xxx.xxx \
  --set nfs.path=/path \
  --set storageClass.name=managed-nfs-storage