命令收集
########################### Cluster management ###########################
# Cheat sheet: replace every <placeholder> before running a command.
# 1. Set the role label of a node (master / node)
kubectl label node <node-name> node-role.kubernetes.io/master=
kubectl label node <node-name> node-role.kubernetes.io/node=
# 2. Mark a node unschedulable / schedulable again
kubectl cordon <node-name>   # mark unschedulable
kubectl uncordon <node-name> # undo the unschedulable mark
# 3. Show cluster information
kubectl cluster-info
# 4. Show node information
kubectl describe node <node-name>
kubectl describe node <node-name> | grep -A10 "Allocatable" # allocatable resource limits of the node
# 5. Show namespace information
kubectl describe namespace <namespace-name>
# 6. Evict the workload pods currently running on a node
kubectl drain --ignore-daemonsets <node-name>
# 7. Add / remove a node label
kubectl label node <node-name> key=value
kubectl label node <node-name> key-
# 8. Add / remove a node taint
kubectl taint nodes <node-name> key1=value1:NoSchedule
kubectl taint nodes <node-name> key1:NoSchedule-
# 9. Show the labels currently set on a node
kubectl get node <node-name> --show-labels
# 10. Show only the taints of a node (plain "describe" prints the whole node)
kubectl get node <node-name> -o jsonpath='{.spec.taints}'
# 11. List the DaemonSets (per-node background workloads) in all namespaces
kubectl get daemonset -A
########################### Pod management ###########################
# Cheat sheet: replace every <placeholder> before running a command.
# 1. Forward a local port to a port of a pod / service
#    (--address 0.0.0.0 listens on every local interface, not just localhost)
kubectl port-forward -n <namespace> pod/<pod-name> 8088:8088 --address 0.0.0.0
kubectl port-forward -n <namespace> svc/<svc-name> 8080:8080 --address 0.0.0.0
# 2. Show CPU and memory usage of pods
kubectl top pod -n <namespace>
# 3. Delete a pod
kubectl delete pod <pod-name> -n <namespace>
# 4. Force-delete a pod
kubectl delete pod <pod-name> -n <namespace> --grace-period=0 --force
# 5. Show pod logs
kubectl logs <pod-name> -n <namespace>
# 6. Open a shell in a running pod
kubectl exec -it <pod-name> -n <namespace> -- /bin/bash
# 7. Show the details of one pod
kubectl describe pod <pod-name> -n <namespace>
# 8. Show the events of a pod: print from the "Events:" header to the end of
#    the describe output (grepping for "events" only shows the header line)
kubectl describe pod <pod-name> -n <namespace> | sed -n '/^Events:/,$p'
# 9. Change a pod label, overwriting the existing value
kubectl label pod <pod-name> -n <namespace> key=new_value --overwrite
########################### Deployment management ###########################
# 1. Scale the number of pod replicas of a Deployment
kubectl -n <namespace> scale deployment/<deployment-name> --replicas=4
# 2. Show the rollout history of a Deployment
kubectl -n <namespace> rollout history deployment/<deployment-name>
# 3. Roll a Deployment back to a given revision (revision 3 in this example)
kubectl -n <namespace> rollout undo deployment/<deployment-name> --to-revision=3
########################### Service management ###########################
# Cheat sheet: replace every <placeholder> before running a command.
# 1. Create a Service
kubectl create service clusterip my-service --tcp=80:8080
# 2. Delete a Service
kubectl delete service <service-name> -n <namespace>
# 3. Expose a Deployment as a Service (one variant per Service type)
kubectl expose deployment <deployment-name> -n <namespace> --type=ClusterIP --port=80 --target-port=8080
kubectl expose deployment <deployment-name> -n <namespace> --type=NodePort --port=80 --target-port=8080
kubectl expose deployment <deployment-name> -n <namespace> --type=LoadBalancer --port=80 --target-port=8080
创建kubeconfig文件
- 一键创建user1用户操作namespace1命名空间的kubeconfig文件脚本: bash createKubeConfig.sh user1 namespace1 (脚本使用了bash的[[ ]]语法,请用bash而不是sh执行)
#!/bin/bash
# Create a kubeconfig that lets one user work inside one namespace. The user
# authenticates with a client certificate signed through the cluster's
# CertificateSigningRequest API.
#
# Usage:   bash createKubeConfig.sh <user> <namespace>
# Output:  ./<user>-config (kubeconfig with embedded CA, client cert and key)
# Creates: CSR <user>-csr, plus Role <user>-role and RoleBinding
#          <user>-binding in <namespace>.
# Needs:   openssl, base64, and a kubectl context that may approve CSRs and
#          create roles / rolebindings.
set -euo pipefail

# Not named USER: that would overwrite the login-name environment variable.
k8s_user="${1:-}"
namespace="${2:-}"
if [[ -z "$k8s_user" ]]; then
  echo "the user is not found, please set it." >&2
  exit 1
fi
if [[ -z "$namespace" ]]; then
  echo "the namespace is not found, please set it." >&2
  exit 1
fi

csr_name="${k8s_user}-csr"
kubeconfig="${k8s_user}-config"

# Key material lives in a private temp dir that is removed on every exit path;
# umask 077 also keeps the generated kubeconfig readable by the owner only.
umask 077
work_dir=$(mktemp -d)
trap 'rm -rf -- "$work_dir"' EXIT
key_file="${work_dir}/${k8s_user}.key"
csr_file="${work_dir}/${k8s_user}.csr"
crt_file="${work_dir}/${k8s_user}.crt"
ca_file="${work_dir}/${k8s_user}-ca.crt"

openssl genrsa -out "$key_file" 2048
openssl req -new -key "$key_file" -out "$csr_file" -subj "/CN=${k8s_user}"
csr_b64=$(base64 < "$csr_file" | tr -d '\n')

kubectl apply -f - <<EOF
apiVersion: certificates.k8s.io/v1
kind: CertificateSigningRequest
metadata:
  name: ${csr_name}
spec:
  signerName: kubernetes.io/kube-apiserver-client
  request: ${csr_b64}
  usages:
    - client auth
EOF
kubectl certificate approve "$csr_name"

# The signer issues the certificate asynchronously after approval: poll for it
# (up to ~30s) instead of sleeping a fixed time and hoping it is there.
cert_b64=""
for ((attempt = 0; attempt < 30; attempt++)); do
  cert_b64=$(kubectl get csr "$csr_name" -o jsonpath='{.status.certificate}')
  if [[ -n "$cert_b64" ]]; then
    break
  fi
  sleep 1
done
if [[ -z "$cert_b64" ]]; then
  echo "certificate for ${csr_name} was not issued in time." >&2
  exit 1
fi
printf '%s' "$cert_b64" | base64 -d > "$crt_file"

# --minify on all three lookups so name, server and CA describe the same
# (current-context) cluster; --flatten inlines the CA data.
cluster_name=$(kubectl config view --minify -o jsonpath='{.clusters[0].name}')
api_server=$(kubectl config view --minify -o jsonpath='{.clusters[0].cluster.server}')
kubectl config view --minify --flatten \
  -o jsonpath='{.clusters[0].cluster.certificate-authority-data}' \
  | base64 -d > "$ca_file"
if [[ ! -s "$ca_file" ]]; then
  echo "no certificate-authority data found for cluster ${cluster_name}." >&2
  exit 1
fi

kubectl config set-cluster "$cluster_name" \
  --certificate-authority="$ca_file" \
  --embed-certs=true \
  --server="$api_server" \
  --kubeconfig="$kubeconfig"
kubectl config set-credentials "$k8s_user" \
  --client-certificate="$crt_file" \
  --client-key="$key_file" \
  --embed-certs=true \
  --kubeconfig="$kubeconfig"
kubectl config set-context "${k8s_user}-context" \
  --cluster="$cluster_name" \
  --namespace="$namespace" \
  --user="$k8s_user" \
  --kubeconfig="$kubeconfig"
kubectl config use-context "${k8s_user}-context" --kubeconfig="$kubeconfig"

# Grant the user every verb on the namespace's resources. The '*' are quoted
# so the shell cannot glob-expand them against files in the current directory.
kubectl create role "${k8s_user}-role" -n "$namespace" \
  --verb='*' \
  --resource='*'
kubectl create rolebinding "${k8s_user}-binding" -n "$namespace" \
  --role="${k8s_user}-role" \
  --user="$k8s_user"

# --- Clean-up (run manually to revoke the access again)
# kubectl delete csr <user>-csr
# kubectl delete rolebindings <user>-binding -n <namespace>
# kubectl delete roles <user>-role -n <namespace>
问题记录
一、kube-proxy的ipvs模式的问题(版本:v1.23.8)
kube-proxy有两种工作模式,分别是iptables(默认)和ipvs,可在配置文件中mode参数指定
当发出的UDP包超MTU时,ipvs模式下会丢弃;而iptables模式则会进行分包重组发送
二、主机在线扩容内存后,kubelet未重启导致pod发生oom问题
扩容后,k8s并不会感知到内存的变化,一定要重启kubelet使配置重载;如果不重启,则需要通过
修改/sys/fs/cgroup/memory/kubepods.slice/memory.limit_in_bytes的大小指定容器所能使用的最大内存
yaml示例(deployment、service、pv与pvc、pv与sc)
# Deployment manifest: one replica of "appname" that writes its logs to a
# hostPath directory on the node.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: appname
  namespace: namespacename
  labels:
    app: appname
spec:
  replicas: 1
  selector:
    matchLabels:
      app: appname
  template:
    metadata:
      labels:
        app: appname # must match spec.selector.matchLabels
        version: v1
    spec:
      containers:
        - name: appname
          image: registry:5000/imagename:tag
          imagePullPolicy: Always
          env:
            - name: NODE_ENV
              value: production
          volumeMounts:
            - name: logs
              mountPath: /home/Apps/logs
      volumes:
        - name: logs
          hostPath:
            path: /logs/
---
# ClusterIP Service manifest: cluster-internal port 80 -> pod port 8080.
apiVersion: v1
kind: Service
metadata:
  name: servicename
  namespace: namespacename
  labels:
    name: servicename
spec:
  selector:
    app: podappname # label of the pods that receive the traffic
  type: ClusterIP
  ports:
    - protocol: TCP
      port: 80
      targetPort: 8080
---
# NodePort Service manifest: service port 8080 published on port 30000 of
# every node.
apiVersion: v1
kind: Service
metadata:
  name: servicename
  namespace: namespacename
  labels:
    name: servicename
spec:
  selector:
    app: podappname # label of the pods that receive the traffic
  type: NodePort
  ports:
    - name: tcp
      protocol: TCP
      port: 8080
      nodePort: 30000
---
# LoadBalancer Service manifest: load-balancer port 80 -> pod port 8080.
apiVersion: v1
kind: Service
metadata:
  name: servicename
  namespace: namespacename
  labels:
    name: servicename
spec:
  selector:
    app: podappname # label of the pods that receive the traffic
  type: LoadBalancer
  ports:
    - protocol: TCP
      port: 80
      targetPort: 8080
---
############################ PV and PVC ############################
# Local (hostPath) PersistentVolume manifest
apiVersion: v1
kind: PersistentVolume
metadata:
  name: local-pv-example
spec:
  capacity:
    storage: 10Gi
  accessModes:
    - ReadWriteOnce
  persistentVolumeReclaimPolicy: Retain
  storageClassName: local-storage
  hostPath:
    path: /mnt/data
    type: DirectoryOrCreate
---
# NFS PersistentVolume manifest
apiVersion: v1
kind: PersistentVolume
metadata:
  name: nfs-pv-example
spec:
  capacity:
    storage: 10Gi
  accessModes:
    - ReadWriteMany
  # NOTE(review): the Recycle reclaim policy is deprecated upstream; consider
  # Retain (or dynamic provisioning) - confirm against your cluster version.
  persistentVolumeReclaimPolicy: Recycle
  storageClassName: nfs
  nfs:
    server: <nfs-server-ip> # IP of the NFS server
    path: "/exports/data" # directory exported by the NFS server
---
# PersistentVolumeClaim manifest
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: myclaim
  namespace: namespace
spec:
  storageClassName: local-storage # or nfs; must match the PV's storageClassName
  # The requested mode must also be offered by the PV: the NFS PV example in
  # this file only lists ReadWriteMany, so request that mode when binding to it.
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 5Gi
---
############################ PV and StorageClass ############################
# Local PersistentVolume manifest, pinned to nodes carrying a given label
apiVersion: v1
kind: PersistentVolume
metadata:
  name: local-pv-example
spec:
  capacity:
    storage: 2Gi
  accessModes:
    - ReadWriteOnce
  volumeMode: Filesystem
  persistentVolumeReclaimPolicy: Delete
  storageClassName: local-storage
  local:
    path: /mnt/etcd
  nodeAffinity:
    required:
      nodeSelectorTerms:
        - matchExpressions:
            - key: keyname # node label key
              operator: In
              values:
                - keyvalue # node label value
---
# NFS PersistentVolume manifest
apiVersion: v1
kind: PersistentVolume
metadata:
  name: nfs-pv-example
spec:
  capacity:
    storage: 2Gi
  accessModes:
    - ReadWriteOnce
  volumeMode: Filesystem
  # NOTE(review): statically created NFS volumes may not support the Delete
  # policy (the PV can end up Failed when released) - verify, or use Retain.
  persistentVolumeReclaimPolicy: Delete
  storageClassName: nfs
  nfs:
    server: <nfs-server-ip> # IP of the NFS server
    path: "/exports/data" # directory exported by the NFS server
---
# StorageClass manifest (no dynamic provisioning; PVs are created by hand)
kind: StorageClass
apiVersion: storage.k8s.io/v1
metadata:
  name: local-storage # or nfs; must match the PV's storageClassName
provisioner: kubernetes.io/no-provisioner
volumeBindingMode: WaitForFirstConsumer
Pod 拓扑分布约束
# Deployment whose pods are spread evenly across zones.
# topologySpreadConstraints is a pod-level field, so in a Deployment it lives
# under spec.template.spec (a Deployment also requires selector and template).
kind: Deployment
apiVersion: apps/v1
metadata:
  name: mypod
  labels:
    foo: bar
spec:
  selector:
    matchLabels:
      foo: bar
  template:
    metadata:
      labels:
        foo: bar
    spec:
      topologySpreadConstraints:
        - maxSkew: 1
          topologyKey: zone
          whenUnsatisfiable: DoNotSchedule
          labelSelector:
            matchLabels:
              foo: bar
      containers:
        - name: appname # placeholder container; replace with the real workload
          image: registry:5000/imagename:tag
# maxSkew: maximum allowed difference in matching-pod count between topology
#   domains; must be greater than zero
# topologyKey: the node label that defines a topology domain
# labelSelector: selects the pods that are counted for the spread
Service代理外部域名
# ExternalName Service: the in-cluster name "rabbitmq-proxy" resolves to an
# external DNS name.
apiVersion: v1
kind: Service
metadata:
  name: rabbitmq-proxy
  namespace: cloud2
spec:
  type: ExternalName
  externalName: amqp-cn-rij4ky4bh001.ap-southeast-6.amqp-0.vpc.mq.amqp.yuncs.com
Service代理外部服务
# Service without a selector: its backends are not discovered from pods but
# supplied by the Endpoints object below (same name and namespace).
apiVersion: v1
kind: Service
metadata:
  name: rabbitmq-proxy
  namespace: cloud2
spec:
  ports:
    - port: 15672
      name: http
      targetPort: 15672
    - port: 5672
      name: amqp
      targetPort: 5672
---
# Backend addresses for the Service above; the port names must match the
# Service's port names.
kind: Endpoints
apiVersion: v1
metadata:
  name: rabbitmq-proxy
  namespace: cloud2
subsets:
  - addresses:
      - ip: 192.168.7.54
      - ip: 192.168.7.55
      - ip: 192.168.7.56
    ports:
      - port: 15672
        name: http
      - port: 5672
        name: amqp
cronjob+job在所有node节点上同时执行定时任务
# CronJob that deletes old *.zip log archives from /logs on every node.
# Each run starts `parallelism` pods; the required pod anti-affinity on
# kubernetes.io/hostname forces them onto different nodes.
apiVersion: batch/v1
kind: CronJob
metadata:
  annotations:
    description: 定时任务描述
  name: node-log-clean
  namespace: job
spec:
  schedule: "10 03 * * *"
  timeZone: "Asia/Shanghai"
  concurrencyPolicy: Forbid
  failedJobsHistoryLimit: 1
  successfulJobsHistoryLimit: 1
  suspend: false
  jobTemplate:
    spec:
      parallelism: 2 # pods run at the same time; should equal the number of nodes
      completions: 2 # pods that must succeed; equal to parallelism
      ttlSecondsAfterFinished: 300 # delete the Job 300s after all its pods finished
      template:
        metadata:
          labels:
            name: node-log-clean
        spec:
          affinity:
            podAntiAffinity:
              requiredDuringSchedulingIgnoredDuringExecution:
                - labelSelector:
                    matchExpressions:
                      - key: name
                        operator: In
                        values:
                          - node-log-clean
                  topologyKey: kubernetes.io/hostname
          hostNetwork: true
          restartPolicy: Never
          containers:
            - name: node-log-clean
              image: alpine:latest
              imagePullPolicy: IfNotPresent
              # The alpine image ships /bin/sh (busybox) but no /bin/bash.
              command:
                - /bin/sh
                - -c
                # Search the directory itself rather than the /logs/* glob,
                # which makes find fail when /logs is empty.
                - |
                  find /logs -type f -mmin "+${log_save_minutes}" -name "*.zip" -delete
              env:
                - name: log_save_minutes # minimum file age in minutes (1440 = 1 day)
                  value: "1440"
              securityContext:
                privileged: true
              volumeMounts:
                - name: logs
                  mountPath: /logs
                  readOnly: false
          volumes:
            - name: logs
              hostPath:
                path: /logs
minikube使用
#!/bin/bash
# Rebuild a local 3-node minikube cluster from scratch, pulling images from
# the Aliyun mirror and pre-seeding minikube's binary cache.
#
# Prerequisite: docker is installed. minikube binary download:
#   https://github.com/kubernetes/minikube/releases/tag/v1.35.0
# Optional kubectl alias:
#   echo "alias kubectl='minikube kubectl --'" >> ~/.bashrc
#   source ~/.bashrc
set -euo pipefail

readonly K8S_VERSION="v1.30.0"
readonly IMAGE_REPO="registry.cn-hangzhou.aliyuncs.com/google_containers"
readonly KICBASE_IMAGE="${IMAGE_REPO}/kicbase:v0.0.46"
readonly CACHE_DIR="${HOME}/.minikube/cache/linux/amd64/${K8S_VERSION}"

# Start clean. WARNING: "docker system prune -a" removes ALL unused docker
# images/containers/networks on this host; it asks for confirmation first.
minikube delete --all --purge
docker system prune -a

# The required images can be pulled manually beforehand:
# docker pull registry.cn-hangzhou.aliyuncs.com/google_containers/kicbase:v0.0.46
# docker pull registry.cn-hangzhou.aliyuncs.com/google_containers/kube-scheduler:v1.30.0
# docker pull registry.cn-hangzhou.aliyuncs.com/google_containers/kube-apiserver:v1.30.0
# docker pull registry.cn-hangzhou.aliyuncs.com/google_containers/kube-controller-manager:v1.30.0
# docker pull registry.cn-hangzhou.aliyuncs.com/google_containers/kube-proxy:v1.30.0
# docker pull registry.cn-hangzhou.aliyuncs.com/google_containers/etcd:3.5.12-0
# docker pull registry.cn-hangzhou.aliyuncs.com/google_containers/coredns:v1.11.1
# docker pull registry.cn-hangzhou.aliyuncs.com/google_containers/pause:3.9
# docker pull registry.cn-hangzhou.aliyuncs.com/google_containers/storage-provisioner:v5

# Pre-download kubelet/kubectl/kubeadm (plus checksums) into minikube's cache
# and verify each binary, so a truncated download is caught here.
mkdir -p "$CACHE_DIR"
for binary in kubelet kubectl kubeadm; do
  for file_name in "$binary" "${binary}.sha256"; do
    wget "https://dl.k8s.io/${K8S_VERSION}/bin/linux/amd64/${file_name}" \
      -O "${CACHE_DIR}/${file_name}"
  done
  # The .sha256 file holds only the hash; build a "hash  path" line for sha256sum.
  printf '%s  %s\n' "$(<"${CACHE_DIR}/${binary}.sha256")" "${CACHE_DIR}/${binary}" \
    | sha256sum -c -
done

# Create a cluster with 3 nodes
minikube start --force --nodes=3 --cpus=2 --memory=4g \
  --kubernetes-version="$K8S_VERSION" \
  --service-cluster-ip-range='10.96.0.0/12' \
  --image-repository="$IMAGE_REPO" \
  --base-image="$KICBASE_IMAGE"

# Restart the minikube cluster:
#   minikube stop && minikube start --force
# List the images present in the cluster: minikube image ls
# Once the cluster is up, images needed by later pods can be loaded straight
# into it with: minikube image load <imageName>
