K8s RBAC / Keepalived / Harbor / 网络策略 / iptables · 点击展开答案
# 1. Create the ServiceAccount that Jenkins will authenticate as.
kubectl create serviceaccount jenkins-sa -n devops

# 2. Create a ClusterRole that allows managing Deployments, Services and Pods.
#    The heredoc delimiter is quoted so the shell passes the manifest through
#    verbatim (no variable or command expansion).
kubectl apply -f - <<'EOF'
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: jenkins-deployer
rules:
  - apiGroups: ["apps", ""]
    resources: ["deployments", "services", "pods", "pods/log"]
    verbs: ["get", "list", "create", "update", "patch", "delete"]
EOF

# 3. Bind the ServiceAccount to the ClusterRole.
kubectl create clusterrolebinding jenkins-binding \
  --clusterrole=jenkins-deployer \
  --serviceaccount=devops:jenkins-sa

# 4. Issue a token for Jenkins (requested lifetime: 87600h = 10 years).
#    This prints a bearer token only; it still has to be placed into a
#    kubeconfig or a Jenkins credential. The API server may return a token
#    with a shorter lifetime than requested.
kubectl create token jenkins-sa -n devops --duration=87600h
# /etc/keepalived/keepalived.conf (MASTER node)

# Health check for nginx. Defined before the vrrp_instance because keepalived
# resolves track_script names while parsing; a script declared later is
# reported as "not found" and ignored.
vrrp_script check_nginx {
    script "killall -0 nginx"   # signal 0 only probes for the process; non-zero exit = failure
    interval 2                  # run the check every 2 seconds
    weight -20                  # while the check fails, lower the priority by 20
}

vrrp_instance VI_1 {
    state MASTER                # use BACKUP on the standby node
    interface eth0              # NIC the instance is bound to
    virtual_router_id 51        # must be identical on master and backup
    priority 100                # standby uses 90; the higher priority becomes master
    advert_int 1                # advertisement (heartbeat) interval: 1 second
    authentication {
        auth_type PASS
        auth_pass 1234          # same on both nodes; keeps unauthorized nodes out
    }
    virtual_ipaddress {
        192.168.1.100/24        # VIP
    }
    # If nginx is down the priority drops (100 - 20 = 80, below the standby's 90),
    # which triggers VIP failover.
    track_script {
        check_nginx
    }
}
authentication 密码,防止误加入
# Download the offline installer.
wget https://github.com/goharbor/harbor/releases/download/v2.10.0/harbor-offline-installer-v2.10.0.tgz
# Extract exactly the archive downloaded above (a glob could match several files).
tar xf harbor-offline-installer-v2.10.0.tgz && cd harbor

# Create the config from the template and edit it.
cp harbor.yml.tmpl harbor.yml
vim harbor.yml
# Key settings:
#   hostname: harbor.yourdomain.com
#   https.certificate / https.private_key (or comment out https to use http)
#   harbor_admin_password: Admin@123   (example value only; pick your own)

# Install (requires docker-compose).
./install.sh --with-trivy  # --with-trivy enables vulnerability scanning
--with-trivy 即集成 Trivy 扫描引擎
# Required on every node (otherwise image pulls end in ImagePullBackOff).
mkdir -p /etc/docker/certs.d/harbor.yourdomain.com
cp harbor.crt /etc/docker/certs.d/harbor.yourdomain.com/ca.crt

# Extra step for containerd nodes: place the CA at the path that ca_file
# refers to, then reference it in the registry TLS section. The registry host
# in the section name must match the Harbor hostname used above.
cp harbor.crt /etc/ssl/certs/harbor.crt
vim /etc/containerd/config.toml
# [plugins."io.containerd.grpc.v1.cri".registry.configs."harbor.yourdomain.com".tls]
#   ca_file = "/etc/ssl/certs/harbor.crt"
systemctl restart containerd
# Scenario: external traffic to this host's port 8080 is forwarded to the
# internal host 192.168.1.10:80.

# Enable kernel IP forwarding. This is a runtime setting only; it is lost on
# reboot unless net.ipv4.ip_forward=1 is also set in the sysctl configuration.
echo 1 > /proc/sys/net/ipv4/ip_forward

# DNAT: rewrite the destination address of incoming packets.
iptables -t nat -A PREROUTING -p tcp --dport 8080 \
  -j DNAT --to-destination 192.168.1.10:80

# MASQUERADE: rewrite the source address of the forwarded packets.
iptables -t nat -A POSTROUTING -p tcp -d 192.168.1.10 --dport 80 \
  -j MASQUERADE

# List the nat table rules.
iptables -t nat -L -n --line-numbers

# Persist the rules (/etc/sysconfig/iptables is the RHEL/CentOS location).
iptables-save > /etc/sysconfig/iptables
默认 K8s 所有 Pod 互通。NetworkPolicy 是 K8s 的"防火墙",基于标签控制哪些 Pod 能互相访问。需要 CNI 支持(Calico/Cilium 支持,Flannel 不支持)。
# Restriction: only Pods labeled app=frontend may reach port 3000 of Pods
# labeled app=backend.
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: backend-allow-frontend
  namespace: production
spec:
  podSelector:
    matchLabels:
      app: backend            # this policy applies to the backend Pods
  policyTypes:
    - Ingress
  ingress:
    - from:
        # A bare podSelector (no namespaceSelector) matches Pods in the
        # policy's own namespace.
        - podSelector:
            matchLabels:
              app: frontend   # only frontend Pods are allowed in
      ports:
        - protocol: TCP
          port: 3000
# Security best practice: create a "default deny" policy first, then allow
# traffic selectively with additional policies.
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: default-deny-ingress
spec:
  podSelector: {}     # empty selector matches every Pod in the namespace
  policyTypes:
    - Ingress         # no ingress rules listed = all inbound traffic is denied
# Set the target first, e.g.:  pid=1234; proc_name=java
# (Variables are used instead of <pid>-style placeholders because the shell
# would parse "<" and ">" as redirections.)

# Thread count of one process.
grep Threads "/proc/${pid}/status"

# Show all threads of the process (-H = thread view).
top -H -p "${pid}"

# List threads with ps (-L expands threads).
ps -eLf | grep -- "${proc_name}"

# Total number of threads currently on the system
# ("nlwp=" prints the column without a header line).
ps -eo nlwp= | awk '{sum += $1} END {print sum}'

# System-wide thread limit.
cat /proc/sys/kernel/threads-max
top -H -p <pid> 定位后,将线程 ID 转为十六进制(对应 jstack 输出中的 nid),再结合 jstack 分析线程堆栈。
# Show the current isolation level
# (older MySQL versions expose it as @@tx_isolation instead).
SELECT @@transaction_isolation;
# Change the isolation level for the current session.
SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
# Option 1: inject as an environment variable (no hot reload; the value is
# read once when the container starts). Container-level field.
env:
  - name: DB_PASSWORD
    valueFrom:
      secretKeyRef:
        name: db-secret
        key: password
# Option 2: mount as files (supports hot reload, takes effect after roughly
# one minute). The ConfigMap is mounted as a directory WITHOUT subPath:
# files mounted via subPath are never refreshed when the ConfigMap changes.
# The key config.yaml shows up as /etc/app/config.yaml; note that the mount
# hides anything the image already had under /etc/app.
# Container-level field:
volumeMounts:
  - name: config
    mountPath: /etc/app
# Pod-level field (spec.volumes):
volumes:
  - name: config
    configMap:
      name: app-config
nginx -s reload)subPath 挂载的文件不会自动更新,这是常见坑
# === grep ===
grep -i "error" app.log # case-insensitive match
grep -v "DEBUG" app.log # exclude matching lines
grep -E "ERROR|WARN" app.log # several keywords (extended regex)
grep -A3 -B2 "Exception" app.log # matching line plus 2 lines before and 3 after (context)
grep -c "404" access.log # count matching lines
# === awk ===
awk '{print $1, $7}' access.log # print columns 1 and 7
awk -F: '{print $1}' /etc/passwd # use ':' as the field separator
awk '$9==500 {print $0}' access.log # keep lines whose column 9 equals 500
awk '{sum+=$10} END{print sum}' a.log # sum up column 10
awk 'NR>=10 && NR<=20' app.log # print lines 10 through 20
# === sed ===
sed 's/old/new/g' file.txt # global replace (the original file is not modified)
sed -i 's/old/new/g' file.txt # in-place replace (modifies the file)
sed -n '5,10p' file.txt # print lines 5 through 10
sed '/^#/d' nginx.conf # delete comment lines (lines starting with '#')
sed -i "/server_name/a\ listen 443;" f # insert a line after each matching line
grep "ERROR" app.log | awk '{print $1}' | sort | uniq -c | sort -rn → 按第 1 列(通常是时间戳)分组统计 ERROR 次数,并按次数降序排列;只有当第 1 列本身精确到分钟时才是"每分钟"统计,否则需要先把时间截取到分钟。面试现场写出这种管道命令加分明显。
# Blue-green switch: repoint the Service by changing its selector label.
# Starting point: the Service currently selects version: blue.
kubectl patch service myapp-svc --patch '{"spec":{"selector":{"version":"green"}}}'

# One-step rollback if something goes wrong.
kubectl patch service myapp-svc --patch '{"spec":{"selector":{"version":"blue"}}}'
# Canary: the old and new Deployments coexist behind the same Service selector.
#   myapp-stable: replicas=9, labels: app=myapp, track=stable
#   myapp-canary: replicas=1, labels: app=myapp, track=canary
# The Service selects only app=myapp, so traffic is split roughly in
# proportion to the replica counts.

# Once the canary looks healthy, shift replicas step by step:
kubectl scale deployment myapp-canary --replicas=5
kubectl scale deployment myapp-stable --replicas=5

# Finally switch over completely and retire the old version.