跳到主要内容

Ansible使用PsExec远程控制Windows

· 阅读需 3 分钟
GavinTan
DevOps Engineer

概要

  • 无需设置 WinRM 即可从 Linux 主机向 Windows 主机运行远程命令。

  • 可以在 Ansible 控制器上运行,以引导 Windows 主机,使其为 WinRM 做好准备。

官方文档

ansible控制端环境

yum install krb5-devel krb5-workstation

pip install pypsexec smbprotocol[kerberos]

/etc/ansible/ansible.cfg

[defaults]

host_key_checking=False

/etc/ansible/hosts

[win]
192.168.32.20
[win:vars]
ansible_user=administrator
ansible_password=123456
system=win7

被控windows

必须放开445端口 防火墙名称-Netlogon 服务(NP-In)

playbook

配置winrm

config_winrm.yml
---
- hosts: win
gather_facts: no
tasks:
- name: config winrm
local_action:
module: community.windows.psexec
hostname: '{{ hostvars[inventory_hostname]["ansible_host"] | default(inventory_hostname) }}'
connection_username: '{{ ansible_user }}'
connection_password: '{{ ansible_password }}'
encrypt: "{{ 'false' if system == 'win7' else 'true' }}"
executable: powershell.exe
arguments: '-'
stdin: |
#提前设置系统密码,修改网络位置不要选择公用网络
#查看winrm启动状态 winrm enumerate winrm/config/listener
winrm quickconfig -quiet -force
winrm set winrm/config/service/auth '@{Basic="true"}'
winrm set winrm/config/service '@{AllowUnencrypted="true"}'
exit

更新powershell(ansible后续通过winrm或openssh控制windows必须升级)

install_win32-openssh.yml
---
- hosts: win
gather_facts: no
vars:
fileurl: http://192.168.8.192:8888
tasks:
- name: install .NET 4.6.1
register: install_net_result
local_action:
module: community.windows.psexec
hostname: '{{ hostvars[inventory_hostname]["ansible_host"] | default(inventory_hostname) }}'
connection_username: '{{ ansible_user }}'
connection_password: '{{ ansible_password }}'
encrypt: "{{ 'false' if system == 'win7' else 'true' }}" #win7需要设置false 不使用加密
executable: powershell.exe
arguments: '-'
stdin: |
$url = "{{ fileurl }}/NDP461-KB3102436-x86-x64-AllOS-ENU.exe"
$file = "$env:temp\NDP461-KB3102436-x86-x64-AllOS-ENU.exe"
(New-Object -TypeName System.Net.WebClient).DownloadFile($url, $file)
if (-not (Test-Path -Path $file)) {echo "download $url failed";exit 1}
Start-Process -FilePath $file -ArgumentList "/q /norestart" -Wait
exit
- name: install PowerShell 5.1
#win7远程安装更新限制 https://learn.microsoft.com/zh-cn/troubleshoot/windows-server/installing-updates-features-roles/windows-update-standalone-installer-returns-error
register: install_powershell_result
when: install_net_result.rc == 0
local_action:
module: community.windows.psexec
hostname: '{{ hostvars[inventory_hostname]["ansible_host"] | default(inventory_hostname) }}'
connection_username: '{{ ansible_user }}'
connection_password: '{{ ansible_password }}'
encrypt: "{{ 'false' if system == 'win7' else 'true' }}"
executable: powershell.exe
arguments: '-'
stdin: |
$url = "{{ fileurl }}/Win7AndW2K8R2-KB3191566-x64.msu"
$file = "$env:temp\Win7AndW2K8R2-KB3191566-x64.msu"
$extpath = "$env:temp\Win7AndW2K8R2-KB3191566"
(New-Object -TypeName System.Net.WebClient).DownloadFile($url, $file)
if (-not (Test-Path -Path $file)) {echo "download $url failed";exit 1}
Start-Process -FilePath $file -ArgumentList "/extract:$extpath" -Wait
$cabfiles = Get-ChildItem -Path "$extpath" -Filter "*.cab"
foreach($f in $cabfiles){Start-Process -FilePath "dism.exe" -ArgumentList "/online /add-package /PackagePath:$extpath/$f /IgnoreCheck /quiet /norestart" -Wait}
Restart-Computer -Force
exit

安装openssh(win10系统以上可以在系统设置功能里直接启用)

install_win32-openssh.yml
---
- hosts: win
gather_facts: no
vars:
fileurl: http://192.168.8.192:8888
tasks:
- name: download Win32-OpenSSH
local_action:
module: community.windows.psexec
hostname: '{{ hostvars[inventory_hostname]["ansible_host"] | default(inventory_hostname) }}'
connection_username: '{{ ansible_user }}'
connection_password: '{{ ansible_password }}'
encrypt: "{{ 'false' if system == 'win7' else 'true' }}"
executable: powershell.exe
arguments: '-'
stdin: |
$url = "{{ fileurl }}/OpenSSH-Win64.zip"
$file = "$env:temp\OpenSSH-Win64.zip"
$expath = "C:\Program Files"
(New-Object -TypeName System.Net.WebClient).DownloadFile($url, $file)
if (-not (Test-Path -Path $file)) {echo "download $url failed";exit 1}
(new-object -com shell.application).NameSpace($expath).CopyHere((new-object -com shell.application).NameSpace($file).Items())
exit
- name: install Win32-OpenSSH
register: install_openssh_result
local_action:
module: community.windows.psexec
hostname: '{{ hostvars[inventory_hostname]["ansible_host"] | default(inventory_hostname) }}'
connection_username: '{{ ansible_user }}'
connection_password: '{{ ansible_password }}'
encrypt: "{{ 'false' if system == 'win7' else 'true' }}"
interactive: true
executable: powershell.exe
arguments: '-ExecutionPolicy Bypass -File "C:\Program Files\OpenSSH-Win64\install-sshd.ps1"'
- name: start Win32-OpenSSH
when: install_openssh_result.rc == 0
local_action:
module: community.windows.psexec
hostname: '{{ hostvars[inventory_hostname]["ansible_host"] | default(inventory_hostname) }}'
connection_username: '{{ ansible_user }}'
connection_password: '{{ ansible_password }}'
encrypt: "{{ 'false' if system == 'win7' else 'true' }}"
executable: powershell.exe
arguments: '-'
stdin: |
netsh advfirewall firewall add rule name=sshd dir=in action=allow protocol=TCP localport=22
net start sshd
Set-Service sshd -StartupType Automatic
exit
- debug:
var: install_openssh_result

打开软件(要弹出窗口必须先获取到windows登录用户的session)

open_soft.yml
---
- hosts: win
gather_facts: no
tasks:
- name: open notepad
local_action:
module: community.windows.psexec
hostname: '{{ hostvars[inventory_hostname]["ansible_host"] | default(inventory_hostname) }}'
connection_username: '{{ ansible_user }}'
connection_password: '{{ ansible_password }}'
encrypt: "{{ 'false' if system == 'win7' else 'true' }}"
executable: notepad.exe
#arguments: /c
working_directory: C:\Users\Administrator\Desktop
interactive: true
interactive_session: 2 #在windows上执行qwinsta命令查看session 或者执行命令query session %username%
process_username: system
asynchronous: true

kubernetes安装

· 阅读需 4 分钟
GavinTan
DevOps Engineer

准备系统环境

关闭 swap

swapoff -a
sed -i 's/.*swap/#&/' /etc/fstab

修改内核参数

cat <<EOF > /etc/sysctl.d/k8s.conf
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
net.ipv4.ip_nonlocal_bind = 1
net.ipv4.ip_forward = 1
vm.swappiness = 0
vm.max_map_count = 262144
net.netfilter.nf_conntrack_max = 1000000
EOF

cat <<EOF | sudo tee /etc/modules-load.d/k8s.conf
br_netfilter
EOF

echo "* soft nofile 65536" >> /etc/security/limits.conf
echo "* hard nofile 65536" >> /etc/security/limits.conf

modprobe br_netfilter
sysctl -p /etc/sysctl.d/k8s.conf

安装 docker

curl -fsSL https://get.docker.com/ | sh -s -- --mirror Aliyun

创建 docker 配置文件

/etc/docker/daemon.json
{
"registry-mirrors": ["https://fl791z1h.mirror.aliyuncs.com"],
"exec-opts": ["native.cgroupdriver=systemd"],
"log-driver": "json-file",
"log-opts": {
"max-size": "100m"
},
"storage-driver": "overlay2"
}

安装 kubernetes

cat <<EOF | sudo tee /etc/yum.repos.d/kubernetes.repo
[kubernetes]
name=Kubernetes
baseurl=https://packages.cloud.google.com/yum/repos/kubernetes-el7-\$basearch
enabled=1
gpgcheck=1
repo_gpgcheck=1
gpgkey=https://packages.cloud.google.com/yum/doc/yum-key.gpg https://packages.cloud.google.com/yum/doc/rpm-package-key.gpg
exclude=kubelet kubeadm kubectl
EOF
  1. 安装工具

    yum install -y kubelet kubeadm kubectl --disableexcludes=kubernetes
  2. 初始化集群

    信息

    --service-cidr service ip 范围

    --pod-network-cidr pod ip 范围

    出现 tc not found in system path 错误: yum install -y iproute-tc

    # 高可用集群使用参数--control-plane-endpoint=mycluster:443
    kubeadm init --kubernetes-version=1.18.0 --apiserver-advertise-address=172.16.7.14 --image-repository registry.aliyuncs.com/google_containers --service-cidr=10.10.0.0/16 --pod-network-cidr=10.244.0.0/16
  3. 安装网络插件

    kubectl create -f https://raw.githubusercontent.com/projectcalico/calico/v3.26.4/manifests/tigera-operator.yaml
    kubectl create -f https://raw.githubusercontent.com/projectcalico/calico/v3.26.4/manifests/custom-resources.yaml
    提示

    calico 节点出现 master calico-node notready 状态以及出现 Connect Socket: Connection reset by peer bird: BGP: Unexpected connect from unknown address

    # 为Calico节点配置IP自动检测,以确保使用正确的IP地址进行路由interface=修改为对应的实际物理网卡
    kubectl set env daemonset/calico-node -n kube-system IP_AUTODETECTION_METHOD=interface=eth.*

集群设置

启用 ipvs 模式

# 修改ConfigMap的kube-system/kube-proxy中的config.conf,mode: "ipvs"

kubectl edit cm kube-proxy -n kube-system

# 修改后重启kube-proxy
kubectl rollout restart daemonset kube-proxy -n kube-system

启用 vip

提示

多 master 须在每个 master 节点上面执行

# 定义vip地址,必须是与主机处于同一网络且未被使用的ip
export VIP=172.16.7.18
# 定义网卡接口,填写主机当前使用的网卡
export INTERFACE=eth0

ctr image pull ghcr.io/kube-vip/kube-vip:v0.4.0
ctr run --rm --net-host ghcr.io/kube-vip/kube-vip:v0.4.0 vip /kube-vip manifest pod \
--interface $INTERFACE \
--vip $VIP \
--controlplane \
--services \
--arp \
--leaderElection | tee /etc/kubernetes/manifests/kube-vip.yaml

安装 storageclass

需要修改 deployment.yaml 文件里的 nfs 服务地址与路径

git clone https://github.com/kubernetes-sigs/nfs-subdir-external-provisioner
cd nfs-subdir-external-provisioner && kubectl apply -f deploy/deployment.yaml deploy/rbac.yaml deploy/class.yaml

设置 nfs 默认 sc

kubectl patch storageclass managed-nfs-storage -p '{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"true"}}}'

安装 ingress

kubectl apply -f https://raw.githubusercontent.com/kubernetes/ingress-nginx/controller-v1.1.3/deploy/static/provider/baremetal/deploy.yaml

设置默认 ingress 类

kubectl patch ingressclass nginx -p '{"metadata": {"annotations":{"ingressclass.kubernetes.io/is-default-class":"true"}}}'

ingress-nginx path 被带过去 ingress 配置添加下面配置

annotations:
nginx.ingress.kubernetes.io/rewrite-target: /
信息

添加一个 ingress(通过 ingressClassName 可指定 ingressclass,不指定使用默认 ingressclass)

kind: Ingress
apiVersion: networking.k8s.io/v1
metadata:
namespace: ops
spec:
ingressClassName: nginx
rules:
- host: zabbix.172.16.7.14.nip.io
http:
paths:
- path: /
pathType: ImplementationSpecific
backend:
service:
name: zabbix-web
port:
number: 8080

安装 kubernetes-dashboard

kubectl apply -f https://raw.githubusercontent.com/kubernetes/dashboard/v2.0.0/aio/deploy/recommended.yaml

授权

kubectl create clusterrolebinding serviceaccount-cluster-admin --clusterrole=cluster-admin --user=system:serviceaccount:kubernetes-dashboard:kubernetes-dashboard

#kubectl create clusterrolebinding serviceaccount-cluster-admin --clusterrole=cluster-admin --group=system:serviceaccount

获取 token

kubectl describe secrets -n kubernetes-dashboard $(kubectl -n kubernetes-dashboard get secret|grep kubernetes-dashboard-token|awk '{print $1}')| grep token | awk 'NR==3{print $2}'

集群外部访问

# 修改service配置,找到type,将ClusterIP改成NodePort 设置nodePort端口
kubectl edit svc kubernetes-dashboard -n kubernetes-dashboard

安装 kubesphere

kubectl apply -f https://github.com/kubesphere/ks-installer/releases/download/v3.1.1/kubesphere-installer.yaml


kubectl apply -f https://github.com/kubesphere/ks-installer/releases/download/v3.1.1/cluster-configuration.yaml

检查安装日志

kubectl logs -n kubesphere-system $(kubectl get pod -n kubesphere-system -l app=ks-install -o jsonpath='{.items[0].metadata.name}') -f

常用操作

触发滚动更新

kubectl rollout restart deploy myapp-deploy -n ops


#旧版本不支持rollout的修改deployment配置文件触发滚动更新
revision=`kubectl -n ops get deploy opsinterface-v1 -ojson|jq -r '.metadata.annotations."deployment.kubernetes.io/revision"|tonumber+1'`
kubectl patch deployment test01-app1 -p '{"spec":{"template": {"metadata": {"annotations": {"deployment.kubernetes.io/revision": $revision}}}}}'

kubectl patch deployment test01-app1 -p '{"spec":{"template": {"metadata": {"annotations": {"kubectl.kubernetes.io/restartedAt": "'`date -Iseconds`'"}}}}}'

新节点加入集群

# 自动生成join命令
kubeadm token create --print-join-command

导出当前集群配置

kubeadm config view > k8s.yaml

更新证书

kubeadm alpha certs renew all --config=k8s.yaml

修改节点 ROLES

kubectl label --overwrite nodes nodename kubernetes.io/role=node1

kubectl 自动补全

echo "source <(kubectl completion bash)" >> ~/.bashrc

疑难解答

卡住 Terminating 状态的资源无法删除

删除资源 yml 配置里的 finalizers 内容即可

kubectl patch ns/myns -p '{"metadata":{"finalizers":[]}}' --type=merge

etcd 无法启动(节点挂掉)

--force-new-cluster 添加该参数覆盖旧集群信息,正常启动后可去掉

vim /etc/kubernetes/manifests/etcd.yaml

# 查看etcd集群信息
etcdctl --cacert=/etc/kubernetes/pki/etcd/ca.crt --cert=/etc/kubernetes/pki/etcd/server.crt --key=/etc/kubernetes/pki/etcd/server.key endpoint status --cluster -w table

删除所有 Evicted Pod

kubectl get pods --all-namespaces -o json | jq '.items[] | select(.status.reason!=null) | select(.status.reason | contains("Evicted")) | "kubectl delete pods \(.metadata.name) -n \(.metadata.namespace)"' | xargs -n 1 bash -c

修复/etc/kubernetes 所有文件

  1. 生成集群配置文件

    kubeadm init phase certs all --config k8s.yml
    kubeadm init phase kubeconfig all --config k8s.yml
    kubeadm init phase control-plane all --config k8s.yml
    kubeadm init phase etcd local --config k8s.yml
  2. 更新 cluster-info 配置

    kubeadm init phase bootstrap-token
  3. 重启控制平面组件

    docker ps |grep -E 'k8s_kube-apiserver|k8s_kube-controller-manager|k8s_kube-scheduler|k8s_etcd_etcd' | awk  '{print $1}'|xargs docker restart
  4. 修复 kubelet 配置

    systemctl stop kubelet
    rm -rf /var/lib/kubelet/pki/ /etc/kubernetes/kubelet.conf
    kubeadm init phase kubeconfig kubelet --config k8s.yml
    kubeadm init phase kubelet-start --config k8s.yml

WinServer2022安装RDWeb HTML5客户端

· 阅读需 2 分钟
GavinTan
DevOps Engineer

安装Remote Desktop Services

  1. 安装Active Directory域服务

    image-20231012171310650

  2. 安装:添加角色和功能向导-->远程桌面服务安装-->快速启动-->基于会话的桌面部署

    image-20231017161014879

  3. 配置:服务器管理器-->远程桌面服务

    点击概述里绿色的RD 网关 + RD 授权进行配置

    RD网关自签证书名称配置自己域名和自定义其他域名都可

    image-20231020163129119

    点击概述里的部署概述任务下拉选项-->编辑部署属性-->证书(使用自己证书或是直接点击创建新证书)

    管理证书里面选择现有证书如果出现 "无法在一个或多个服务器上配置该证书" 错误,需要双击pfx证书导入存储位置要选择本地计算机。

    导出pfx证书: openssl pkcs12 -export -out twss.pfx -inkey /etc/pki/ssl/twss.tk.key -in /etc/pki/ssl/twss.tk.crt

    image-20231020163810060

  4. 访问:https://server_FQDN/RDWeb/webclient/index.html

remoteApp 别名中文会出现"String has UTF-16 code units that do not fit in 8 bits"错误导致黑屏(默认选用快速启动创建的示例的remoteApp会是中文别名)

安装RD WEB HTML5客户端

查看官方安装文档

  1. 更新 PowerShellGet 模块

    Install-Module -Name PowerShellGet -Force
  2. 使用此 cmdlet 从 PowerShell 库安装远程桌面 Web 客户端管理 PowerShell 模块

    上面更新模块后需要先重启 PowerShell,然后更新才能生效,否则模块可能无法正常工作。

    Install-Module -Name RDWebClientManagement
  3. 下载远程桌面 Web 客户端的最新版本:

    Install-RDWebClientPackage
  4. 导入ssl证书[.cer 或 .crt 文件的路径]

    可以在certmgr.msc个人证书里导出

    Import-RDWebClientBrokerCert [.cer file path]
  5. 发布远程桌面 Web 客户端

    可能会看到一个警告,指出不支持每设备 CAL,即使是针对每用户 CAL 配置部署也是如此。 如果部署使用每用户 CAL,则可以忽略此警告。 我们显示它是为了确保你了解配置限制。

    Publish-RDWebClientPackage -Type Production -Latest

    客户端访问地址https://server_FQDN/RDWeb/webclient/index.html

卸载:

Uninstall-RDWebClient

Uninstall-Module -Name RDWebClientManagement

hadoop安装

· 阅读需 4 分钟
GavinTan
DevOps Engineer

zookeeper 集群

zoo.cfg
# The number of milliseconds of each tick
tickTime=2000
# The number of ticks that the initial
# synchronization phase can take
initLimit=10
# The number of ticks that can pass between
# sending a request and getting an acknowledgement
syncLimit=5
# the directory where the snapshot is stored.
# do not use /tmp for storage, /tmp here is just
# example sakes.
dataDir=/data/zookeeper
# the port at which the clients will connect
clientPort=2181
# the maximum number of client connections.
# increase this if you need to handle more clients
#maxClientCnxns=60
#
# Be sure to read the maintenance section of the
# administrator guide before turning on autopurge.
#
# http://zookeeper.apache.org/doc/current/zookeeperAdmin.html#sc_maintenance
#
# The number of snapshots to retain in dataDir
#autopurge.snapRetainCount=3
# Purge task interval in hours
# Set to "0" to disable auto purge feature
#autopurge.purgeInterval=1

## Metrics Providers
#
# https://prometheus.io Metrics Exporter
#metricsProvider.className=org.apache.zookeeper.metrics.prometheus.PrometheusMetricsProvider
#metricsProvider.httpPort=7000
#metricsProvider.exportJvmInfo=true
#当前节点配置0.0.0.0
server.1=0.0.0.0:2888:3888
server.2=172.16.7.15:2888:3888
server.3=172.16.7.16:2888:3888

创建myid与server.后面的标识对应

echo 1 > /data/zookeeper/myid

hadoop ha集群

配置文件

core-site.xml
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://mycluster</value>
<description>namenode地址,配置ha后应配置成ha nameservice名称</description>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/data/hadoop</value>
<description>hadoop文件存放路径的根目录,nn dn默认会存储在该位置</description>
</property>
<property>
<name>io.file.buffer.size</name>
<value>131072</value>
<description>在序列中使用的缓冲区大小,以byte为单位,默认值是4KB</description>
</property>
<property>
<name>hadoop.http.staticuser.user</name>
<value>bigdata</value>
<description>在网页界面访问hdfs使用的用户名,配置与启动hadoop同样用户才有权限访问hdfs</description>
</property>
<property>
<name>ha.zookeeper.quorum</name>
<value>172.16.7.14:2181,172.16.7.15:2181,172.16.7.16:2181</value>
<description>配置zookeeper集群地址</description>
</property>
</configuration>
hdfs-site.xml
<configuration>
<property>
<name>dfs.replication</name>
<value>3</value>
<description>副本数,HDFS存储时的备份数量</description>
</property>
<property>
<name>dfs.nameservices</name>
<value>mycluster</value>
<description>配置ha nameservice名称</description>
</property>
<property>
<name>dfs.ha.namenodes.mycluster</name>
<value>nn1,nn2</value>
<description>设置NameNode ID列表进行</description>
</property>
<property>
<name>dfs.namenode.rpc-address.mycluster.nn1</name>
<value>172.16.7.14:8020</value>
<description>设置nn1的NameNode进程的地址和IPC端口</description>
</property>
<property>
<name>dfs.namenode.rpc-address.mycluster.nn2</name>
<value>172.16.7.15:8020</value>
<description>设置nn2的NameNode进程的地址和IPC端口</description>
</property>
<property>
<name>dfs.namenode.http-address.mycluster.nn1</name>
<value>172.16.7.14:9870</value>
<description>设置nn1的NameNode的web ui地址</description>
</property>
<property>
<name>dfs.namenode.http-address.mycluster.nn2</name>
<value>172.16.7.15:9870</value>
<description>设置nn2的NameNode的web ui地址</description>
</property>
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://172.16.7.14:8485;172.16.7.15:8485/mycluster</value>
<description>指定NameNode的元数据在JournalNode上的存放位置</description>
</property>
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/data/hadoop/journaldata</value>
<description>指定JournalNode在本地磁盘存放数据的位置</description>
</property>
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
<description>开启NameNode自动故障转移</description>
</property>
<property>
<name>dfs.client.failover.proxy.provider.mycluster</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
<description>配置故障转移代理类</description>
</property>
<property>
<name>dfs.ha.fencing.methods</name>
<value>sshfence</value>
<description>隔离方法故障转移期间用来隔离Active NameNode,sshfence-SSH到Active NameNode使用fuser终止进程防止存在多个Active NameNode</description>
</property>
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/home/bigdata/.ssh/id_rsa</value>
<description>使用sshfence隔离机制时必须ssh免密登陆,配置SSH私钥文件</description>
</property>
<property>
<name>dfs.ha.fencing.ssh.connect-timeout</name>
<value>30000</value>
<description>sshfence隔离方法超时时间,以毫秒为单位</description>
</property>
</configuration>
mapred-site.xml
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
<description>用于执行MapReduce作业的运行时框架默认local</description>
</property>
<property>
<name>mapreduce.admin.user.env</name>
<value>HADOOP_MAPRED_HOME=$HADOOP_HOME</value>
<description>可以设置AM【AppMaster】端的环境变量,如果上面缺少配置,可能会造成mapreduce失败</description>
</property>
<property>
<name>yarn.app.mapreduce.am.env</name>
<value>HADOOP_MAPRED_HOME=$HADOOP_HOME</value>
<description>可以设置AM【AppMaster】端的环境变量,如果上面缺少配置,可能会造成mapreduce失败</description>
</property>

</configuration>
yarn-site.xml
<configuration>

<!-- Site specific YARN configuration properties -->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
<description>自定义服务配置MapReduce运行须配置成mapreduce_shuffle</description>
</property>
<property>
<name>yarn.resourcemanager.hostname</name>
<value>master</value>
<description>ResourceManager的主机名</description>
</property>
<property>
<name>yarn.resourcemanager.webapp.address</name>
<value>172.16.7.14:8088</value>
<description>yarn web ui地址</description>
</property>
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
<description>启用日志聚合功能,日志聚合开启后保存到HDFS上</description>
</property>
<property>
<name>yarn.log-aggregation.retain-seconds</name>
<value>86400</value>
<description>聚合后的日志在HDFS上保存时间</description>
</property>
<property>
<name>yarn.log.server.url</name>
<value>http://172.16.7.14:19888/jobhistory/logs</value>
<description>日志聚合服务器的URL</description>
</property>
<property>
<name>yarn.nodemanager.remote-app-log-dir</name>
<value>/tmp/logs</value>
<description>日志在HDFS上存储路径</description>
</property>
</configuration>

配置工作节点(worker):DataNode和NodeManager

vim /etc/hadoop/workers
172.16.7.15
172.16.7.16

启动

#启动zookeeper集群,在所有zookeeper节点上执行
./bin/zkServer.sh start

#启动journalnode进程,在配置的所有namenode节点执行
./bin/hdfs --daemon start journalnode

#格式化namenode,在配置的namenode其中任意一台上执行,仅需要第一次启动集群执行
./bin/hdfs namenode -format
./bin/hdfs --daemon start namenode

#同步namenode元数据,在未执行格式化的其他namenode节点上执行
./bin/hdfs namenode -bootstrapStandby

#启动MR历史记录
./bin/mapred --daemon start historyserver

#在ZooKeeper中初始化HA状态,在配置的namenode其中任意一台上执行
./bin/hdfs zkfc -formatZK

#启动zkfc
./bin/hdfs --daemon start zkfc

#启动所有服务
./sbin/start-all.sh

常用操作

#查看集群状态
./bin/hadoop dfsadmin -report

#停掉namenode
./bin/hdfs --daemon stop namenode

#执行wordcount例子
./bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-3.2.1.jar wordcount /input /output

ambari部署(管理监控hadoop)

编译

yum install maven rpm-build

wget https://www-eu.apache.org/dist/ambari/ambari-2.7.5/apache-ambari-2.7.5-src.tar.gz
tar xfvz apache-ambari-2.7.5-src.tar.gz
cd apache-ambari-2.7.5-src
mvn versions:set -DnewVersion=2.7.5.0.0

pushd ambari-metrics
mvn versions:set -DnewVersion=2.7.5.0.0
popd

#RHEL (CentOS 7) & SUSE (SLES 12 SP2 & SP3)
mvn -B clean install rpm:rpm -DnewVersion=2.7.5.0.0 -DbuildNumber=5895e4ed6b30a2da8a90fee2403b6cab91d19972 -DskipTests -Dpython.ver="python >= 2.6" -Drat.skip=true

#Ubuntu/Debian
mvn -B clean install jdeb:jdeb -DnewVersion=2.7.5.0.0 -DbuildNumber=5895e4ed6b30a2da8a90fee2403b6cab91d19972 -DskipTests -Dpython.ver="python >= 2.6" -Drat.skip=true

安装

#RHEL (CentOS 7) & SUSE (SLES 12 SP2 & SP3)
yum install ambari-server/target/rpm/ambari-server/RPMS/noarch/ambari-server*.rpm

yum install ambari-agent/target/rpm/ambari-agent/RPMS/x86_64/ambari-agent*.rpm

#Ubuntu/Debian
apt-get install ambari-server/target/rpm/ambari-server/RPMS/noarch/ambari-server*.deb

apt-get install ambari-agent/target/rpm/ambari-agent/RPMS/x86_64/ambari-agent*.deb

启动运行

ambari-server setup

ambari-server start
ambari-agent start

访问

http://<ambari-server-host>:8080   admin/admin
提示

bower error Unexpected token [ERROR] Failed to execute goal org.codehaus.mojo:exec-maven-plugin:1.2.1:exec (Bower install) on project ambari-admin: Command execution failed. Process exited with an error: 1 (Exit value: 1) -> [Help 1]

vi ambari-admin/pom.xml
<argument>${basedir}/src/main/resources/ui/admin-web/node_modules/bower/bin/bower</argument>
改为
<argument>bower</argument>

snort3安装

· 阅读需 2 分钟
GavinTan
DevOps Engineer

准备环境

apt install -y build-essential autotools-dev libdumbnet-dev libluajit-5.1-dev libpcap-dev zlib1g-dev pkg-config libhwloc-dev cmake liblzma-dev openssl libssl-dev cpputest libsqlite3-dev libtool uuid-dev git autoconf bison flex libcmocka-dev libnetfilter-queue-dev libunwind-dev libmnl-dev ethtool libjemalloc-dev libpcre3-dev

编译安装

安装libdaq

git clone https://github.com/snort3/libdaq.git
cd libdaq
./bootstrap
./configure
make -j 4
make install
ldconfig

安装snort3

git clone https://github.com/snort3/snort3.git
cd snort3
./configure_cmake.sh --prefix=/usr/local/snort3 --enable-tcmalloc
cd build
make -j 4
make install

centos install: Snort_3_GA_on_OracleLinux_8.pdf

配置

设置alert_json启用日志文件

cat << EOF > /usr/local/snort3/etc/snort/snort.lua
alert_json =
{
file = true,
limit = 200
}
EOF

添加警报规则

alert icmp any any -> $HOME_NET any (msg:"[警告]检测到 ICMP connection 请及时处理"; sid:1000001; rev:1;)

运行

下载community-rules

wget https://www.snort.org/downloads/community/snort3-community-rules.tar.gz

运行snort(IDS)

snort -c /usr/local/snort3/etc/snort/snort.lua -R /data/rules/snort3-community.rules -i ens192 -s 65535 -k none -A alert_fast -n 100000


# 保存日志
snort -c /usr/local/snort3/etc/snort/snort.lua -R /data/rules/snort3-community.rules -i ens192 -s 65535 -k none -A alert_fast -l /data/logs

常用操作

查看alert模块

snort --help-modules | grep alert

运行docker配置

Dockerfile
from debian:stable-20231120

run apt update
run apt install -y build-essential autotools-dev libdumbnet-dev libluajit-5.1-dev libpcap-dev zlib1g-dev pkg-config libhwloc-dev cmake liblzma-dev openssl libssl-dev cpputest libsqlite3-dev libtool uuid-dev git autoconf bison flex libcmocka-dev libnetfilter-queue-dev libunwind-dev libmnl-dev ethtool libjemalloc-dev libpcre3-dev


run cd && git clone https://github.com/snort3/libdaq.git && cd libdaq && ./bootstrap && ./configure && make -j 4 && make install

run cd && git clone https://github.com/snort3/snort3.git && cd snort3 && ldconfig && ./configure_cmake.sh --prefix=/usr/local/snort3 && cd build && make -j 4 && make install

run echo 'PATH=$PATH:/usr/local/snort3/bin' >> ~/.bashrc

run <<EOF cat >> /usr/local/snort3/etc/snort/snort.lua
alert_fast =
{
file = true,
limit = 200
}
EOF
docker-compose.yml
services:
snort:
build: .
command: /usr/local/snort3/bin/snort -c /usr/local/snort3/etc/snort/snort.lua -R /data/rules/snort3-community.rules -i ens192 -s 65535 -k none -A alert_fast -l /data/logs
network_mode: host
volumes:
- ./data:/data
- /etc/localtime:/etc/localtime:ro

ProxySQL安装

· 阅读需 6 分钟
GavinTan
DevOps Engineer

安装

cat <<EOF | tee /etc/yum.repos.d/proxysql.repo
[proxysql_repo]
name= ProxySQL YUM repository
baseurl=https://repo.proxysql.com/ProxySQL/proxysql-2.1.x/centos/\$releasever
gpgcheck=1
gpgkey=https://repo.proxysql.com/ProxySQL/repo_pub_key
EOF

yum install proxysql -y

添加mysql集群节点

提示

同一个节点可以存在多个hostgroup里

mysql -u admin -padmin -h 127.0.0.1 -P 6032

INSERT INTO mysql_servers(hostgroup_id, hostname, port, use_ssl) VALUES (0,'192.168.70.71',3306,1);
INSERT INTO mysql_servers(hostgroup_id, hostname, port, use_ssl) VALUES (0,'192.168.70.72',3306,1);
INSERT INTO mysql_servers(hostgroup_id, hostname, port, use_ssl) VALUES (0,'192.168.70.73',3306,1);


# 保存配置
LOAD MYSQL SERVERS TO RUNTIME;
SAVE MYSQL SERVERS TO DISK;

查看

SELECT * FROM mysql_servers;

添加客户端登录用户

注意

用户必须在mysql节点中存在,且账号密码一致。目前proxysql只支持mysql_native_password密码插件,在mysql节点添加用户时必须指定插件为mysql_native_password。

# mysql节点中执行
CREATE USER 'root'@'%' IDENTIFIED WITH mysql_native_password by '123456';
GRANT ALL PRIVILEGES ON *.* TO 'root'@'%' WITH GRANT OPTION;

通过定义default_hostgroup,指定用户默认连接到具有相同hostgroup_id的后端服务器

# proxysql中执行
INSERT INTO mysql_users (username,password,default_hostgroup) VALUES ('root',MYSQL_NATIVE_PASSWORD('123456'),0);
LOAD MYSQL USERS TO RUNTIME;
SAVE MYSQL USERS TO DISK;

查看

SELECT * FROM mysql_users;

连接

admin管理接口,默认端口为6032。该端口用于查看、配置ProxySQL。

接收SQL语句的接口,默认端口为6033,该端口用于mysql客户端连接。

其他配置

  1. 配置监控用户

    在mysql节点中执行创建监控用户

    CREATE USER 'proxysql'@'%' IDENTIFIED WITH mysql_native_password by '123456';
    GRANT USAGE ON *.* TO 'proxysql'@'%';

    在proxysql中执行设置监控用户账号密码

    UPDATE global_variables SET variable_value='proxysql' WHERE variable_name='mysql-monitor_username';
    UPDATE global_variables SET variable_value='123456' WHERE variable_name='mysql-monitor_password';

    LOAD MYSQL VARIABLES TO RUNTIME;
    SAVE MYSQL VARIABLES TO DISK;

    查看监控信息

    SELECT * FROM monitor.mysql_server_connect_log ORDER BY time_start_us DESC LIMIT 6;
    SELECT * FROM monitor.mysql_server_ping_log ORDER BY time_start_us DESC LIMIT 6;
  2. 配置read_only监控和读/写组

    mysql节点有read_only=0的hostgroup将自动设置为0,read_only=1设置成1

    INSERT INTO mysql_replication_hostgroups (writer_hostgroup,reader_hostgroup,comment) VALUES (0,1,'cluster1');

    LOAD MYSQL SERVERS TO RUNTIME;
    SAVE MYSQL SERVERS TO DISK;

    设置执行只读检查的频率,以毫秒为单位。

    UPDATE global_variables SET variable_value=5000 WHERE variable_name='mysql-monitor_read_only_interval';

    设置只读检查超时时间(以毫秒为单位)

    UPDATE global_variables SET variable_value=5000 WHERE variable_name='mysql-monitor_read_only_timeout';

    LOAD MYSQL VARIABLES TO RUNTIME;
    SAVE MYSQL VARIABLES TO DISK;

    # 查看
    SELECT * FROM monitor.mysql_server_read_only_log ORDER BY time_start_us DESC LIMIT 3;
  3. 启用前端的 SSL/TLS(zabbix连接之类)

    SET mysql-have_ssl="true";
    LOAD MYSQL VARIABLES TO RUNTIME;
    SAVE MYSQL VARIABLES TO DISK;

    # 查看
    SELECT * FROM global_variables WHERE variable_name LIKE 'mysql%ssl%';
  4. 设置返回给客户端的 MySQL 版本号(zabbix-server限定客户端版本之类)

    set mysql-server_version="8.0.27";
    LOAD MYSQL VARIABLES TO RUNTIME;
    SAVE MYSQL VARIABLES TO DISK;

    # 查看
    SELECT * FROM global_variables WHERE variable_name LIKE '%version%';
  5. 配置查询规则

    • 查询规则按rule_id从小到大顺序处理

    • 仅处理 active=1 的规则

    • 第一个规则示例使用插入符号 ( ^) 和美元 ( $) :这些是特殊的正则表达式字符,用于标记模式的开始和结束,即在这种情况下match_digest或match_pattern应该完全匹配查询

    • 不使用插入符号或美元:匹配可以在查询中的任何位置

    • 问号被转义,因为它在正则表达式中具有特殊含义

    • apply=1表示如果当前规则匹配则不会继续匹配后的规则

    • match_digest:将正则表达式与去除 SQL 查询数据的查询摘要进行匹配(例如 SELECT c FROM sbtest1 WHERE id=?,如stats_mysql_query_digest.query_digest)

    • match_pattern:将正则表达式与查询的实际文本匹配(例如,SELECT c FROM sbtest1 WHERE id=2)

    • 当入口值flagIN设置为0时,表示开始进入链式规则。

    • 如未显式指定规则的flagIN值,则默认都为0。当语句匹配完当前规则后,将记下当前规则的flagOUT值,如果flagOUT值非空(NOT NULL),则为该语句打上flagOUT标记。如果该规则的apply字段值不是1,则继续向下匹配。如果语句的flagOUT标记和下一条规则的flagIN值不同,则跳过该规则,继续向下匹配。直到匹配到flagOUT=flagIN的规则,则匹配该规则。该规则是链式规则中的另一条规则。直到某规则的apply字段设置为1,或者已经匹配完所有规则,则最后一次被评估的规则将直接生效,不再继续向下匹配。

    提示

    如果想对match_digest取反,即不被正则匹配的SQL语句才命中规则,则设置mysql_query_rules表中的字段negate_match_pattern=1。同样适用于下面的match_pattern匹配方式。

    摘要总是比查询本身小,对较小的字符串运行正则表达式会更快,建议(出于性能考虑)使用match_digest. 要重写查询或匹配查询文本本身,请使用match_pattern.

    查询语句匹配

    INSERT INTO mysql_query_rules (rule_id,active,username,match_digest,destination_hostgroup,apply) VALUES (10,1,'stnduser','^SELECT * FROM sbtest1 WHERE id=\?$',10,1);

    INSERT INTO mysql_query_rules (rule_id,active,username,match_digest,destination_hostgroup,apply) VALUES (10,1,'stnduser','^SELECT',10,1);

    数据库名称匹配(不利用 use databases 并且不命中其他规则,默认转发到用户 default_hostgroup)

    insert into mysql_query_rules (rule_id, active, schemaname, destination_hostgroup,apply) values(1,1,'aa', 10, 1);

    客户端IP匹配

    insert into mysql_query_rules (rule_id, active, client_addr, destination_hostgroup) values(2,1,'192.168.8.192', 10);

    禁止查询,可以配合客户端ip策略设置白名单

    insert into mysql_query_rules (rule_id, active, match_digest, error_msg) values(3,1,'.','error 9999');

    查询重写

    INSERT INTO mysql_query_rules (rule_id,active,username,match_pattern,replace_pattern,apply) VALUES (30,1,'root','DISTINCT(.*)ORDER BY c','DISTINCT\1',1);

    查询缓存 cache_ttl(毫秒)

    UPDATE mysql_query_rules set cache_ttl=5000 WHERE rule_id=10;

    保存规则配置

    LOAD MYSQL QUERY RULES TO RUNTIME;
    SAVE MYSQL QUERY RULES TO DISK;

    # 查看
    SELECT match_digest,destination_hostgroup FROM mysql_query_rules;

    SELECT rule_id, match_digest, match_pattern, replace_pattern, cache_ttl, apply FROM mysql_query_rules ORDER BY rule_id;
    # 查看hg(主机组)=-1为缓存查询
    SELECT hostgroup hg, sum_time, count_star, digest_text FROM stats_mysql_query_digest ORDER BY sum_time DESC limit 10;

全局变量

| 变量 | 默认值 | 说明 |
| --- | --- | --- |
| admin-admin_credentials | admin:admin | 管理端口用户名和密码 |
| admin-mysql_ifaces | 0.0.0.0:6032 | 管理端口 |
| admin-stats_credentials | stats:stats | 数据端口用户名和密码 |
| mysql-commands_stats | true | 是否开启 SQL 统计,开启后会分析每条 SQL 语句 |
| mysql-connection_max_age_ms | 0 | 到 Backend 的连接空闲多久后会自动关闭 |
| mysql-default_query_timeout | 86400000 | 到 Backend 的查询超时时间(毫秒),超过后会主动停止查询,并从 Backend Kill 掉该连接 |
| mysql-free_connections_pct | 10 | 允许的 Backend 空闲连接数,是一个占 mysql-max_connections 数量的百分比 |
| mysql-interfaces | 0.0.0.0:6033 | 数据端口配置 |
| mysql-max_connections | 2048 | ProxySQL 可接收的最大连接数。默认 10000。 |
| mysql-server_version | 5.5.30 | ProxySQL 返回给客户端的 MySQL 版本号,有可能影响客户端行为 |
| mysql-session_idle_show_processlist | true | 管理端口进行 show processlist 时,是否显示空闲连接,开启后会影响性能 |
| mysql-wait_timeout | 28800000 | 客户端连接空闲超时时间(毫秒) |

修改

set admin-admin_credentials='admin:admin;myuser:myuser';

# 使修改立即生效
load admin variables to runtime;
# 使修改永久保存到磁盘
save admin variables to disk;

查看

SELECT * FROM global_variables;
SELECT @@admin-stats_credentials;
SHOW VARIABLES LIKE "mysql-max_connections";

Percona-XtraDB-Cluster安装

· 阅读需 2 分钟
GavinTan
DevOps Engineer

pxc集群特点

| 特点 | 说明 |
| --- | --- |
| 同步复制 | 数据同时写入所有节点,或者即使只有单个节点发生故障也完全不写入 |
| 多源复制 | 任何节点都可以触发数据更新。 |
| 真正的并行复制 | 副本上的多个线程在行级别执行复制 |
| 自动节点调配 | 只需添加一个节点,它就会自动同步。 |
| 数据一致性 | 不再有不同步的节点。 |
| PXC 严格模式 | 避免使用技术预览功能和不受支持的功能 |
| ProxySQL 的配置脚本 | Percona XtraDB Cluster包含proxysql-admin工具,该工具能够自动配置使用ProxySQL的Percona XtraDB Cluster节点。 |
| SSL加密的自动配置 | Percona XtraDB Cluster包含pxc-encrypt-cluster-traffic变量,该变量启用SSL加密的自动配置 |
| 优化性能 | Percona XtraDB Cluster的性能被优化,以适应不断增长的生产负载 |

准备环境

yum install -y openssl socat  \
procps-ng chkconfig procps-ng coreutils shadow-utils \
grep libaio libev libcurl perl-DBD-MySQL perl-Digest-MD5 \
libgcc libstdc++ libgcrypt libgpg-error zlib glibc openssl-libs

useradd -M -s /sbin/nologin mysql
mkdir -p /data/mysql /var/run/mysqld /var/log/mysqld
chown -R mysql. /data/mysql /var/run/mysqld /var/log/mysqld

安装

wget https://downloads.percona.com/downloads/Percona-XtraDB-Cluster-80/Percona-XtraDB-Cluster-8.0.27/binary/tarball/Percona-XtraDB-Cluster_8.0.27-18.1_Linux.x86_64.glibc2.17-minimal.tar.gz
tar zxf Percona-XtraDB-Cluster_8.0.27-18.1_Linux.x86_64.glibc2.17-minimal.tar.gz
mkdir -p /usr/local/percona
mv Percona-XtraDB-Cluster_8.0.27-18.1_Linux.x86_64.glibc2.17-minimal /usr/local/percona/mysql

cp /usr/local/percona/mysql/support-files/mysql.server /etc/init.d/mysqld
sed -i 's/^basedir=.*/basedir=\/usr\/local\/percona\/mysql/' /etc/init.d/mysqld
sed -i 's/^datadir=.*/datadir=\/data\/mysql/' /etc/init.d/mysqld

# Add the PXC bin directory to PATH via a profile.d drop-in.
# The heredoc delimiter is quoted so that $PATH is written literally and
# resolved each time the file is sourced; an unquoted EOF would freeze the
# PATH of the shell that ran this snippet into the file.
cat <<'EOF' > /etc/profile.d/mysql.sh
export PATH=$PATH:/usr/local/percona/mysql/bin
EOF
# Reload the system profile in the current shell
# (presumably /etc/profile sources /etc/profile.d/*.sh on this distribution).
source /etc/profile

生成配置文件

不同节点需要修改 server-id、wsrep_node_name、wsrep_node_address

cat <<EOF > /etc/my.cnf.d/pxc.cnf
[client]
socket=/var/run/mysqld/mysql.sock

[mysqld]
basedir=/usr/local/percona/mysql
datadir=/data/mysql

socket=/var/run/mysqld/mysql.sock
pid-file=/var/run/mysqld/mysqld.pid
log-error=/var/log/mysqld/mysqld.log

server-id=14
user=mysql

log-bin
binlog_format=ROW
binlog_expire_logs_seconds=604800

innodb_autoinc_lock_mode=2
default_storage_engine=InnoDB
log_timestamps=SYSTEM

######## wsrep ###############
wsrep_cluster_name=pxc-cluster-tt
wsrep_cluster_address=gcomm://172.16.7.14,172.16.7.15,172.16.7.16
wsrep_node_name=pxc-node-14
wsrep_node_address=172.16.7.14
wsrep_applier_threads=8
wsrep_log_conflicts
pxc_strict_mode=ENFORCING
wsrep_sst_method=xtrabackup-v2
wsrep_provider=/usr/local/percona/mysql/lib/libgalera_smm.so
wsrep_provider_options="socket.ssl_key=server-key.pem;socket.ssl_cert=server-cert.pem;socket.ssl_ca=ca.pem"


[sst]
encrypt=4
ssl-key=server-key.pem
ssl-ca=ca.pem
ssl-cert=server-cert.pem
EOF

初始化数据库

mysqld --initialize

启动数据库

提示

从 8.0.31 版本开始,SST 复制在 root 用户下停止工作。mysqld 不能在 root 下启动!

# Run the init script as the mysql user; an explicit shell is passed with -s.
# NOTE(review): presumably the generic "start as non-root" form for the tip
# above; the per-node commands follow - confirm which nodes need this line.
su mysql -s /bin/bash -c '/etc/init.d/mysqld start'
# The first node to start must use bootstrap-pxc. The line that sources
# /etc/rc.d/init.d/functions has to be commented out so systemd is not used;
# otherwise the bootstrap-pxc argument has no effect.
# The sed prefixes that line with '#' ('&' re-inserts the matched text).
sed -i 's/^. \/etc\/rc.d\/init.d\/functions/#&/' /etc/init.d/mysqld
/etc/init.d/mysqld bootstrap-pxc

# Starting the other nodes:
# copy the certificates first - the other nodes must use the certificates of
# the first started node (172.16.7.14 here).
rsync -aP 172.16.7.14:/data/mysql/*.pem /data/mysql
/etc/init.d/mysqld start

Wireguard安装

· 阅读需 6 分钟
GavinTan
DevOps Engineer

WireGuard是一种极其简单但快速且现代的 VPN,采用最先进的加密技术。它的目标是比 IPsec更快、更简单、更精简、更有用,同时避免令人头疼的问题。它的性能远高于 OpenVPN。WireGuard 被设计为通用 VPN,可在嵌入式接口和超级计算机上运行,适合许多不同的情况。它最初针对 Linux 内核发布,现在已跨平台(Windows、macOS、BSD、iOS、Android)且可广泛部署。

服务器安装

前往官方安装文档

Linux内核必须≥5.6

yum install wireguard-tools

Quick Start

前往配置文档

  1. 密钥生成

    umask 077
    wg genkey | tee privatekey | wg pubkey > publickey
  2. 生成配置文件

    当客户端配置了AllowedIPs = 0.0.0.0/0代理所有流量,服务器端必须添加PostUp的iptables来转发流量客户端才能正常使用,是支持多个Peer的也就是能同时配置多个远程端点。

    AllowedIPs通俗的来说就是本地需要经过wireguard的ip网段都要配置上

    # Generate the server-side wg0.conf. umask 077 keeps the file (it contains
    # the private key) unreadable for group and others.
    # The heredoc delimiter is intentionally unquoted: $(cat privatekey) must be
    # expanded while the file is written, so run this from the directory that
    # holds the generated privatekey file.
    umask 077
    cat <<EOF > /etc/wireguard/wg0.conf
    [Interface]
    PrivateKey = $(cat privatekey)
    Address = 172.16.100.1/24 #地址须唯一
    ListenPort = 51820 #udp端口
    PostUp = sysctl -w net.ipv4.ip_forward=1
    PostUp = iptables -A FORWARD -i %i -j ACCEPT
    PostUp = iptables -A FORWARD -o %i -j ACCEPT
    PostUp = iptables -t nat -A POSTROUTING -o eth0 -j MASQUERADE
    PostDown = sysctl -w net.ipv4.ip_forward=0
    PostDown = iptables -D FORWARD -i %i -j ACCEPT
    PostDown = iptables -D FORWARD -o %i -j ACCEPT
    PostDown = iptables -t nat -D POSTROUTING -o eth0 -j MASQUERADE

    [Peer]
    PublicKey = <客户端的publickey>
    AllowedIPs = 172.16.100.2/32
    EOF
  3. 启动wireguard

    wg-quick up wg0

    #设置开机自启
    systemctl enable wg-quick@wg0
  4. 停止wireguard

    wg-quick down wg0

    #删除开机自启
    systemctl disable wg-quick@wg0

客户端安装

客户端需要全局流量走wireguard时,需要添加AllowedIPs = 0.0.0.0/0配置;不想全局流量都走wireguard时,只需要在AllowedIPs中配置需要经过vpn的网段或ip即可

如果是在windows上客户端打通隧道需要在连接局域网的网卡上设置Internet连接共享允许本地wireguard Tunnel创建的网卡

[Interface]
PrivateKey = <客户端的privatekey>
Address = 172.16.100.2/24 #地址须唯一且同服务器为同一网段
DNS = 8.8.8.8,1.1.1.1

[Peer]
PublicKey = <服务器的publickey>
AllowedIPs = 0.0.0.0/0,::/0
Endpoint = <server>:51820
PersistentKeepalive = 25 #当服务器位于NAT或防火墙后面时需要配置keepalive

疑难解答

使用动态域名之类的ip发生变化不会自动重连

git clone https://git.zx2c4.com/wireguard-tools /usr/share/wireguard-tools


# Write the systemd timer unit for the periodic endpoint re-resolution.
# OnCalendar=*:*:0/30 schedules it at seconds 0 and 30 of every minute
# (actual firing is subject to systemd's timer accuracy settings).
# The payload contains no shell expansions, so the unquoted EOF is harmless.
cat <<EOF > /etc/systemd/system/wireguard_reresolve-dns.timer
[Unit]
Description=Periodically reresolve DNS of all WireGuard endpoints

[Timer]
OnCalendar=*:*:0/30

[Install]
WantedBy=timers.target
EOF



# Write the oneshot service that runs reresolve-dns.sh for every config in
# /etc/wireguard. It has no [Install] section; it is meant to be started by
# the timer unit of the same name.
# The heredoc delimiter is quoted: with a bare EOF the shell would expand
# "$i" to an empty string while writing the file, and the generated loop
# would call reresolve-dns.sh "" instead of passing each config path.
cat <<'EOF' > /etc/systemd/system/wireguard_reresolve-dns.service
[Unit]
Description=Reresolve DNS of all WireGuard endpoints
Wants=network-online.target
After=network-online.target

[Service]
Type=oneshot
ExecStart=/bin/sh -c 'for i in /etc/wireguard/*.conf; do /usr/share/wireguard-tools/contrib/reresolve-dns/reresolve-dns.sh "$i"; done'
EOF



systemctl enable wireguard_reresolve-dns.timer --now

遇到运营商UDP限速(QOS)

WireGuard 在国内网络环境下会遇到一个致命的问题:UDP 封锁/限速。虽然通过 WireGuard 可以在隧道内传输任何基于 IP 的协议(TCP、UDP、ICMP、SCTP、IPIP、GRE 等),但 WireGuard 隧道本身是通过 UDP 协议进行通信的,而国内运营商几乎全部采取一刀切的手段:对 UDP 进行限速甚至封锁。

解决方法:使用Phantun将UDP伪装成TCP连接。

服务端:

假设服务端的公网 IP 地址是 121.36.134.95,WireGuard 监听端口是 51822。首先修改配置文件 /etc/wireguard/wg0.conf,在 [Interface] 中添加以下配置:

如果你使用 ping 或者 dig 等工具(小数据包)测试 WireGuard 隧道能够正常工作,但浏览器或者远程桌面(大数据包)却无法正常访问,很有可能是 MTU 的问题,你需要将 MTU 的值调小一点。

Phantun 官方建议将 MTU 的值设为 1428(假设物理网卡的 MTU 是 1500),但经我测试是有问题的。建议直接将 MTU 设置为最低值 1280,然后渐渐增加,直到无法正常工作为止,此时你的 MTU 就是最佳值。

MTU = 1300
PreUp = iptables -t nat -A PREROUTING -p tcp -i eth0 --dport 4567 -j DNAT --to-destination 192.168.201.2
PreUp = RUST_LOG=info phantun_server --local 4567 --remote 127.0.0.1:51822 &> /var/log/phantun_server.log &
PostDown = iptables -t nat -D PREROUTING -p tcp -i eth0 --dport 4567 -j DNAT --to-destination 192.168.201.2
PostDown = killall phantun_server || true

你需要将 eth0 替换为你服务端的物理网卡名。MTU 的取值与调试方法见上文说明。

PreUp = iptables -t nat -A PREROUTING -p tcp -i eth0 --dport 4567 -j DNAT --to-destination 192.168.201.2

这条 iptables 规则表示将 4567 端口的入站流量 DNAT 为 TUN 网卡的 IP 地址。

PreUp = RUST_LOG=info phantun_server --local 4567 --remote 127.0.0.1:51822 &> /var/log/phantun_server.log &

这里会启动 phantun_server,监听在 4567 端口,并将 UDP 数据包转发到 WireGuard。

服务端完整的 WireGuard 配置:

# local settings for Endpoint B
[Interface]
PrivateKey = QH1BJzIZcGo89ZTykxls4i2DKgvByUkHIBy3BES2gX8=
Address = 10.0.0.2/32
ListenPort = 51822
MTU = 1300
PreUp = iptables -t nat -A PREROUTING -p tcp -i eth0 --dport 4567 -j DNAT --to-destination 192.168.201.2
PreUp = RUST_LOG=info phantun_server --local 4567 --remote 127.0.0.1:51822 &> /var/log/phantun_server.log &
PostDown = iptables -t nat -D PREROUTING -p tcp -i eth0 --dport 4567 -j DNAT --to-destination 192.168.201.2
PostDown = killall phantun_server || true

# remote settings for Endpoint A
[Peer]
PublicKey = wXtD/VrRo92JHc66q4Ypmnd4JpMk7b1Sb0AcT+pJfwY=
AllowedIPs = 10.0.0.1/32

最后重启 WireGuard 即可:

$ systemctl restart wg-quick@wg0
客户端:

假设客户端的 WireGuard 监听端口是 51821。首先修改配置文件 /etc/wireguard/wg0.conf,在 [Interface] 中添加以下配置:

MTU = 1300
PreUp = iptables -t nat -A POSTROUTING -o eth0 -s 192.168.200.2 -j MASQUERADE
PreUp = RUST_LOG=info phantun_client --local 127.0.0.1:4567 --remote 121.36.134.95:4567 &> /var/log/phantun_client.log &
PostDown = iptables -t nat -D POSTROUTING -o eth0 -s 192.168.200.2 -j MASQUERADE
PostDown = killall phantun_client || true

你需要将 eth0 替换为你客户端的物理网卡名。

PreUp = iptables -t nat -A POSTROUTING -o eth0 -s 192.168.200.2 -j MASQUERADE

这条 iptables 规则表示对来自 192.168.200.2(TUN 网卡) 的出站流量进行 MASQUERADE。

PreUp = RUST_LOG=info phantun_client --local 127.0.0.1:4567 --remote 121.36.134.95:4567 &> /var/log/phantun_client.log &

这里会启动 phantun_client,监听在 4567 端口,并与服务端建立连接,将伪装的 TCP 数据包传送给服务端。

除此之外还需要修改 WireGuard peer 的 Endpoint,将其修改为 127.0.0.1:4567。

Endpoint = 127.0.0.1:4567

客户端完整的 WireGuard 配置:

# local settings for Endpoint A
[Interface]
PrivateKey = 0Pyz3cIg2gRt+KxZ0Vm1PvSIU+0FGufPIzv92jTyGWk=
Address = 10.0.0.1/32
ListenPort = 51821
MTU = 1300
PreUp = iptables -t nat -A POSTROUTING -o eth0 -s 192.168.200.2 -j MASQUERADE
PreUp = RUST_LOG=info phantun_client --local 127.0.0.1:4567 --remote 121.36.134.95:4567 &> /var/log/phantun_client.log &
PostDown = iptables -t nat -D POSTROUTING -o eth0 -s 192.168.200.2 -j MASQUERADE
PostDown = killall phantun_client || true

# remote settings for Endpoint B
[Peer]
PublicKey = m40NDb5Cqtb78b1DVwY1+kxbG2yEcRhxlrLm/DlPpz8=
Endpoint = 127.0.0.1:4567
AllowedIPs = 10.0.0.2/32
PersistentKeepalive = 25

最后重启 WireGuard 即可:

$ systemctl restart wg-quick@wg0
客户端(多服务端):

如果客户端想和多个服务端建立连接,则需要在客户端为新增的服务端添加如下配置:

# MASQUERADE rule for the second TUN address (192.168.202.2). The PostDown
# line below deletes this rule, so it has to be added on the way up.
PreUp = iptables -t nat -A POSTROUTING -o eth0 -s 192.168.202.2 -j MASQUERADE
# Second phantun_client: its own local port (4568) and its own TUN addresses.
PreUp = RUST_LOG=info phantun_client --local 127.0.0.1:4568 --remote xxxx:4567 --tun-local=192.168.202.1 --tun-peer=192.168.202.2 &> /var/log/phantun_client.log &
PostDown = iptables -t nat -D POSTROUTING -o eth0 -s 192.168.202.2 -j MASQUERADE

本地监听端口需要选择一个与之前不同的端口,同理,TUN 网卡的地址也需要修改。最终的配置如下:

# local settings for Endpoint A
[Interface]
PrivateKey = 0Pyz3cIg2gRt+KxZ0Vm1PvSIU+0FGufPIzv92jTyGWk=
Address = 10.0.0.1/32
ListenPort = 51821
MTU = 1300
# One MASQUERADE rule per phantun TUN address; each one is removed again by
# the matching PostDown line.
PreUp = iptables -t nat -A POSTROUTING -o eth0 -s 192.168.200.2 -j MASQUERADE
PreUp = iptables -t nat -A POSTROUTING -o eth0 -s 192.168.202.2 -j MASQUERADE
# One phantun_client per server, each on its own local port.
# NOTE(review): both instances redirect to the same log file with &>;
# consider a separate log file per instance.
PreUp = RUST_LOG=info phantun_client --local 127.0.0.1:4567 --remote 121.36.134.95:4567 &> /var/log/phantun_client.log &
PreUp = RUST_LOG=info phantun_client --local 127.0.0.1:4568 --remote xxxx:4567 --tun-local=192.168.202.1 --tun-peer=192.168.202.2 &> /var/log/phantun_client.log &
PostDown = iptables -t nat -D POSTROUTING -o eth0 -s 192.168.200.2 -j MASQUERADE
PostDown = iptables -t nat -D POSTROUTING -o eth0 -s 192.168.202.2 -j MASQUERADE
PostDown = killall phantun_client || true

# remote settings for Endpoint B
[Peer]
PublicKey = m40NDb5Cqtb78b1DVwY1+kxbG2yEcRhxlrLm/DlPpz8=
Endpoint = 127.0.0.1:4567
AllowedIPs = 10.0.0.2/32
PersistentKeepalive = 25