From 2c604e01f962ae21828d549c7bef1ec41b1b84d3 Mon Sep 17 00:00:00 2001 From: "weizhou.lan@daocloud.io" Date: Thu, 14 Dec 2023 19:11:57 +0800 Subject: [PATCH] d Signed-off-by: weizhou.lan@daocloud.io --- docs/develop/roadmap.md | 130 +++++++++++++++++----------------- docs/usage/rdma-ib-zh_CN.md | 2 +- docs/usage/rdma-ib.md | 2 +- docs/usage/rdma-roce-zh_CN.md | 4 +- docs/usage/rdma-roce.md | 58 +++++++-------- 5 files changed, 93 insertions(+), 103 deletions(-) diff --git a/docs/develop/roadmap.md b/docs/develop/roadmap.md index 43cca7f1b1..6942847e35 100644 --- a/docs/develop/roadmap.md +++ b/docs/develop/roadmap.md @@ -1,68 +1,68 @@ # roadmap -| feature | description | Alpha release | Beta release | GA release | -|--------------------------|--------------------------------------------------------------------------------------------------------------------------------------|---------------|--------------|------------| -| ippool | ip settings | v0.2.0 | v0.4.0 | v0.6.0 | -| | namespace affinity | v0.4.0 | v0.6.0 | | -| | application affinity | v0.4.0 | v0.6.0 | | -| | multiple default ippool | v0.6.0 | | | -| | multusname affinity | v0.6.0 | | | -| | nodename affinity | v0.6.0 | v0.6.0 | -| | default cluster ippool | v0.2.0 | v0.4.0 | v0.6.0 | -| | default namespace ippool | v0.4.0 | v0.5.0 | | -| | default CNI ippool | v0.4.0 | v0.4.0 | | -| | annotation ippool | v0.2.0 | v0.5.0 | | -| | annotation route | v0.2.0 | v0.5.0 | | -| | ippools for multi-interfaces without specified interface name in annotation | v0.9.0 | | | -| subnet | automatically create ippool | v0.4.0 | | | -| | automatically scaling and deletion ip according to application | v0.4.0 | | | -| | automatically delete ippool | v0.5.0 | | | -| | annotation for multiple interface | v0.4.0 | | | -| | keep ippool after deleting application | v0.5.0 | | | -| | support deployment, statefulset, job, replicaset | v0.4.0 | | | -| | support operator controller | v0.4.0 | | | -| | flexible ip number | v0.5.0 | | 
| -| | ippool inherit route and gateway attribute from its subnet | v0.6.0 | | | -| reservedIP | reservedIP | v0.4.0 | v0.6.0 | | -| fixed ip | fixed ip for each pod of statefulset | v0.5.0 | | | -| | fixed ip ranges for statefulset, deployment, replicaset | v0.4.0 | v0.6.0 | | -| | fixed ip for kubevirt | v0.8.0 | | | -| | support calico | v0.5.0 | v0.6.0 | | -| | support weave | v0.5.0 | v0.6.0 | | -| spidermultusconfig | support macvlan ipvlan sriov custom | v0.6.0 | v0.7.0 | | -| | support ovs-cni | v0.7.0 | | | -| ipam plugin | cni v1.0.0 | v0.4.0 | v0.5.0 | | -| ifacer plugin | bond interface | v0.6.0 | v0.8.0 | | -| | vlan interface | v0.6.0 | v0.8.0 | | -| coordinator plugin | support underlay mode | v0.6.0 | v0.7.0 | | -| | support overlay mode | v0.6.0 | v0.8.0 | | -| | CRD spidercoordinators for multus configuration | v0.6.0 | v0.8.0 | | -| | detect ip conflict and gateway | v0.6.0 | v0.6.0 | | -| | specify the MAC of pod | v0.6.0 | v0.8.0 | | -| | tune the default route of pod multiple interfaces | v0.6.0 | v0.8.0 | | -| ovs/macvlan/sriov/ipvlan | visit service based on kube-proxy | v0.6.0 | v0.7.0 | | -| | visit local node to guarantee the pod health check | v0.6.0 | v0.7.0 | | -| | visit nodePort with spec.externalTrafficPolicy=local or spec.externalTrafficPolicy=cluster | v0.6.0 | | | -| | bandwidth | In plan | | | -| observability | eBPF: pod stats | In plan | | | -| network policy | ipvlan | v0.8.0 | | | -| | macvlan | In plan | | | -| | sriov | In plan | | | -| bandwidth | ipvlan | v0.8.0 | | | -| | macvlan | In plan | | | -| | sriov | In plan | | | -| accelerate | eBPF: visit service based on the kube-proxy replacement | In plan | | | -| | eBPF: accelerate communication of pods on a same node | In plan | | | -| recycle IP | recycle IP taken by deleted pod | v0.4.0 | v0.6.0 | | -| | recycle IP taken by deleting pod | v0.4.0 | v0.6.0 | | -| dual-stack | dual-stack | v0.2.0 | v0.4.0 | | -| CLI | debug and operate. 
check which pod an IP is taken by, check IP usage , trigger GC | In plan | | | +| feature | description | Alpha release | Beta release | GA release | +|--------------------------|-----------------------------------------------------------------------------------------------------------------------|---------------|--------------|------------| +| ippool | ip settings | v0.2.0 | v0.4.0 | v0.6.0 | +| | namespace affinity | v0.4.0 | v0.6.0 | | +| | application affinity | v0.4.0 | v0.6.0 | | +| | multiple default ippool | v0.6.0 | | | +| | multusname affinity | v0.6.0 | | | +| | nodename affinity | v0.6.0 | v0.6.0 | +| | default cluster ippool | v0.2.0 | v0.4.0 | v0.6.0 | +| | default namespace ippool | v0.4.0 | v0.5.0 | | +| | default CNI ippool | v0.4.0 | v0.4.0 | | +| | annotation ippool | v0.2.0 | v0.5.0 | | +| | annotation route | v0.2.0 | v0.5.0 | | +| | ippools for multi-interfaces without specified interface name in annotation | v0.9.0 | | | +| subnet | automatically create ippool | v0.4.0 | | | +| | automatically scaling and deletion ip according to application | v0.4.0 | | | +| | automatically delete ippool | v0.5.0 | | | +| | annotation for multiple interface | v0.4.0 | | | +| | keep ippool after deleting application | v0.5.0 | | | +| | support deployment, statefulset, job, replicaset | v0.4.0 | | | +| | support operator controller | v0.4.0 | | | +| | flexible ip number | v0.5.0 | | | +| | ippool inherit route and gateway attribute from its subnet | v0.6.0 | | | +| reservedIP | reservedIP | v0.4.0 | v0.6.0 | | +| fixed ip | fixed ip for each pod of statefulset | v0.5.0 | | | +| | fixed ip ranges for statefulset, deployment, replicaset | v0.4.0 | v0.6.0 | | +| | fixed ip for kubevirt | v0.8.0 | | | +| | support calico | v0.5.0 | v0.6.0 | | +| | support weave | v0.5.0 | v0.6.0 | | +| spidermultusconfig | support macvlan ipvlan sriov custom | v0.6.0 | v0.7.0 | | +| | support ovs-cni | v0.7.0 | | | +| ipam plugin | cni v1.0.0 | v0.4.0 | v0.5.0 | | +| ifacer plugin 
| bond interface | v0.6.0 | v0.8.0 | | +| | vlan interface | v0.6.0 | v0.8.0 | | +| coordinator plugin | support underlay mode | v0.6.0 | v0.7.0 | | +| | support overlay mode | v0.6.0 | v0.8.0 | | +| | CRD spidercoordinators for multus configuration | v0.6.0 | v0.8.0 | | +| | detect ip conflict and gateway | v0.6.0 | v0.6.0 | | +| | specify the MAC of pod | v0.6.0 | v0.8.0 | | +| | tune the default route of pod multiple interfaces | v0.6.0 | v0.8.0 | | +| ovs/macvlan/sriov/ipvlan | visit service based on kube-proxy | v0.6.0 | v0.7.0 | | +| | visit local node to guarantee the pod health check | v0.6.0 | v0.7.0 | | +| | visit nodePort with spec.externalTrafficPolicy=local or spec.externalTrafficPolicy=cluster | v0.6.0 | | | +| | bandwidth | In plan | | | +| observability | eBPF: pod stats | In plan | | | +| network policy | ipvlan | v0.8.0 | | | +| | macvlan | In plan | | | +| | sriov | In plan | | | +| bandwidth | ipvlan | v0.8.0 | | | +| | macvlan | In plan | | | +| | sriov | In plan | | | +| eBPF | implement service by cgroup eBPF | In plan | | | +| | accelerate communication of pods on a same node | In plan | | | +| recycle IP | recycle IP taken by deleted pod | v0.4.0 | v0.6.0 | | +| | recycle IP taken by deleting pod | v0.4.0 | v0.6.0 | | +| dual-stack | dual-stack | v0.2.0 | v0.4.0 | | +| CLI | debug and operate. 
check which pod an IP is taken by, check IP usage , trigger GC | In plan | | | | multi-cluster | a broker cluster could synchronize ippool resource within a same subnet from all member clusters, which could help avoid IP conflict | In plan | | | -| | support submariner | v0.8.0 | | | -| dual CNI | underlay cooperate with cilium | v0.7.0 | | | -| | underlay cooperate with calico | v0.7.0 | | | -| RDMA | support macvlan and ipvlan CNI for RoCE device | v0.8.0 | | | -| | support sriov CNI for RoCE device | v0.8.0 | | | -| | support ipoib CNI for infiniband device | In plan | | | -| | support ib-sriov CNI for infiniband device | In plan | | | -| egressGateway | egressGateway | v0.8.0 | | | +| | support submariner | v0.8.0 | | | +| dual CNI | underlay cooperate with cilium | v0.7.0 | | | +| | underlay cooperate with calico | v0.7.0 | | | +| RDMA | support macvlan and ipvlan CNI for RoCE device | v0.8.0 | | | +| | support sriov CNI for RoCE device | v0.8.0 | | | +| | support ipoib CNI for infiniband device | v0.9.0 | | | +| | support ib-sriov CNI for infiniband device | v0.9.0 | | | +| egressGateway | egressGateway | v0.8.0 | | | diff --git a/docs/usage/rdma-ib-zh_CN.md b/docs/usage/rdma-ib-zh_CN.md index a4e3e8a395..640bbb5bd0 100644 --- a/docs/usage/rdma-ib-zh_CN.md +++ b/docs/usage/rdma-ib-zh_CN.md @@ -92,7 +92,7 @@ apiVersion: sriovnetwork.openshift.io/v1 kind: SriovNetworkNodePolicy metadata: - name: ibsriov + name: ib-sriov namespace: kube-system spec: nodeSelector: diff --git a/docs/usage/rdma-ib.md b/docs/usage/rdma-ib.md index 4ceed63acd..45dfa5cc3d 100644 --- a/docs/usage/rdma-ib.md +++ b/docs/usage/rdma-ib.md @@ -92,7 +92,7 @@ The following steps demonstrate how to use [IB-SRIOV](https://github.com/k8snetw apiVersion: sriovnetwork.openshift.io/v1 kind: SriovNetworkNodePolicy metadata: - name: ibsriov + name: ib-sriov namespace: kube-system spec: nodeSelector: diff --git a/docs/usage/rdma-roce-zh_CN.md b/docs/usage/rdma-roce-zh_CN.md index 
790dd0464f..aa9d2ed565 100644 --- a/docs/usage/rdma-roce-zh_CN.md +++ b/docs/usage/rdma-roce-zh_CN.md @@ -250,10 +250,10 @@ RDMA 网卡,也可以基于 SR-IOV CNI 来使用 exclusive 模式的网卡。 (可选)SR-IOV 场景下,应用可使 NVIDIA 的 GPUDirect RMDA 功能,可参考 [官方文档](https://network.nvidia.com/products/GPUDirect-RDMA/) 安装内核模块。 - 2. 安装 Spiderpool - 务必设置 helm 选项 `--set sriov.install=true` + - 如果您是国内用户,可以指定参数 `--set global.imageRegistryOverride=ghcr.m.daocloud.io` 避免 Spiderpool 的镜像拉取失败。 完成 Spiderpool 安装后,可以手动编辑 configmap spiderpool-rdma-shared-device-plugin 来重新配置 RDMA shared device plugin @@ -281,7 +281,7 @@ RDMA 网卡,也可以基于 SR-IOV CNI 来使用 exclusive 模式的网卡。 apiVersion: sriovnetwork.openshift.io/v1 kind: SriovNetworkNodePolicy metadata: - name: policyrdma + name: roce-sriov namespace: kube-system spec: nodeSelector: diff --git a/docs/usage/rdma-roce.md b/docs/usage/rdma-roce.md index d034d9ba49..940cc14c33 100644 --- a/docs/usage/rdma-roce.md +++ b/docs/usage/rdma-roce.md @@ -224,22 +224,19 @@ The following steps demonstrate how to enable shared usage of RDMA devices by Po The following steps demonstrate how to enable isolated usage of RDMA devices by Pods in a cluster with two nodes via SR-IOV CNI: -1. Ensure that the host machine has an RDMA and SR-IOV enabled card and the driver is properly installed, ensuring proper RDMA functioning. +1. Ensure that the host machine has an RDMA and SR-IOV enabled card and the driver is properly installed. In our demo environment, the host machine is equipped with a Mellanox ConnectX-5 NIC with RoCE capabilities. Follow [the official NVIDIA guide](https://developer.nvidia.com/networking/ethernet-software) to install the latest OFED driver. - > To isolate the usage of an RDMA network card, ensure that at least one of the following conditions is met: - > - > (1) Kernel based on 5.3.0 or newer, RDMA modules loaded in the system. rdma-core package provides means to automatically load relevant modules on system start - > - > (2) Mellanox OFED version 4.7 or newer is required. 
In this case it is not required to use a Kernel based on 5.3.0 or newer. - - To confirm the presence of RDMA devices, use the following command: + To confirm the presence of RoCE devices, use the following command: - ~# rdma link show + ~# rdma link link mlx5_0/1 state ACTIVE physical_state LINK_UP netdev ens6f0np0 link mlx5_1/1 state ACTIVE physical_state LINK_UP netdev ens6f1np1 + ~# ibstat mlx5_0 | grep "Link layer" + Link layer: Ethernet + Make sure that the RDMA subsystem on the host is operating in exclusive mode. If not, switch to exclusive mode. # switch to exclusive mode and fail to restart the host @@ -250,22 +247,9 @@ The following steps demonstrate how to enable isolated usage of RDMA devices by ~# rdma system netns exclusive copy-on-fork on - To verify if the network card has SR-IOV functionality, check the maximum number of supported VFs: - - ~# cat /sys/class/net/ens6f0np0/device/sriov_totalvfs - 127 - (Optional) in an SR-IOV scenario, applications can enable NVIDIA's GPUDirect RDMA feature. For instructions on installing the kernel module, please refer to [the official documentation](https://network.nvidia.com/products/GPUDirect-RDMA/). -2. Verify the details of the RDMA card for subsequent device resource discovery by the device plugin. - - Enter the following command with NIC vendors being 15b3 and its deviceIDs being 1017: - - ~# lspci -nn | grep Ethernet - af:00.0 Ethernet controller [0200]: Mellanox Technologies MT27800 Family [ConnectX-5] [15b3:1017] - af:00.1 Ethernet controller [0200]: Mellanox Technologies MT27800 Family [ConnectX-5] [15b3:1017] - -3. Install Spiderpool +2. Install Spiderpool - set the values `--set sriov.install=true` @@ -281,7 +265,13 @@ The following steps demonstrate how to enable isolated usage of RDMA devices by spiderpool-sriov-operator-65b59cd75d-89wtg 1/1 Running 0 1m spiderpool-init 0/1 Completed 0 1m -4. Configure SR-IOV operator +3. 
Configure SR-IOV operator + + Look up the device information of the RoCE interface. Enter the following command to get the NIC vendor ID (15b3) and device ID (1017): + + ~# lspci -nn | grep Ethernet + af:00.0 Ethernet controller [0200]: Mellanox Technologies MT27800 Family [ConnectX-5] [15b3:1017] + af:00.1 Ethernet controller [0200]: Mellanox Technologies MT27800 Family [ConnectX-5] [15b3:1017] With the following configuration, the SR-IOV operator can create VFs on the host and report the resources: @@ -289,12 +279,12 @@ The following steps demonstrate how to enable isolated usage of RDMA devices by apiVersion: sriovnetwork.openshift.io/v1 kind: SriovNetworkNodePolicy metadata: - name: policyrdma + name: roce-sriov namespace: kube-system spec: nodeSelector: kubernetes.io/os: "linux" - resourceName: mellanoxrdma + resourceName: mellanoxroce priority: 99 numVfs: 12 nicSelector: @@ -315,14 +305,14 @@ The following steps demonstrate how to enable isolated usage of RDMA devices by "allocable": { "cpu": "40", "pods": "110", - "spidernet.io/mellanoxrdma": "12", + "spidernet.io/mellanoxroce": "12", ... } }, ... ] -5. Create multus configurations related to SR-IOV and create corresponding ippool resources. +5. Create SR-IOV CNI configuration and corresponding ippool resources. cat <