diff --git a/.github/workflows/build-lts.yaml b/.github/workflows/build-lts.yaml new file mode 100644 index 000000000..1ac10c26e --- /dev/null +++ b/.github/workflows/build-lts.yaml @@ -0,0 +1,39 @@ +name: BUILD-LTS + +on: + push: + branches: + - 'DPVS-1.9-LTS' + release: + branches: + - 'DPVS-1.9-LTS' + types: + - published + pull_request: + branches: + - 'DPVS-1.9-LTS' + types: + - labeled + +jobs: + build-basic: + runs-on: self-hosted + env: + PKG_CONFIG_PATH: /data/dpdk/20.11.10/dpdklib/lib64/pkgconfig + steps: + - name: Checkout Code + uses: actions/checkout@v4 + - name: Build + run: make -j + + build-all: + runs-on: self-hosted + env: + PKG_CONFIG_PATH: /data/dpdk/20.11.10/dpdklib/lib64/pkgconfig + steps: + - name: Checkout Code + uses: actions/checkout@v4 + - name: Config + run: sed -i 's/=n$/=y/' config.mk + - name: Build + run: make -j diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index a75508602..0fe3eba9d 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -1,38 +1,44 @@ -name: Build +name: BUILD on: push: - branches: [master, devel, DPVS-1.8-LTS] + branches: + - 'master' + - 'devel' release: - branches: [master] - types: [published] + branches: + - 'master' + - 'devel' + types: + - published schedule: - cron: '30 2 * * 1' pull_request: - branches: [master, devel, DPVS-1.8-LTS] - types: [labeled] + branches: + - 'master' + - 'devel' + types: + - labeled jobs: build-basic: runs-on: self-hosted env: - PKG_CONFIG_PATH: /data/dpdk/dpdklib/lib64/pkgconfig - ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true + PKG_CONFIG_PATH: /data/dpdk/24.11/dpdklib/lib64/pkgconfig steps: - name: Checkout Code - uses: actions/checkout@v3 - - name: build + uses: actions/checkout@v4 + - name: Build run: make -j - + build-all: runs-on: self-hosted env: - PKG_CONFIG_PATH: /data/dpdk/dpdklib/lib64/pkgconfig - ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true + PKG_CONFIG_PATH: /data/dpdk/24.11/dpdklib/lib64/pkgconfig steps: - name: 
Checkout Code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Config run: sed -i 's/=n$/=y/' config.mk - - name: build + - name: Build run: make -j diff --git a/.github/workflows/run-lts.yaml b/.github/workflows/run-lts.yaml new file mode 100644 index 000000000..291b27572 --- /dev/null +++ b/.github/workflows/run-lts.yaml @@ -0,0 +1,32 @@ +name: RUN-LTS + +on: + push: + branches: + - 'DPVS-1.9-LTS' + release: + branches: + - 'DPVS-1.9-LTS' + types: + - published + pull_request: + types: + - labeled + branches: + - 'DPVS-1.9-LTS' + +jobs: + run-dpvs: + runs-on: self-hosted + env: + PKG_CONFIG_PATH: /data/dpdk/20.11.10/dpdklib/lib64/pkgconfig + #ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true + steps: + - name: Checkout Code + uses: actions/checkout@v4 + - name: Build + run: make -j + - name: Install + run: make install + - name: Run DPVS + run: sudo dpvsci $(pwd)/bin/dpvs diff --git a/.github/workflows/run.yaml b/.github/workflows/run.yaml index f71248963..0385fedef 100644 --- a/.github/workflows/run.yaml +++ b/.github/workflows/run.yaml @@ -1,26 +1,33 @@ -name: Run +name: RUN on: push: - branches: [master, devel, DPVS-1.8-LTS] + branches: + - 'master' + - 'devel' release: - branches: [master] - types: [published] + branches: + - 'master' + - 'devel' + types: + - published schedule: - - cron: '30 3 * * 1' + - cron: '30 3 * * 1' pull_request: - branches: [master, devel, DPVS-1.8-LTS] - types: [labeled] + types: + - labeled + branches: + - 'master' + - 'devel' jobs: run-dpvs: runs-on: self-hosted env: - PKG_CONFIG_PATH: /data/dpdk/dpdklib/lib64/pkgconfig - ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true + PKG_CONFIG_PATH: /data/dpdk/24.11/dpdklib/lib64/pkgconfig steps: - name: Checkout Code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Build run: make -j - name: Install diff --git a/README.md b/README.md index 22b8465fd..5659d413a 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,10 @@ 
-![Build](https://github.com/iqiyi/dpvs/workflows/Build/badge.svg) ![Run](https://github.com/iqiyi/dpvs/workflows/Run/badge.svg) +![Build](https://github.com/iqiyi/dpvs/actions/workflows/build.yaml/badge.svg) ![Run](https://github.com/iqiyi/dpvs/actions/workflows/run.yaml/badge.svg) ![Build(LTS)](https://github.com/iqiyi/dpvs/actions/workflows/build-lts.yaml/badge.svg) ![Run(LTS)](https://github.com/iqiyi/dpvs/actions/workflows/run-lts.yaml/badge.svg) ![dpvs-logo.png](./pic/DPVS-logo.png) # Introduction -`DPVS` is a high performance **Layer-4 load balancer** based on [DPDK](http://dpdk.org). It's derived from Linux Virtual Server [LVS](http://www.linuxvirtualserver.org/) and its modification [alibaba/LVS](https://github.com/alibaba/LVS). +`DPVS` is a high performance **Layer-4 load balancer** based on [DPDK](http://dpdk.org). It derives from Linux Virtual Server [LVS](http://www.linuxvirtualserver.org/) and its modification [alibaba/LVS](https://github.com/alibaba/LVS). > Notes: The name `DPVS` comes from "DPDK-LVS". @@ -43,25 +43,29 @@ DPVS consists of the modules illustrated in the diagram below. This *quick start* is performed in the environments described below. -* Linux Distribution: CentOS 7.6 -* Kernel: 3.10.0-957.el7.x86_64 * CPU: Intel(R) Xeon(R) CPU E5-2650 v4 @ 2.20GHz * NIC: Intel Corporation Ethernet Controller 10-Gigabit X540-AT2 (rev 03) * Memory: 64G with two NUMA node. -* GCC: 4.8.5 20150623 (Red Hat 4.8.5-36) +* Linux Distribution: Anolis OS release 8.8 +* Kernel: 5.10.134-13.an8.x86_64 +* GCC: gcc (GCC) 8.5.0 20210514 (Anolis 8.5.0-10.0.3) +* Python: 3.6 (with pyelftools: 0.31) +* meson: 0.58.2 +* pkgconf: 1.4.2 +* numactl-devel: 2.0.14 (required by DPDK on NUMA-aware system) * Golang: go1.20.4 linux/amd64 (required only when CONFIG_DPVS_AGENT enabled). -Other environments should also be OK if DPDK works, please check [dpdk.org](http://www.dpdk.org) for more information.
+Other environments should also be OK if DPDK works, please check [DPDK Supported Hardware](https://core.dpdk.org/supported/) and [DPDK System Requirements](https://doc.dpdk.org/guides/linux_gsg/sys_reqs.html#) for more information. > Notes: -> 1. Please check this link for NICs supported by DPDK: http://dpdk.org/doc/nics. +> 1. Please check this link for NICs supported by DPDK: http://core.dpdk.org/supported. > 2. `Flow Control` ([rte_flow](http://dpdk.org/doc/guides/nics/overview.html#id1)) is required for `FNAT` and `SNAT` mode when DPVS running on multi-cores unless `conn redirect` is enabled. The minimum requirements to ensure DPVS works with multi-core properly is that `rte_flow` must support "ipv4, ipv6, tcp, udp" four items, and "drop, queue" two actions. > 3. DPVS doesn't confine itself to the this test environments. In fact, DPVS is an user-space application which relies very little on operating system, kernel versions, compilers, and other platform discrepancies. As far as is known, DPVS has been verified at least in the following environments. -> * Centos 7.2, 7.6, 7.9 > * Anolis 8.6, 8.8, 8.9 -> * GCC 4.8, 8.5 +> * GCC 8.5 > * Kernel: 3.10.0, 4.18.0, 5.10.134 > * NIC: Intel IXGBE, NVIDIA MLX5 +> * Centos 7.x and GCC 4.8 are also supported by DPVS versions earlier than v1.10. ## Clone DPVS @@ -74,27 +78,35 @@ Well, let's start from DPDK then. ## DPDK setup -Currently, `dpdk-stable-20.11.10` is recommended for `DPVS`, and we will not support dpdk version earlier than dpdk-20.11 any more. If you are still using earlier dpdk versions, such as `dpdk-stable-17.11.6` and `dpdk-stable-18.11.2`, please use earlier DPVS releases, such as [v1.8.12](https://github.com/iqiyi/dpvs/releases/tag/v1.8.12). +Currently, `dpdk-24.11` is recommended for `DPVS`, and we will not support dpdk versions earlier than dpdk-20.11 any more. If you are still using earlier dpdk versions, please use earlier [DPVS releases](https://github.com/iqiyi/dpvs/releases). 
The best matched DPDK versions are listed in the table below. -> Notes: You can skip this section if experienced with DPDK, and refer the [link](http://dpdk.org/doc/guides/linux_gsg/index.html) for details. +| DPVS Version | DPDK Version | +| --------------- | ---------------- | +| v1.10 | 24.11 | +| v1.9 | 20.11 | +| v1.8 | 18.11 | +| v1.7 or earlier | 17.11 or earlier | + +> Notes: You can skip this section if experienced with DPDK, and refer to this [link](https://doc.dpdk.org/guides/linux_gsg/index.html) for details. ```bash -$ wget https://fast.dpdk.org/rel/dpdk-20.11.10.tar.xz # download from dpdk.org if link failed. -$ tar xf dpdk-20.11.10.tar.xz +$ wget https://fast.dpdk.org/rel/dpdk-24.11.tar.xz # download from dpdk.org if link failed. +$ tar xf dpdk-24.11.tar.xz ``` -### DPDK patchs +### DPDK patches -There are some patches for DPDK to support extra features needed by DPVS. Apply them if needed. For example, there's a patch for DPDK `rte_kni` driver for hardware multicast, apply it if you want to use `rte_kni` as your management network for such exception data path as SSH, OSPF, BGP, etc. +There are some patches for DPDK to support extra features needed by DPVS. Apply them if needed. For example, there's a patch for DPDK ixgbe flow, apply it if you are using the ixgbe network adapter. -> Notes: It's assumed we are in DPVS root directory where you have installed dpdk-stable-20.11.10 source codes. Please note it's not mandatory, just for convenience. +> Notes: It's assumed we are in DPVS root directory where you have installed dpdk-24.11 source codes. Please note it's not mandatory, just for convenience. 
``` $ cd -$ cp patch/dpdk-stable-20.11.10/*.patch dpdk-stable-20.11.10/ -$ cd dpdk-stable-20.11.10/ -$ patch -p1 < 0001-kni-use-netlink-event-for-multicast-driver-part.patch -$ patch -p1 < 0002-pdump-change-dpdk-pdump-tool-for-dpvs.patch +$ cp patch/dpdk-24.11/*.patch dpdk-24.11/ +$ cd dpdk-24.11/ +$ patch -p1 < 0001-pdump-add-cmdline-packet-filters-for-dpdk-pdump-tool.patch +$ patch -p1 < 0002-debug-enable-dpdk-eal-memory +$ patch -p1 < 0003-ixgbe_flow-patch-ixgbe-fdir-rte_flow-for-dpvs.patch $ ... ``` @@ -105,7 +117,7 @@ $ ... Use meson-ninja to build DPDK, and export environment variable `PKG_CONFIG_PATH` for DPDK application (DPVS). The sub-Makefile `src/dpdk.mk` in DPVS will check the presence of libdpdk. ```bash -$ cd dpdk-stable-20.11.10 +$ cd dpdk-24.11 $ mkdir dpdklib # user desired install folder $ mkdir dpdkbuild # user desired build folder $ meson -Denable_kmods=true -Dprefix=dpdklib dpdkbuild @@ -116,7 +128,7 @@ $ export PKG_CONFIG_PATH=$(pwd)/../dpdklib/lib64/pkgconfig/ > Tips: You can use script [dpdk-build.sh](./scripts/dpdk-build.sh) to facilitate dpdk build. Run `dpdk-build.sh -h` for the usage of the script. -Next is to set up DPDK hugepage. Our test environment is NUMA system. For single-node system please refer to the [link](http://dpdk.org/doc/guides/linux_gsg/sys_reqs.html). +Next is to set up DPDK hugepage. Our test environment is NUMA system. For single-node system please refer to the [link](http://doc.dpdk.org/guides/linux_gsg/sys_reqs.html#use-of-hugepages-in-the-linux-environment). ```bash $ # for NUMA machine @@ -137,34 +149,31 @@ $ mount -t hugetlbfs nodev /mnt/huge ``` > Notes: -> 1. Hugepages of other size, such as 1GB-size hugepages, can also be used if your system supports. +> 1. Hugepages of other sizes, such as 1GB-size hugepages, can also be used if your system supports. > 2. 
It's recommended to reserve hugepage memory and isolate CPUs used by DPVS with linux kernel cmdline options in production environments, for example `isolcpus=1-9 default_hugepagesz=1G hugepagesz=1G hugepages=32`. Next, install kernel modules required by DPDK and DPVS. * DPDK driver kernel module: -Depending on your NIC and system, NIC may require binding a DPDK-compitable driver, such as `vfio-pci`, `igb_uio`, or `uio_pci_generic`. Refer to [DPDK doc](https://doc.dpdk.org/guides/linux_gsg/linux_drivers.html) for more details. In this test, we use the linux standard UIO kernel module `uio_pci_generic`. +Depending on your NIC and system, NIC may require binding a DPDK-compatible driver, such as `vfio-pci`, `igb_uio`, or `uio_pci_generic`. Note that some NICs supporting bifurcated driver should skip the step. Refer to [DPDK Linux Drivers](https://doc.dpdk.org/guides/linux_gsg/linux_drivers.html) for details. In this test, we use the linux standard UIO kernel module `uio_pci_generic` for the ixgbe NIC. * KNI kernel module: -KNI kernel module `rte_kni.ko` is required as a solution to the exception data path to handle all packets not processed in DPVS. +The KNI kernel module, library and PMD have been removed since DPDK 23.11 release. DPVS has replaced it with virtio-user devices (namely virtio-user kni) since v1.10. Nevertheless, the kernel module `rte_kni.ko` is required if you are still using DPDK KNI in earlier DPDK/DPVS versions as a solution to the exception data path. ```bash $ modprobe uio_pci_generic -$ cd dpdk-stable-20.11.10 -$ insmod dpdkbuild/kernel/linux/kni/rte_kni.ko carrier=on - -$ # bind eth0 to uio_pci_generic (Be aware: Network on eth0 will get broken!) +$ ## bind eth0 to uio_pci_generic (Be aware: Network on eth0 will get broken!) $ ./usertools/dpdk-devbind.py --status $ ifconfig eth0 down # assuming eth0's pci-bus location is 0000:06:00.0 $ ./usertools/dpdk-devbind.py -b uio_pci_generic 0000:06:00.0 ``` > Notes: > 1.
The test in our Quick Start uses only one NIC. Bind as many NICs as required in your DPVS application to DPDK driver kernel module. For example, you should bind at least 2 NICs if you are testing DPVS with two-arm. -> 2. `dpdk-devbind.py -u` can be used to unbind driver and switch it back to Linux driver like `ixgbe`. Use `lspci` or `ethtool -i eth0` to check the NIC's PCI bus-id. Please refer to [DPDK Doc:Binding and Unbinding Network Ports to/from the Kernel Modules](https://doc.dpdk.org/guides/linux_gsg/linux_drivers.html#binding-and-unbinding-network-ports-to-from-the-kernel-modules) for more details. -> 3. NVIDIA/Mellanox NIC uses bifurcated driver which doesn't rely on UIO/VFIO driver, so not bind any DPDK driver kernel module, but [NVIDIA MLNX_OFED/EN](https://network.nvidia.com/products/infiniband-drivers/linux/mlnx_ofed/) is required. Refer to [Mellanox DPDK](https://enterprise-support.nvidia.com/s/article/mellanox-dpdk) for its PMD and [Compilation Prerequisites](https://doc.dpdk.org/guides/platform/mlx5.html#linux-prerequisites) for OFED installation. -> 4. A kernel module parameter `carrier` has been added to `rte_kni.ko` since [DPDK v18.11](https://elixir.bootlin.com/dpdk/v18.11/source/kernel/linux/kni/kni_misc.c), and the default value for it is "off". We need to load `rte_kni.ko` with extra parameter `carrier=on` to make KNI devices work properly. -> 5. Following the DPDK technical board decision and refinement, the KNI kernel module, library and PMD was removed from the DPDK 23.11 release (refer to [ABI and API Deprecation(DPDK 22.11)](https://doc.dpdk.org/guides-22.11/rel_notes/deprecation.html)). As a replacement solution, DPVS has supported [virtio-user as exception path](https://doc.dpdk.org/guides/howto/virtio_user_as_exception_path.html), which is default off now and can be enabled with `CONFIG_KNI_VIRTIO_USER` in config.mk. +> 2. `dpdk-devbind.py -u` can be used to unbind driver and switch it back to Linux driver. 
Use `lspci` or `ethtool -i eth0` to check the NIC's PCI bus-id. Please refer to [DPDK Doc:Binding and Unbinding Network Ports to/from the Kernel Modules](https://doc.dpdk.org/guides/linux_gsg/linux_drivers.html#binding-and-unbinding-network-ports-to-from-the-kernel-modules) for more details. +> 3. NVIDIA/Mellanox NIC uses bifurcated driver which doesn't rely on UIO/VFIO driver, so NOT bind any DPDK driver kernel module, but [NVIDIA MLNX_OFED/EN](https://network.nvidia.com/products/infiniband-drivers/linux/mlnx_ofed/) is required. Refer to [Mellanox DPDK](https://enterprise-support.nvidia.com/s/article/mellanox-dpdk) for its PMD and [Compilation Prerequisites](https://doc.dpdk.org/guides/platform/mlx5.html#linux-prerequisites) for OFED installation. +> 4. A kernel module parameter `carrier` was added to `rte_kni.ko` for DPDK 18.11~23.11, and the default value for it is "off". We need to load `rte_kni.ko` with extra parameter `carrier=on` to make KNI devices work properly. +> 5. Following the DPDK technical board decision and refinement, the KNI kernel module, library and PMD was removed from the DPDK 23.11 release (refer to [ABI and API Deprecation(DPDK 22.11)](https://doc.dpdk.org/guides-22.11/rel_notes/deprecation.html)). As a replacement solution, DPVS has supported [virtio-user as exception path](https://doc.dpdk.org/guides/howto/virtio_user_as_exception_path.html) since v1.9.10, which is default off in v1.9 and can be enabled with `CONFIG_KNI_VIRTIO_USER` in config.mk. > 6. Multiple DPVS instances can run on a single server if there are enough NICs or VFs within one NIC. Refer to [tutorial:Multiple Instances](https://github.com/iqiyi/dpvs/blob/devel/doc/tutorial.md#multi-instance) for details. ## Build DPVS @@ -212,7 +221,7 @@ $ ./dpvs & $ # alternatively and strongly advised, start DPVS with NIC and CPU explicitly specified: $ ./dpvs -- -a 0000:06:00.0 -l 1-9 ``` -> Notes: +> Tips: > 1. 
Run `./dpvs --help` for DPVS supported command line options, and `./dpvs -- --help` for common DPDK EAL command line options. > 2. The default `dpvs.conf` require 9 CPUs(1 master worker, 8 slave workers), modify it if not so many available CPUs in your system. diff --git a/conf/dpvs.bond.conf.sample b/conf/dpvs.bond.conf.sample index cc9bb66d8..099117627 100644 --- a/conf/dpvs.bond.conf.sample +++ b/conf/dpvs.bond.conf.sample @@ -35,6 +35,7 @@ netif_defs { tx { queue_number 8 descriptor_number 1024 + mbuf_fast_free on } ! mtu 1500 ! promisc_mode @@ -51,6 +52,7 @@ netif_defs { tx { queue_number 8 descriptor_number 1024 + mbuf_fast_free on } ! mtu 1500 ! promisc_mode @@ -68,6 +70,7 @@ netif_defs { tx { queue_number 8 descriptor_number 1024 + mbuf_fast_free on } ! mtu 1500 ! promisc_mode @@ -84,6 +87,7 @@ netif_defs { tx { queue_number 8 descriptor_number 1024 + mbuf_fast_free on } ! mtu 1500 ! promisc_mode diff --git a/conf/dpvs.conf.items b/conf/dpvs.conf.items index 08a37ae06..b4e957f4a 100644 --- a/conf/dpvs.conf.items +++ b/conf/dpvs.conf.items @@ -38,6 +38,8 @@ netif_defs { tx { queue_number 6 <16, 0-16> descriptor_number 512 <512, 16-8192> + mbuf_fast_free on ## Disable it when ports used for two-arm forwarding + ## located at different NUMA nodes. } ! mtu 1500 <1500,0-9000> ! promisc_mode diff --git a/conf/dpvs.conf.sample b/conf/dpvs.conf.sample index 8c40b5ad3..8afb2dd68 100644 --- a/conf/dpvs.conf.sample +++ b/conf/dpvs.conf.sample @@ -35,6 +35,7 @@ netif_defs { tx { queue_number 8 descriptor_number 1024 + mbuf_fast_free on } ! mtu 1500 ! promisc_mode @@ -51,6 +52,7 @@ netif_defs { tx { queue_number 8 descriptor_number 1024 + mbuf_fast_free on } ! mtu 1500 ! 
promisc_mode diff --git a/config.mk b/config.mk index 48d073291..0b86feda2 100644 --- a/config.mk +++ b/config.mk @@ -4,11 +4,9 @@ export CONFIG_DPVS_MAX_LCORE=64 ## modules export CONFIG_DPVS_AGENT=n -export CONFIG_IXGEB_PMD=y export CONFIG_DPVS_LOG=y export CONFIG_PDUMP=y export CONFIG_ICMP_REDIRECT_CORE=n -export CONFIG_KNI_VIRTIO_USER=n # debugging and logging export CONFIG_DEBUG=n diff --git a/doc/TODO.md b/doc/TODO.md index 7961406da..e58a19c8a 100644 --- a/doc/TODO.md +++ b/doc/TODO.md @@ -3,16 +3,17 @@ DPVS TODO list * [x] IPv6 Support * [x] Documents update -* [ ] NIC without Flow-Director (FDIR) +* [x] NIC without Flow-Director (FDIR) - [x] Packet redirect to workers - [ ] RSS pre-calcuating - - [ ] Replace fdir with Generic Flow(rte_flow) + - [x] Replace fdir with Generic Flow(rte_flow) * [x] Merge DPDK stable 18.11 -* [ ] Merge DPDK stable 20.11 +* [x] Merge DPDK stable 20.11 +* [x] Merge DPDK stable 24.11 * [x] Service whitelist ACL -* [ ] IPset Support +* [x] IPset Support - [ ] SNAT ACL with IPset - - [ ] TC policing with IPset + - [x] TC policing with IPset * [x] Refactor Keepalived (porting latest stable keepalived) * [ ] Keepalived stability test and optimization. 
* [x] Packet Capture and Tcpdump Support @@ -21,13 +22,13 @@ DPVS TODO list - [ ] Session based logging (creation, expire, statistics) * [x] CI, Test Automation Setup * [ ] Performance Optimization - - [ ] Performance test tools and docs + - [x] Performance test tools and docs - [x] CPU Performance Tuning - [x] Memory Performance Tuning - [ ] Numa-aware NIC - [ ] Minimal Running Resource - [x] KNI performance Tuning - - [ ] Multi-core Performance Tuning + - [x] Multi-core Performance Tuning - [x] TC performance Tuning * [x] 25G/40G NIC Supports * [ ] VxLAN Support diff --git a/doc/Worker-Performance-Tuning.md b/doc/Worker-Performance-Tuning.md index acbf892d6..70d2a9778 100644 --- a/doc/Worker-Performance-Tuning.md +++ b/doc/Worker-Performance-Tuning.md @@ -61,7 +61,7 @@ Generally speaking, we may follow some practical rules below to choose the CPU c You can get the CPU layout of your system by the script provided by DPDK `cpu_layout.py `, example as shown below. ``` -[root@~ dpdk]# python dpdk-stable-18.11.2/usertools/cpu_layout.py +[root@~ dpdk]# python [DPDK-SOURCE]/usertools/cpu_layout.py ====================================================================== Core and Socket Information (as reported by '/sys/devices/system/cpu') ====================================================================== diff --git a/doc/faq.md b/doc/faq.md index e61ae86e9..e493ab712 100644 --- a/doc/faq.md +++ b/doc/faq.md @@ -18,6 +18,8 @@ DPVS Frequently Asked Questions (FAQ) * [Does DPVS support Bonding/VLAN/Tunnel ?](#vir-dev) * [Why CPU usages are 100% when running DPVS ?](#cpu-100) * [Does iptables conflict with DPVS ?](#iptables) +* [Why DPVS exits due to "Cause: failed to init tc: no memory"?](#no-memory) +* [Why IPv6 is not supported by my Keepalived?](#keepalived-ipv6) ------------------------------------------------- @@ -223,3 +225,24 @@ It's normal, not issue. Since DPDK application is using busy-polling mode. Every ### Does iptables conflict with DPVS ? 
Yes, DPDK is kernel-bypass solution, all forwarding traffic in data plane do not get into the Linux Kernel, it means `iptables`(Netfilter) won't work for that kind of traffic. + + + +### Why DPVS exits due to "Cause: failed to init tc: no memory"? + +1. Check hugepage configurations on your system. Adequate free hugepages must be available for DPVS. Generally, 8GB free hugepages on each NUMA node would be enough for running DPVS with default configs. + +2. Check NUMA supports of your DPDK compilation. DPVS by default uses 2 NUMA nodes. If your system is not NUMA-aware, set the macro in config.mk `CONFIG_DPVS_MAX_SOCKET=1`. Otherwise, ensure `numactl-devel` package has been installed before DPDK compilation. + + + +### Why IPv6 is not supported by my Keepalived? + +Keepalived IPv6 requires libnl3. Please install `libnl3-devel` package and recompile DPVS. + +```sh +make clean +make distclean +make +``` diff --git a/doc/tutorial.md b/doc/tutorial.md index 5bcf49817..c21b2a35b 100644 --- a/doc/tutorial.md +++ b/doc/tutorial.md @@ -1481,9 +1481,10 @@ $ The `dpdk-pdump` runs as a DPDK secondary process and is capable of enabling packet capture on dpdk ports. DPVS works as the primary process for dpdk-pdump, which should enable the packet capture framework by setting `global_defs/pdump` to be `on` in `/etc/dpvs.conf` when DPVS starts up. -Refer to [dpdk-pdump doc](https://doc.dpdk.org/guides/tools/pdump.html) for its usage. DPVS extends dpdk-pdump with a [DPDK patch](../patch/dpdk-stable-18.11.2/0005-enable-pdump-and-change-dpdk-pdump-tool-for-dpvs.patch) to add some packet filtering features. Run `dpdk-pdump -- --help` to find all supported pdump params. +Refer to [dpdk-pdump doc](https://doc.dpdk.org/guides/tools/pdump.html) for its usage. DPVS extends dpdk-pdump with a [DPDK patch](../patch/dpdk-24.11/0001-pdump-add-cmdline-packet-filters-for-dpdk-pdump-tool.patch) to add some packet filtering features. Run `dpdk-pdump -- --help` to find all supported pdump params.
-> usage: dpdk-pdump [EAL options] -- --pdump '(port= | device_id=),(queue=),(rx-dev= | tx-dev=,[host= | src-host= |dst-host=],[proto=support:tcp/udp/icmp],[proto-port= |src-port= |dst-port=],[ring-size=default:16384],[mbuf-size=default:2176],[total-num-mbufs=default:65535]' +> usage: ./bin/dpdk-pdump [EAL options] -- --[multi] +> --pdump '(port= | device_id=),(queue=),(rx-dev= | tx-dev=,[host= | src-host= |dst-host=],[proto=support:tcp/udp/icmp/icmp6],[proto-port= |src-port= |dst-port=],[ring-size=default:16384],[mbuf-size=default:2176],[total-num-mbufs=default:65535]' Well, it's time to demonstrate how to use dpdk-pdump with our test case. diff --git a/include/conf/netif.h b/include/conf/netif.h index da11e402a..87fc367b8 100644 --- a/include/conf/netif.h +++ b/include/conf/netif.h @@ -112,6 +112,7 @@ typedef struct netif_nic_basic_get uint16_t ol_tx_tcp_csum:1; uint16_t ol_tx_udp_csum:1; uint16_t lldp:1; + uint16_t ol_tx_fast_free:1; } netif_nic_basic_get_t; /* nic statistics specified by port_id */ diff --git a/include/dpdk.h b/include/dpdk.h index bf7344dca..c6dddd9a0 100644 --- a/include/dpdk.h +++ b/include/dpdk.h @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -44,6 +45,7 @@ #include #include #include +#include #include #include #include @@ -56,7 +58,6 @@ #include #include #include -#include #include "mbuf.h" #ifndef CONFIG_KNI_VIRTIO_USER #include diff --git a/include/kni.h b/include/kni.h index 2c72386af..9351dfdf2 100644 --- a/include/kni.h +++ b/include/kni.h @@ -29,9 +29,15 @@ #include #include "netif.h" #include "netif_flow.h" +#include "linux_if.h" +#include "dpdk.h" #define MAX_KNI_FLOW 2 +#ifndef RTE_LOGTYPE_Kni +#define RTE_LOGTYPE_Kni RTE_LOGTYPE_USER1 +#endif + struct kni_addr_flow { struct list_head node; int af; @@ -64,12 +70,35 @@ static inline bool kni_dev_running(const struct netif_port *dev) } #ifdef CONFIG_KNI_VIRTIO_USER +static inline void disable_kni_tx_csum_offload(const char *ifname) +{ + // TODO: 
Support tx-csum offload on virtio-user kni device. + struct { + struct ethtool_gfeatures hdr; + struct ethtool_get_features_block blocks[1]; + } gfeatures; + + if (linux_get_if_features(ifname, 1, (struct ethtool_gfeatures *)&gfeatures) < 0) + RTE_LOG(WARNING, Kni, "linux_get_if_features(%s) failed\n", ifname); + else if (gfeatures.blocks[0].requested & 0x1A + /* NETIF_F_IP_CSUM_BIT|NETIF_F_HW_CSUM_BIT|NETIF_F_IPV6_CSUM_BIT */) + RTE_LOG(INFO, Kni, "%s: tx-csum offload supported but to be disabled on %s!\n", + __func__, ifname); + + // Disable tx-csum offload, and delegate the task to device driver. + if (linux_set_tx_csum_offload(ifname, 0) < 0) + RTE_LOG(WARNING, Kni, "failed to disable tx-csum offload on %s\n", ifname); +} + static inline void kni_tx_csum(struct rte_mbuf *mbuf) { // TODO: // Support tx-csum offload on virtio-user kni device. } #else +// rte_kni doesn't support tx-csum offload feature +static inline void disable_kni_tx_csum_offload(const char *ifname) {} + static inline void kni_handle_request(const struct netif_port *dev) { if (!kni_dev_exist(dev)) diff --git a/include/mbuf.h b/include/mbuf.h index 7fb013a48..defa568d0 100644 --- a/include/mbuf.h +++ b/include/mbuf.h @@ -145,7 +145,7 @@ struct rte_mbuf *mbuf_copy(struct rte_mbuf *md, struct rte_mempool *mp); void mbuf_copy_metadata(struct rte_mbuf *mi, struct rte_mbuf *m); #ifdef CONFIG_DPVS_MBUF_DEBUG -inline void dp_vs_mbuf_dump(const char *msg, int af, const struct rte_mbuf *mbuf); +void dp_vs_mbuf_dump(const char *msg, int af, const struct rte_mbuf *mbuf); #endif void *mbuf_userdata(struct rte_mbuf *, mbuf_usedata_field_t); diff --git a/include/netif.h b/include/netif.h index 72f599bf7..b2bb4dd26 100644 --- a/include/netif.h +++ b/include/netif.h @@ -49,6 +49,7 @@ enum { NETIF_PORT_FLAG_TC_INGRESS = (0x1<<11), NETIF_PORT_FLAG_NO_ARP = (0x1<<12), NETIF_PORT_FLAG_LLDP = (0x1<<13), + NETIF_PORT_FLAG_TX_MBUF_FAST_FREE = (0x1<<14), }; /* max tx/rx queue number for each nic */ @@ -86,6 +87,9 @@ 
struct netif_queue_conf queueid_t id; uint16_t len; struct rx_partner *isol_rxq; +#ifdef CONFIG_DPVS_NETIF_DEBUG + struct rte_mempool *pktpool; /* for RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE */ +#endif struct rte_mbuf *mbufs[NETIF_MAX_PKT_BURST]; } __rte_cache_aligned; @@ -287,6 +291,7 @@ int netif_unregister_pkt(struct pkt_type *pt); struct netif_port* netif_port_get(portid_t id); /* get netif by name, fail return NULL */ struct netif_port* netif_port_get_by_name(const char *name); +bool netif_flow_fuzzy_match(void); bool is_physical_port(portid_t pid); bool is_bond_port(portid_t pid); void netif_physical_port_range(portid_t *start, portid_t *end); diff --git a/patch/dcdn-toa.patch b/patch/dcdn-toa.patch index 3d9c84a9b..a299ee0f9 100644 --- a/patch/dcdn-toa.patch +++ b/patch/dcdn-toa.patch @@ -1,12 +1,13 @@ -From cee6889685240558ebea795615539b7289070842 Mon Sep 17 00:00:00 2001 -From: wangyetong -Date: Thu, 14 Sep 2023 15:33:42 +0800 -Subject: [PATCH] added dcdn toa +From 7fcfff83ec23d1a0d2e475cf6e5bdfd459f83411 Mon Sep 17 00:00:00 2001 +From: ywc689 +Date: Mon, 23 Dec 2024 17:04:51 +0800 +Subject: [PATCH] add patch for dcdn toa +Signed-off-by: ywc689 --- - include/ipvs/conn.h | 4 ++++ + include/ipvs/conn.h | 4 +++ include/ipvs/proto_tcp.h | 2 ++ - src/ipvs/ip_vs_proto_tcp.c | 55 +++++++++++++++++++++++++++++++++++++++++++++- + src/ipvs/ip_vs_proto_tcp.c | 55 +++++++++++++++++++++++++++++++++++++- 3 files changed, 60 insertions(+), 1 deletion(-) diff --git a/include/ipvs/conn.h b/include/ipvs/conn.h @@ -45,10 +46,10 @@ index 3d1515a..f0cf50c 100644 #define TCP_OLEN_TSTAMP_ALIGNED 12 #define TCP_OLEN_SACK_BASE 2 diff --git a/src/ipvs/ip_vs_proto_tcp.c b/src/ipvs/ip_vs_proto_tcp.c -index 6acbbca..5b185fa 100644 +index 6e754cf..76a4fae 100644 --- a/src/ipvs/ip_vs_proto_tcp.c +++ b/src/ipvs/ip_vs_proto_tcp.c -@@ -441,6 +441,43 @@ static int tcp_in_add_proxy_proto(struct dp_vs_conn *conn, struct rte_mbuf *mbuf +@@ -482,6 +482,43 @@ static int tcp_in_add_proxy_proto(struct 
dp_vs_conn *conn, struct rte_mbuf *mbuf return proxy_proto_insert(&ppinfo, conn, mbuf, tcph, hdr_shift); } @@ -92,7 +93,7 @@ index 6acbbca..5b185fa 100644 static int tcp_in_add_toa(struct dp_vs_conn *conn, struct rte_mbuf *mbuf, struct tcphdr *tcph) { -@@ -518,7 +555,10 @@ static int tcp_in_add_toa(struct dp_vs_conn *conn, struct rte_mbuf *mbuf, +@@ -559,7 +596,10 @@ static int tcp_in_add_toa(struct dp_vs_conn *conn, struct rte_mbuf *mbuf, if (conn->af == AF_INET) { struct tcpopt_ip4_addr *toa_ip4 = (struct tcpopt_ip4_addr *)(tcph + 1); @@ -104,9 +105,9 @@ index 6acbbca..5b185fa 100644 } else { struct tcpopt_ip6_addr *toa_ip6 = (struct tcpopt_ip6_addr *)(tcph + 1); -@@ -842,6 +882,10 @@ static int tcp_fnat_in_handler(struct dp_vs_proto *proto, +@@ -884,6 +924,10 @@ static int tcp_fnat_in_handler(struct dp_vs_proto *proto, int af; /* outbound af */ - int iphdrlen; + int iphdrlen, toalen; int err, pp_hdr_shift = 0; + struct in_addr dcdn_addr; +#ifdef CONFIG_DPVS_IPVS_DEBUG @@ -115,7 +116,7 @@ index 6acbbca..5b185fa 100644 af = tuplehash_out(conn).af; iphdrlen = ((AF_INET6 == af) ? 
ip6_hdrlen(mbuf): ip4_hdrlen(mbuf)); -@@ -866,6 +910,15 @@ static int tcp_fnat_in_handler(struct dp_vs_proto *proto, +@@ -908,6 +952,15 @@ static int tcp_fnat_in_handler(struct dp_vs_proto *proto, if (th->syn && !th->ack) { tcp_in_remove_ts(th); tcp_in_init_seq(conn, mbuf, th); @@ -132,5 +133,5 @@ index 6acbbca..5b185fa 100644 /* Add toa/proxy_protocol to the first data packet */ -- -1.8.3.1 +2.31.1 diff --git a/patch/dpdk-24.11/0001-pdump-add-cmdline-packet-filters-for-dpdk-pdump-tool.patch b/patch/dpdk-24.11/0001-pdump-add-cmdline-packet-filters-for-dpdk-pdump-tool.patch new file mode 100644 index 000000000..a25d2ec1b --- /dev/null +++ b/patch/dpdk-24.11/0001-pdump-add-cmdline-packet-filters-for-dpdk-pdump-tool.patch @@ -0,0 +1,725 @@ +From a152d34e729c5597ec8eebc6ba46a644e87a810f Mon Sep 17 00:00:00 2001 +From: ywc689 +Date: Fri, 13 Dec 2024 17:06:14 +0800 +Subject: [PATCH 1/6] pdump: add cmdline packet filters for dpdk-pdump tool + +Signed-off-by: ywc689 +--- + app/pdump/main.c | 171 ++++++++++++++++++++++++++++++++++++++-- + lib/pdump/rte_pdump.c | 176 ++++++++++++++++++++++++++++++++++++++---- + lib/pdump/rte_pdump.h | 39 ++++++++-- + 3 files changed, 358 insertions(+), 28 deletions(-) + +diff --git a/app/pdump/main.c b/app/pdump/main.c +index fa85859..6b35a6c 100644 +--- a/app/pdump/main.c ++++ b/app/pdump/main.c +@@ -27,6 +27,7 @@ + #include + #include + #include ++#include + + #define CMD_LINE_OPT_PDUMP "pdump" + #define CMD_LINE_OPT_PDUMP_NUM 256 +@@ -42,6 +43,14 @@ + #define PDUMP_MSIZE_ARG "mbuf-size" + #define PDUMP_NUM_MBUFS_ARG "total-num-mbufs" + ++#define PDUMP_HOST_ARG "host" ++#define PDUMP_SRC_ARG "src-host" ++#define PDUMP_DST_ARG "dst-host" ++#define PDUMP_PROTO_PORT_ARG "proto-port" ++#define PDUMP_SPORT_ARG "src-port" ++#define PDUMP_DPORT_ARG "dst-port" ++#define PDUMP_PROTO_ARG "proto" ++ + #define VDEV_NAME_FMT "net_pcap_%s_%d" + #define VDEV_PCAP_ARGS_FMT "tx_pcap=%s" + #define VDEV_IFACE_ARGS_FMT "tx_iface=%s" +@@ -97,6 +106,13 @@ 
static const char * const valid_pdump_arguments[] = { + PDUMP_RING_SIZE_ARG, + PDUMP_MSIZE_ARG, + PDUMP_NUM_MBUFS_ARG, ++ PDUMP_HOST_ARG, ++ PDUMP_SRC_ARG, ++ PDUMP_DST_ARG, ++ PDUMP_PROTO_PORT_ARG, ++ PDUMP_SPORT_ARG, ++ PDUMP_DPORT_ARG, ++ PDUMP_PROTO_ARG, + NULL + }; + +@@ -131,6 +147,9 @@ struct __rte_cache_aligned pdump_tuples { + enum pcap_stream tx_vdev_stream_type; + bool single_pdump_dev; + ++ /* cmdline packet filter */ ++ struct pdump_filter *filter0; ++ + /* stats */ + struct pdump_stats stats; + }; +@@ -158,6 +177,11 @@ pdump_usage(const char *prgname) + "(queue=)," + "(rx-dev= |" + " tx-dev=," ++ "[host= | src-host= |" ++ "dst-host=]," ++ "[proto=support:tcp/udp/icmp/icmp6]," ++ "[proto-port= |src-port= |" ++ "dst-port=]," + "[ring-size=default:16384]," + "[mbuf-size=default:2176]," + "[total-num-mbufs=default:65535]'\n", +@@ -246,6 +270,66 @@ parse_uint_value(const char *key, const char *value, void *extra_args) + return 0; + } + ++static int ++parse_host(const char *key __rte_unused, const char *value, void *extra_args) ++{ ++ struct pdump_tuples *pt = extra_args; ++ struct in_addr inaddr; ++ struct in6_addr inaddr6; ++ union addr addr; ++ int af = 0; ++ ++ if (inet_pton(AF_INET6, value, &inaddr6) > 0) { ++ af = AF_INET6; ++ addr.in6 = inaddr6; ++ } else if (inet_pton(AF_INET, value, &inaddr) > 0){ ++ af = AF_INET; ++ addr.in = inaddr; ++ } else { ++ printf("IP address invaled\n"); ++ return -EINVAL; ++ } ++ ++ if (pt->filter0 && pt->filter0->af != 0 && af != pt->filter0->af) { ++ printf("IPv4 and IPv6 conflict\n"); ++ return -EINVAL; ++ } else { ++ pt->filter0->af = af; ++ } ++ ++ if (!strcmp(key, PDUMP_HOST_ARG)) { ++ rte_memcpy(&pt->filter0->host_addr, &addr, sizeof(addr)); ++ } else if (!strcmp(key, PDUMP_SRC_ARG)) { ++ rte_memcpy(&pt->filter0->s_addr, &addr, sizeof(addr)); ++ } else if (!strcmp(key, PDUMP_DST_ARG)) { ++ rte_memcpy(&pt->filter0->d_addr, &addr, sizeof(addr)); ++ } ++ ++ return 0; ++} ++ ++static int ++parse_proto(const char *key 
__rte_unused, const char *value, void *extra_args) ++{ ++ struct pdump_tuples *pt = extra_args; ++ ++ if (!strcmp(value, "tcp")) { ++ pt->filter0->proto = IPPROTO_TCP; ++ } else if (!strcmp(value, "udp")) { ++ pt->filter0->proto = IPPROTO_UDP; ++ } else if (!strcmp(value, "icmp6")) { ++ pt->filter0->proto = IPPROTO_ICMPV6; ++ } else if (!strcmp(value, "icmp")) { ++ pt->filter0->proto = IPPROTO_ICMP; ++ } else { ++ printf("invalid value:\"%s\" for key:\"%s\", " ++ "value must be tcp/udp/icmp/icmp6\n", value, key); ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ + static int + parse_pdump(const char *optarg) + { +@@ -373,6 +457,75 @@ parse_pdump(const char *optarg) + } else + pt->total_num_mbufs = MBUFS_PER_POOL; + ++ /* filter parsing and validation */ ++ pt->filter0 = rte_zmalloc("pdump_filter", ++ sizeof(struct pdump_filter), 0); ++ cnt1 = rte_kvargs_count(kvlist, PDUMP_HOST_ARG); ++ if (cnt1 == 1) { ++ ret = rte_kvargs_process(kvlist, PDUMP_HOST_ARG, ++ &parse_host, pt); ++ if (ret < 0) ++ goto free_kvlist; ++ } ++ ++ cnt1 = rte_kvargs_count(kvlist, PDUMP_SRC_ARG); ++ if (cnt1 == 1) { ++ ret = rte_kvargs_process(kvlist, PDUMP_SRC_ARG, ++ &parse_host, pt); ++ if (ret < 0) ++ goto free_kvlist; ++ } ++ ++ cnt1 = rte_kvargs_count(kvlist, PDUMP_DST_ARG); ++ if (cnt1 == 1) { ++ ret = rte_kvargs_process(kvlist, PDUMP_DST_ARG, ++ &parse_host, pt); ++ if (ret < 0) ++ goto free_kvlist; ++ } ++ ++ ++ cnt1 = rte_kvargs_count(kvlist, PDUMP_PROTO_PORT_ARG); ++ if (cnt1 == 1) { ++ v.min = 1; ++ v.max = UINT16_MAX; ++ ret = rte_kvargs_process(kvlist, PDUMP_PROTO_PORT_ARG, ++ &parse_uint_value, &v); ++ if (ret < 0) ++ goto free_kvlist; ++ pt->filter0->proto_port = (uint16_t) v.val; ++ } ++ ++ cnt1 = rte_kvargs_count(kvlist, PDUMP_SPORT_ARG); ++ if (cnt1 == 1) { ++ v.min = 1; ++ v.max = UINT16_MAX; ++ ret = rte_kvargs_process(kvlist, PDUMP_SPORT_ARG, ++ &parse_uint_value, &v); ++ if (ret < 0) ++ goto free_kvlist; ++ pt->filter0->s_port = (uint16_t) v.val; ++ } ++ ++ cnt1 = 
rte_kvargs_count(kvlist, PDUMP_DPORT_ARG); ++ if (cnt1 == 1) { ++ v.min = 1; ++ v.max = UINT16_MAX; ++ ret = rte_kvargs_process(kvlist, PDUMP_DPORT_ARG, ++ &parse_uint_value, &v); ++ if (ret < 0) ++ goto free_kvlist; ++ pt->filter0->d_port = (uint16_t) v.val; ++ } ++ ++ cnt1 = rte_kvargs_count(kvlist, PDUMP_PROTO_ARG); ++ if (cnt1 == 1) { ++ ret = rte_kvargs_process(kvlist, PDUMP_PROTO_ARG, ++ &parse_proto, pt); ++ if (ret < 0) ++ goto free_kvlist; ++ } ++ + num_tuples++; + + free_kvlist: +@@ -511,6 +664,8 @@ cleanup_rings(void) + rte_ring_free(pt->rx_ring); + rte_ring_free(pt->tx_ring); + rte_mempool_free(pt->mp); ++ if (pt->filter0) ++ rte_free(pt->filter0); + } + } + +@@ -831,20 +986,20 @@ enable_pdump(void) + pt->queue, + RTE_PDUMP_FLAG_RX, + pt->rx_ring, +- pt->mp, NULL); ++ pt->mp, pt->filter0); + ret1 = rte_pdump_enable_by_deviceid( + pt->device_id, + pt->queue, + RTE_PDUMP_FLAG_TX, + pt->tx_ring, +- pt->mp, NULL); ++ pt->mp, pt->filter0); + } else if (pt->dump_by_type == PORT_ID) { + ret = rte_pdump_enable(pt->port, pt->queue, + RTE_PDUMP_FLAG_RX, +- pt->rx_ring, pt->mp, NULL); ++ pt->rx_ring, pt->mp, pt->filter0); + ret1 = rte_pdump_enable(pt->port, pt->queue, + RTE_PDUMP_FLAG_TX, +- pt->tx_ring, pt->mp, NULL); ++ pt->tx_ring, pt->mp, pt->filter0); + } + } else if (pt->dir == RTE_PDUMP_FLAG_RX) { + if (pt->dump_by_type == DEVICE_ID) +@@ -852,22 +1007,22 @@ enable_pdump(void) + pt->device_id, + pt->queue, + pt->dir, pt->rx_ring, +- pt->mp, NULL); ++ pt->mp, pt->filter0); + else if (pt->dump_by_type == PORT_ID) + ret = rte_pdump_enable(pt->port, pt->queue, + pt->dir, +- pt->rx_ring, pt->mp, NULL); ++ pt->rx_ring, pt->mp, pt->filter0); + } else if (pt->dir == RTE_PDUMP_FLAG_TX) { + if (pt->dump_by_type == DEVICE_ID) + ret = rte_pdump_enable_by_deviceid( + pt->device_id, + pt->queue, + pt->dir, +- pt->tx_ring, pt->mp, NULL); ++ pt->tx_ring, pt->mp, pt->filter0); + else if (pt->dump_by_type == PORT_ID) + ret = rte_pdump_enable(pt->port, pt->queue, + pt->dir, +- 
pt->tx_ring, pt->mp, NULL); ++ pt->tx_ring, pt->mp, pt->filter0); + } + if (ret < 0 || ret1 < 0) { + cleanup_pdump_resources(); +diff --git a/lib/pdump/rte_pdump.c b/lib/pdump/rte_pdump.c +index 679c3dd..f9b8a3e 100644 +--- a/lib/pdump/rte_pdump.c ++++ b/lib/pdump/rte_pdump.c +@@ -12,6 +12,10 @@ + #include + #include + #include ++#include ++#include ++#include ++#include + + #include "rte_pdump.h" + +@@ -43,9 +47,12 @@ struct pdump_request { + uint16_t queue; + struct rte_ring *ring; + struct rte_mempool *mp; +- ++ /* cmdline packet filterr */ ++ const struct pdump_filter *filter0; ++ /* eBpf packet filter */ + const struct rte_bpf_prm *prm; + uint32_t snaplen; ++ + }; + + struct pdump_response { +@@ -58,12 +65,139 @@ static struct pdump_rxtx_cbs { + struct rte_ring *ring; + struct rte_mempool *mp; + const struct rte_eth_rxtx_callback *cb; +- const struct rte_bpf *filter; ++ const struct pdump_filter *filter0; /* cmdline packet filterr */ ++ const struct rte_bpf *filter; /* eBpf packet filter */ + enum pdump_version ver; + uint32_t snaplen; + } rx_cbs[RTE_MAX_ETHPORTS][RTE_MAX_QUEUES_PER_PORT], + tx_cbs[RTE_MAX_ETHPORTS][RTE_MAX_QUEUES_PER_PORT]; + ++static int ++inet_addr_equal(int af, const union addr *a1, ++ const union addr *a2) ++{ ++ switch (af) { ++ case AF_INET: ++ return a1->in.s_addr == a2->in.s_addr; ++ case AF_INET6: ++ return memcmp(a1->in6.s6_addr, a2->in6.s6_addr, 16) == 0; ++ default: ++ return memcmp(a1, a2, sizeof(union addr)) == 0; ++ } ++} ++ ++static int ++inet_is_addr_any(int af, const union addr *addr) ++{ ++ switch (af) { ++ case AF_INET: ++ return addr->in.s_addr == htonl(INADDR_ANY); ++ case AF_INET6: ++ return IN6_ARE_ADDR_EQUAL(&addr->in6, &in6addr_any); ++ default: ++ return -1; ++ } ++ ++ return -1; ++} ++static int ++pdump_filter(struct rte_mbuf *m, const struct pdump_filter *filter0) ++{ ++ struct rte_ether_hdr *eth_hdr; ++ struct vlan_eth_hdr *vlan_eth_hdr; ++ union addr s_addr, d_addr; ++ int prepend = 0; ++ uint16_t type = 0; ++ 
uint16_t iph_len = 0; ++ uint8_t proto = 0; ++ ++ int af; ++ ++ if (filter0->af == 0 && filter0->s_port == 0 && ++ filter0->d_port == 0 && filter0->proto == 0 && ++ filter0->proto_port == 0) ++ return 0; ++ ++ eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); ++ ++ if (eth_hdr->ether_type == htons(ETH_P_8021Q)) { ++ prepend += sizeof(struct vlan_eth_hdr); ++ vlan_eth_hdr = rte_pktmbuf_mtod(m, struct vlan_eth_hdr *); ++ type = vlan_eth_hdr->h_vlan_encapsulated_proto; ++ } else { ++ prepend += sizeof(struct rte_ether_hdr); ++ eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); ++ type = eth_hdr->ether_type; ++ } ++ ++ if (rte_pktmbuf_adj(m, prepend) == NULL) ++ goto prepend; ++ ++ if (type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) { ++ struct rte_arp_hdr *arp = rte_pktmbuf_mtod(m, struct rte_arp_hdr *); ++ af = AF_INET; ++ s_addr.in.s_addr = arp->arp_data.arp_sip; ++ d_addr.in.s_addr = arp->arp_data.arp_tip; ++ } else if (type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4)) { ++ struct rte_ipv4_hdr *ip4 = rte_pktmbuf_mtod(m, struct rte_ipv4_hdr *); ++ af = AF_INET; ++ s_addr.in.s_addr = ip4->src_addr; ++ d_addr.in.s_addr = ip4->dst_addr; ++ proto = ip4->next_proto_id; ++ iph_len = (ip4->version_ihl & 0xf) << 2; ++ } else if (type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6)) { ++ struct rte_ipv6_hdr *ip6 = rte_pktmbuf_mtod(m, struct rte_ipv6_hdr *); ++ af = AF_INET6; ++ rte_memcpy(&s_addr.in6, &ip6->src_addr, 16); ++ rte_memcpy(&d_addr.in6, &ip6->dst_addr, 16); ++ proto = ip6->proto; ++ iph_len = sizeof(struct rte_ipv6_hdr); ++ } else { ++ goto prepend; ++ } ++ ++ /* do filter */ ++ if (!inet_is_addr_any(af, &filter0->s_addr) && ++ !inet_addr_equal(af, &filter0->s_addr, &s_addr)) ++ goto prepend; ++ if (!inet_is_addr_any(af, &filter0->d_addr) && ++ !inet_addr_equal(af, &filter0->d_addr, &d_addr)) ++ goto prepend; ++ if (!inet_is_addr_any(af, &filter0->host_addr) && ++ !inet_addr_equal(af, &filter0->host_addr, &s_addr) && ++ !inet_addr_equal(af, &filter0->host_addr, 
&d_addr)) ++ goto prepend; ++ ++ if (filter0->proto && filter0->proto != proto) ++ goto prepend; ++ ++ if (filter0->s_port || filter0->d_port || filter0->proto_port) { ++ if (proto != IPPROTO_TCP && proto != IPPROTO_UDP) ++ goto prepend; ++ struct rte_udp_hdr _uh; ++ const struct rte_udp_hdr *uh; ++ uh = rte_pktmbuf_read(m, iph_len, sizeof(_uh), &_uh); ++ if (uh == NULL) ++ goto prepend; ++ if (filter0->s_port && filter0->s_port != rte_cpu_to_be_16(uh->src_port)) ++ goto prepend; ++ ++ if (filter0->d_port && filter0->d_port != rte_cpu_to_be_16(uh->dst_port)) ++ goto prepend; ++ ++ if (filter0->proto_port && ++ filter0->proto_port != rte_cpu_to_be_16(uh->src_port) && ++ filter0->proto_port != rte_cpu_to_be_16(uh->dst_port)) ++ goto prepend; ++ } ++ ++ rte_pktmbuf_prepend(m, prepend); ++ return 0; ++ ++prepend: ++ rte_pktmbuf_prepend(m, prepend); ++ return -1; ++} + + /* + * The packet capture statistics keep track of packets +@@ -112,6 +246,10 @@ pdump_copy(uint16_t port_id, uint16_t queue, + continue; + } + ++ /* Apply cmdline packet filters. */ ++ if (cbs->filter0 && pdump_filter(pkts[i], cbs->filter0) != 0) ++ continue; ++ + /* + * If using pcapng then want to wrap packets + * otherwise a simple copy. 
+@@ -169,6 +307,7 @@ static int + pdump_register_rx_callbacks(enum pdump_version ver, + uint16_t end_q, uint16_t port, uint16_t queue, + struct rte_ring *ring, struct rte_mempool *mp, ++ const struct pdump_filter *filter0, + struct rte_bpf *filter, + uint16_t operation, uint32_t snaplen) + { +@@ -190,6 +329,7 @@ pdump_register_rx_callbacks(enum pdump_version ver, + cbs->mp = mp; + cbs->snaplen = snaplen; + cbs->filter = filter; ++ cbs->filter0 = filter0; + + cbs->cb = rte_eth_add_first_rx_callback(port, qid, + pdump_rx, cbs); +@@ -226,6 +366,7 @@ static int + pdump_register_tx_callbacks(enum pdump_version ver, + uint16_t end_q, uint16_t port, uint16_t queue, + struct rte_ring *ring, struct rte_mempool *mp, ++ const struct pdump_filter *filter0, + struct rte_bpf *filter, + uint16_t operation, uint32_t snaplen) + { +@@ -248,6 +389,7 @@ pdump_register_tx_callbacks(enum pdump_version ver, + cbs->mp = mp; + cbs->snaplen = snaplen; + cbs->filter = filter; ++ cbs->filter0 = filter0; + + cbs->cb = rte_eth_add_tx_callback(port, qid, pdump_tx, + cbs); +@@ -287,6 +429,7 @@ set_pdump_rxtx_cbs(const struct pdump_request *p) + uint16_t port; + int ret = 0; + struct rte_bpf *filter = NULL; ++ const struct pdump_filter *filter0; + uint32_t flags; + uint16_t operation; + struct rte_ring *ring; +@@ -314,6 +457,7 @@ set_pdump_rxtx_cbs(const struct pdump_request *p) + return -rte_errno; + } + } ++ filter0 = p->filter0; + + flags = p->flags; + operation = p->op; +@@ -365,7 +509,7 @@ set_pdump_rxtx_cbs(const struct pdump_request *p) + if (flags & RTE_PDUMP_FLAG_RX) { + end_q = (queue == RTE_PDUMP_ALL_QUEUES) ? nb_rx_q : queue + 1; + ret = pdump_register_rx_callbacks(p->ver, end_q, port, queue, +- ring, mp, filter, ++ ring, mp, filter0, filter, + operation, p->snaplen); + if (ret < 0) + return ret; +@@ -375,7 +519,7 @@ set_pdump_rxtx_cbs(const struct pdump_request *p) + if (flags & RTE_PDUMP_FLAG_TX) { + end_q = (queue == RTE_PDUMP_ALL_QUEUES) ? 
nb_tx_q : queue + 1; + ret = pdump_register_tx_callbacks(p->ver, end_q, port, queue, +- ring, mp, filter, ++ ring, mp, filter0, filter, + operation, p->snaplen); + if (ret < 0) + return ret; +@@ -525,6 +669,7 @@ pdump_prepare_client_request(const char *device, uint16_t queue, + uint16_t operation, + struct rte_ring *ring, + struct rte_mempool *mp, ++ const struct pdump_filter *filter0, + const struct rte_bpf_prm *prm) + { + int ret = -1; +@@ -551,6 +696,7 @@ pdump_prepare_client_request(const char *device, uint16_t queue, + if ((operation & ENABLE) != 0) { + req->ring = ring; + req->mp = mp; ++ req->filter0 = filter0; + req->prm = prm; + req->snaplen = snaplen; + } +@@ -584,6 +730,7 @@ static int + pdump_enable(uint16_t port, uint16_t queue, + uint32_t flags, uint32_t snaplen, + struct rte_ring *ring, struct rte_mempool *mp, ++ const struct pdump_filter *filter0, + const struct rte_bpf_prm *prm) + { + int ret; +@@ -603,17 +750,17 @@ pdump_enable(uint16_t port, uint16_t queue, + snaplen = UINT32_MAX; + + return pdump_prepare_client_request(name, queue, flags, snaplen, +- ENABLE, ring, mp, prm); ++ ENABLE, ring, mp, filter0, prm); + } + + int + rte_pdump_enable(uint16_t port, uint16_t queue, uint32_t flags, + struct rte_ring *ring, + struct rte_mempool *mp, +- void *filter __rte_unused) ++ const struct pdump_filter *filter0) + { + return pdump_enable(port, queue, flags, 0, +- ring, mp, NULL); ++ ring, mp, filter0, NULL); + } + + int +@@ -624,7 +771,7 @@ rte_pdump_enable_bpf(uint16_t port, uint16_t queue, + const struct rte_bpf_prm *prm) + { + return pdump_enable(port, queue, flags, snaplen, +- ring, mp, prm); ++ ring, mp, NULL, prm); + } + + static int +@@ -632,6 +779,7 @@ pdump_enable_by_deviceid(const char *device_id, uint16_t queue, + uint32_t flags, uint32_t snaplen, + struct rte_ring *ring, + struct rte_mempool *mp, ++ const struct pdump_filter *filter0, + const struct rte_bpf_prm *prm) + { + int ret; +@@ -647,7 +795,7 @@ pdump_enable_by_deviceid(const char 
*device_id, uint16_t queue, + snaplen = UINT32_MAX; + + return pdump_prepare_client_request(device_id, queue, flags, snaplen, +- ENABLE, ring, mp, prm); ++ ENABLE, ring, mp, filter0, prm); + } + + int +@@ -655,10 +803,10 @@ rte_pdump_enable_by_deviceid(char *device_id, uint16_t queue, + uint32_t flags, + struct rte_ring *ring, + struct rte_mempool *mp, +- void *filter __rte_unused) ++ const struct pdump_filter *filter0) + { + return pdump_enable_by_deviceid(device_id, queue, flags, 0, +- ring, mp, NULL); ++ ring, mp, filter0, NULL); + } + + int +@@ -669,7 +817,7 @@ rte_pdump_enable_bpf_by_deviceid(const char *device_id, uint16_t queue, + const struct rte_bpf_prm *prm) + { + return pdump_enable_by_deviceid(device_id, queue, flags, snaplen, +- ring, mp, prm); ++ ring, mp, NULL, prm); + } + + int +@@ -686,7 +834,7 @@ rte_pdump_disable(uint16_t port, uint16_t queue, uint32_t flags) + return ret; + + ret = pdump_prepare_client_request(name, queue, flags, 0, +- DISABLE, NULL, NULL, NULL); ++ DISABLE, NULL, NULL, NULL, NULL); + + return ret; + } +@@ -702,7 +850,7 @@ rte_pdump_disable_by_deviceid(char *device_id, uint16_t queue, + return ret; + + ret = pdump_prepare_client_request(device_id, queue, flags, 0, +- DISABLE, NULL, NULL, NULL); ++ DISABLE, NULL, NULL, NULL, NULL); + + return ret; + } +diff --git a/lib/pdump/rte_pdump.h b/lib/pdump/rte_pdump.h +index 1e32d46..b44b222 100644 +--- a/lib/pdump/rte_pdump.h ++++ b/lib/pdump/rte_pdump.h +@@ -15,6 +15,8 @@ + #include + + #include ++#include ++#include + + #ifdef __cplusplus + extern "C" { +@@ -31,6 +33,31 @@ enum { + RTE_PDUMP_FLAG_PCAPNG = 4, /* format for pcapng */ + }; + ++union addr { ++ struct in_addr in; ++ struct in6_addr in6; ++}; ++ ++struct pdump_filter { ++ int af; ++ union addr s_addr; ++ union addr d_addr; ++ union addr host_addr; //s_addr or d_addr ++ ++ uint8_t proto; ++ uint16_t proto_port; //s_port or d_port ++ uint16_t s_port; ++ uint16_t d_port; ++}; ++ ++struct vlan_eth_hdr { ++ unsigned char 
h_dest[ETH_ALEN]; ++ unsigned char h_source[ETH_ALEN]; ++ unsigned short h_vlan_proto; ++ unsigned short h_vlan_TCI; ++ unsigned short h_vlan_encapsulated_proto; ++}; ++ + /** + * Initialize packet capturing handling + * +@@ -69,8 +96,8 @@ rte_pdump_uninit(void); + * ring on which captured packets will be enqueued for user. + * @param mp + * mempool on to which original packets will be mirrored or duplicated. +- * @param filter +- * Unused should be NULL. ++ * @param filter0 ++ * cmdline packet filter + * + * @return + * 0 on success, -1 on error, rte_errno is set accordingly. +@@ -80,7 +107,7 @@ int + rte_pdump_enable(uint16_t port, uint16_t queue, uint32_t flags, + struct rte_ring *ring, + struct rte_mempool *mp, +- void *filter); ++ const struct pdump_filter *filter0); + + /** + * Enables packet capturing on given port and queue with filtering. +@@ -150,8 +177,8 @@ rte_pdump_disable(uint16_t port, uint16_t queue, uint32_t flags); + * ring on which captured packets will be enqueued for user. + * @param mp + * mempool on to which original packets will be mirrored or duplicated. +- * @param filter +- * unused should be NULL ++ * @param filter0 ++ * cmdline packet filter + * + * @return + * 0 on success, -1 on error, rte_errno is set accordingly. +@@ -162,7 +189,7 @@ rte_pdump_enable_by_deviceid(char *device_id, uint16_t queue, + uint32_t flags, + struct rte_ring *ring, + struct rte_mempool *mp, +- void *filter); ++ const struct pdump_filter *filter0); + + /** + * Enables packet capturing on given device id and queue with filtering. 
+-- +2.31.1 + diff --git a/patch/dpdk-24.11/0002-debug-enable-dpdk-eal-memory-debug.patch b/patch/dpdk-24.11/0002-debug-enable-dpdk-eal-memory-debug.patch new file mode 100644 index 000000000..107ca9eb1 --- /dev/null +++ b/patch/dpdk-24.11/0002-debug-enable-dpdk-eal-memory-debug.patch @@ -0,0 +1,55 @@ +From 456164059079a114d9905b275a97788b46914ef4 Mon Sep 17 00:00:00 2001 +From: ywc689 +Date: Fri, 13 Dec 2024 17:33:04 +0800 +Subject: [PATCH 2/6] debug: enable dpdk eal memory debug + +Signed-off-by: ywc689 +--- + lib/eal/common/rte_malloc.c | 4 ++++ + lib/eal/include/rte_malloc.h | 15 +++++++++++++++ + 2 files changed, 19 insertions(+) + +diff --git a/lib/eal/common/rte_malloc.c b/lib/eal/common/rte_malloc.c +index 3eed4d4..3a57769 100644 +--- a/lib/eal/common/rte_malloc.c ++++ b/lib/eal/common/rte_malloc.c +@@ -25,6 +25,10 @@ + #include "eal_memcfg.h" + #include "eal_private.h" + ++int rte_memmory_ok(void *addr) ++{ ++ return malloc_elem_cookies_ok(RTE_PTR_SUB(addr, MALLOC_ELEM_HEADER_LEN)); ++} + + /* Free the memory space back to heap */ + static void +diff --git a/lib/eal/include/rte_malloc.h b/lib/eal/include/rte_malloc.h +index c8836de..d8b95a7 100644 +--- a/lib/eal/include/rte_malloc.h ++++ b/lib/eal/include/rte_malloc.h +@@ -273,6 +273,21 @@ rte_calloc_socket(const char *type, size_t num, size_t size, unsigned align, int + __rte_alloc_size(2, 3) __rte_alloc_align(4) + __rte_malloc __rte_dealloc_free; + ++/** ++ * Check the header/tailer cookies of memory pointed to by the provided pointer. ++ * ++ * This pointer must have been returned by a previous call to ++ * rte_malloc(), rte_zmalloc(), rte_calloc() or rte_realloc(). ++ * ++ * @param ptr ++ * The pointer to memory to be checked. ++ * @return ++ * - true if the header/tailer cookies are OK. ++ * - Otherwise, false. ++ */ ++int ++rte_memmory_ok(void *ptr); ++ + /** + * If malloc debug is enabled, check a memory block for header + * and trailer markers to indicate that all is well with the block. 
+-- +2.31.1 + diff --git a/patch/dpdk-stable-20.11.1/0004-ixgbe_flow-patch-ixgbe-fdir-rte_flow-for-dpvs.patch b/patch/dpdk-24.11/0003-ixgbe_flow-patch-ixgbe-fdir-rte_flow-for-dpvs.patch similarity index 60% rename from patch/dpdk-stable-20.11.1/0004-ixgbe_flow-patch-ixgbe-fdir-rte_flow-for-dpvs.patch rename to patch/dpdk-24.11/0003-ixgbe_flow-patch-ixgbe-fdir-rte_flow-for-dpvs.patch index ef7eda6d4..1c692199e 100644 --- a/patch/dpdk-stable-20.11.1/0004-ixgbe_flow-patch-ixgbe-fdir-rte_flow-for-dpvs.patch +++ b/patch/dpdk-24.11/0003-ixgbe_flow-patch-ixgbe-fdir-rte_flow-for-dpvs.patch @@ -1,20 +1,25 @@ -From 965c6ebd04d49ba578bab321ea87768669a2c7d1 Mon Sep 17 00:00:00 2001 -From: huangyichen -Date: Fri, 2 Jul 2021 11:55:47 +0800 -Subject: [PATCH 4/6] ixgbe_flow: patch ixgbe fdir rte_flow for dpvs +From fb255fafabacd06fc36dd73b37386b4c3cba282c Mon Sep 17 00:00:00 2001 +From: ywc689 +Date: Mon, 16 Dec 2024 13:47:27 +0800 +Subject: [PATCH 3/6] ixgbe_flow: patch ixgbe fdir rte_flow for dpvs 1. Ignore fdir flow rule priority attribute. -2. Use different fdir soft-id for flow rules configured for the same queue. -3. Disable fdir mask settings by rte_flow. -4. Allow IPv6 to pass flow rule ETH item validation. -5. TCP & UDP flow item dest port = 0 is invalid of ixgbe_parse_ntuple_filter() -6. Safe free ixgbe_flow_list item of MARCO RTE_MALLOC_DEBUG is define (configure meson with option -Dc_args="-DRTE_MALLOC_DEBUG") +2. Use different fdir soft-id for flow rules configured on the same queue. +3. Allow IPv6 to pass flow rule ETH item validation. +4. Invalidate TCP&UDP flow item rule whose dest port is 0 in ntuple filter so that + the rule can be handed over to flow director filter in lower priority. +5. Safely free ixgbe_flow_list item when MARCO RTE_MALLOC_DEBUG is define (configure + meson with option -Dc_args="-DRTE_MALLOC_DEBUG"). +6. Change IPv4/IPv6 fdir mask default value to 0, the same value as in DPVS, so that + IPv4 and IPv6 fidr rules can coexist. 
+ +Signed-off-by: ywc689 --- - drivers/net/ixgbe/ixgbe_flow.c | 119 ++++++++++++++++++++++++++++++++++++----- - 1 file changed, 105 insertions(+), 14 deletions(-) + drivers/net/ixgbe/ixgbe_flow.c | 100 +++++++++++++++++++++++++++++---- + 1 file changed, 88 insertions(+), 12 deletions(-) diff --git a/drivers/net/ixgbe/ixgbe_flow.c b/drivers/net/ixgbe/ixgbe_flow.c -index 9aeb2e4..481a06f 100644 +index 1b35ed5..f2f7b7f 100644 --- a/drivers/net/ixgbe/ixgbe_flow.c +++ b/drivers/net/ixgbe/ixgbe_flow.c @@ -2,7 +2,6 @@ @@ -25,7 +30,7 @@ index 9aeb2e4..481a06f 100644 #include #include #include -@@ -15,6 +14,7 @@ +@@ -14,6 +13,7 @@ #include #include @@ -33,7 +38,7 @@ index 9aeb2e4..481a06f 100644 #include #include #include -@@ -468,6 +468,29 @@ cons_parse_ntuple_filter(const struct rte_flow_attr *attr, +@@ -467,6 +467,29 @@ cons_parse_ntuple_filter(const struct rte_flow_attr *attr, } tcp_spec = item->spec; @@ -63,7 +68,7 @@ index 9aeb2e4..481a06f 100644 filter->dst_port = tcp_spec->hdr.dst_port; filter->src_port = tcp_spec->hdr.src_port; filter->tcp_flags = tcp_spec->hdr.tcp_flags; -@@ -501,6 +524,30 @@ cons_parse_ntuple_filter(const struct rte_flow_attr *attr, +@@ -500,6 +523,30 @@ cons_parse_ntuple_filter(const struct rte_flow_attr *attr, filter->src_port_mask = udp_mask->hdr.src_port; udp_spec = item->spec; @@ -94,7 +99,7 @@ index 9aeb2e4..481a06f 100644 filter->dst_port = udp_spec->hdr.dst_port; filter->src_port = udp_spec->hdr.src_port; } else if (item->type == RTE_FLOW_ITEM_TYPE_SCTP) { -@@ -1419,11 +1466,8 @@ ixgbe_parse_fdir_act_attr(const struct rte_flow_attr *attr, +@@ -1418,11 +1465,8 @@ ixgbe_parse_fdir_act_attr(const struct rte_flow_attr *attr, /* not supported */ if (attr->priority) { @@ -104,20 +109,23 @@ index 9aeb2e4..481a06f 100644 - attr, "Not support priority."); - return -rte_errno; + PMD_DRV_LOG(INFO, "ixgbe flow doesn't support priority %d " -+ "(priority must be 0), ignore and continue....\n", attr->priority); ++ "(priority must be 0), ignore and 
continue....", attr->priority); } /* check if the first not void action is QUEUE or DROP. */ -@@ -1642,7 +1686,7 @@ ixgbe_parse_fdir_filter_normal(struct rte_eth_dev *dev, - * value. So, we need not do anything for the not provided fields later. - */ - memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); -- memset(&rule->mask, 0xFF, sizeof(struct ixgbe_hw_fdir_mask)); -+ memset(&rule->mask, 0, sizeof(struct ixgbe_hw_fdir_mask)); /* mask default zero */ +@@ -1645,6 +1689,11 @@ ixgbe_parse_fdir_filter_normal(struct rte_eth_dev *dev, rule->mask.vlan_tci_mask = 0; rule->mask.flex_bytes_mask = 0; -@@ -1760,6 +1804,8 @@ ixgbe_parse_fdir_filter_normal(struct rte_eth_dev *dev, ++ /* DPVS requires IPv4 and IPv6 flow rules coexist in some cases, ++ * so we let the default source mask be consistent with DPVS. */ ++ rule->mask.src_ipv4_mask = 0; ++ rule->mask.src_ipv6_mask = 0; ++ + /** + * The first not void item should be + * MAC or IPv4 or TCP or UDP or SCTP. +@@ -1759,6 +1808,8 @@ ixgbe_parse_fdir_filter_normal(struct rte_eth_dev *dev, } } else { if (item->type != RTE_FLOW_ITEM_TYPE_IPV4 && @@ -126,28 +134,7 @@ index 9aeb2e4..481a06f 100644 item->type != RTE_FLOW_ITEM_TYPE_VLAN) { memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); rte_flow_error_set(error, EINVAL, -@@ -1815,6 +1861,10 @@ ixgbe_parse_fdir_filter_normal(struct rte_eth_dev *dev, - */ - rule->ixgbe_fdir.formatted.flow_type = - IXGBE_ATR_FLOW_TYPE_IPV4; -+ -+ /* Update flow rule mode by global param. */ -+ rule->mode = dev->data->dev_conf.fdir_conf.mode; -+ - /*Not supported last point for range*/ - if (item->last) { - rte_flow_error_set(error, EINVAL, -@@ -1888,6 +1938,9 @@ ixgbe_parse_fdir_filter_normal(struct rte_eth_dev *dev, - rule->ixgbe_fdir.formatted.flow_type = - IXGBE_ATR_FLOW_TYPE_IPV6; - -+ /* Update flow rule mode by global param. */ -+ rule->mode = dev->data->dev_conf.fdir_conf.mode; -+ - /** - * 1. must signature match - * 2. 
not support last -@@ -2748,12 +2801,45 @@ ixgbe_parse_fdir_filter_tunnel(const struct rte_flow_attr *attr, +@@ -2747,12 +2798,31 @@ ixgbe_parse_fdir_filter_tunnel(const struct rte_flow_attr *attr, return ixgbe_parse_fdir_act_attr(attr, actions, rule, error); } @@ -167,20 +154,6 @@ index 9aeb2e4..481a06f 100644 + /* Soft-id for different rx-queue should be different. */ + rule->soft_id = softid[rule->queue]++; + -+ /* Disable mask config from rte_flow. -+ * FIXME: -+ * Ixgbe only supports one global mask, all the masks should be the same. -+ * Generally, fdir masks should be configured globally before port start. -+ * But the rte_flow configures masks at flow creation. So we disable fdir -+ * mask configs in rte_flow and configure it globally when port start. -+ * Refer to `ixgbe_dev_start/ixgbe_fdir_configure` for details. The global -+ * masks are configured into device initially with user specified params. -+ */ -+ rule->b_mask = 0; -+ -+ /* Use user-defined mode. */ -+ rule->mode = dev->data->dev_conf.fdir_conf.mode; -+ + return 0; +} + @@ -194,14 +167,13 @@ index 9aeb2e4..481a06f 100644 struct rte_flow_error *error) { int ret; -@@ -2787,13 +2873,18 @@ step_next: - rule->ixgbe_fdir.formatted.dst_port != 0)) - return -ENOTSUP; - -- if (fdir_mode == RTE_FDIR_MODE_NONE || -- fdir_mode != rule->mode) -+ if (fdir_mode == RTE_FDIR_MODE_NONE) - return -ENOTSUP; +@@ -2795,12 +2865,18 @@ ixgbe_parse_fdir_filter(struct rte_eth_dev *dev, + return ret; + } + } else if (fdir_conf->mode != rule->mode) { +- return -ENOTSUP; ++ return -ENOTSUP; + } if (rule->queue >= dev->data->nb_rx_queues) return -ENOTSUP; @@ -215,7 +187,7 @@ index 9aeb2e4..481a06f 100644 return ret; } -@@ -3128,7 +3219,7 @@ ixgbe_flow_create(struct rte_eth_dev *dev, +@@ -3135,7 +3211,7 @@ ixgbe_flow_create(struct rte_eth_dev *dev, memset(&fdir_rule, 0, sizeof(struct ixgbe_fdir_rule)); ret = ixgbe_parse_fdir_filter(dev, attr, pattern, @@ -224,7 +196,7 @@ index 9aeb2e4..481a06f 100644 if (!ret) { /* A mask 
cannot be deleted. */ if (fdir_rule.b_mask) { -@@ -3299,7 +3390,7 @@ ixgbe_flow_validate(struct rte_eth_dev *dev, +@@ -3306,7 +3382,7 @@ ixgbe_flow_validate(struct rte_eth_dev *dev, memset(&fdir_rule, 0, sizeof(struct ixgbe_fdir_rule)); ret = ixgbe_parse_fdir_filter(dev, attr, pattern, @@ -233,7 +205,7 @@ index 9aeb2e4..481a06f 100644 if (!ret) return 0; -@@ -3335,7 +3426,7 @@ ixgbe_flow_destroy(struct rte_eth_dev *dev, +@@ -3342,7 +3418,7 @@ ixgbe_flow_destroy(struct rte_eth_dev *dev, struct ixgbe_eth_syn_filter_ele *syn_filter_ptr; struct ixgbe_eth_l2_tunnel_conf_ele *l2_tn_filter_ptr; struct ixgbe_fdir_rule_ele *fdir_rule_ptr; @@ -242,15 +214,15 @@ index 9aeb2e4..481a06f 100644 struct ixgbe_hw_fdir_info *fdir_info = IXGBE_DEV_PRIVATE_TO_FDIR_INFO(dev->data->dev_private); struct ixgbe_rss_conf_ele *rss_filter_ptr; -@@ -3432,7 +3523,7 @@ ixgbe_flow_destroy(struct rte_eth_dev *dev, +@@ -3439,7 +3515,7 @@ ixgbe_flow_destroy(struct rte_eth_dev *dev, return ret; } - TAILQ_FOREACH(ixgbe_flow_mem_ptr, &ixgbe_flow_list, entries) { -+ TAILQ_FOREACH_SAFE(ixgbe_flow_mem_ptr, &ixgbe_flow_list, entries, next_ptr) { ++ RTE_TAILQ_FOREACH_SAFE(ixgbe_flow_mem_ptr, &ixgbe_flow_list, entries, next_ptr) { if (ixgbe_flow_mem_ptr->flow == pmd_flow) { TAILQ_REMOVE(&ixgbe_flow_list, ixgbe_flow_mem_ptr, entries); -- -1.8.3.1 +2.31.1 diff --git a/patch/dpdk-stable-20.11.10/0005-bonding-allow-slaves-from-different-numa-nodes.patch b/patch/dpdk-24.11/0004-bonding-allow-slaves-from-different-numa-nodes.patch similarity index 68% rename from patch/dpdk-stable-20.11.10/0005-bonding-allow-slaves-from-different-numa-nodes.patch rename to patch/dpdk-24.11/0004-bonding-allow-slaves-from-different-numa-nodes.patch index 12a011e16..9587cf3da 100644 --- a/patch/dpdk-stable-20.11.10/0005-bonding-allow-slaves-from-different-numa-nodes.patch +++ b/patch/dpdk-24.11/0004-bonding-allow-slaves-from-different-numa-nodes.patch @@ -1,23 +1,22 @@ -From 30c3918317ea30a7586f2c081a6623c4574dade9 Mon Sep 17 
00:00:00 2001 -From: huangyichen -Date: Wed, 4 Aug 2021 15:16:04 +0800 -Subject: [PATCH 5/7] bonding: allow slaves from different numa nodes +From c6c5e201b4c02eb4ca7f1592ac901223caea78a5 Mon Sep 17 00:00:00 2001 +From: ywc689 +Date: Mon, 16 Dec 2024 14:39:15 +0800 +Subject: [PATCH 4/6] bonding: allow slaves from different numa nodes -Note the patch may have a negative influnce on performance. -It's not a good practice to bonding slaves across numa nodes. +Signed-off-by: ywc689 --- drivers/net/bonding/rte_eth_bond_pmd.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/net/bonding/rte_eth_bond_pmd.c b/drivers/net/bonding/rte_eth_bond_pmd.c -index 0c9a1df..371c888 100644 +index 91bf2c2..eebb8d2 100644 --- a/drivers/net/bonding/rte_eth_bond_pmd.c +++ b/drivers/net/bonding/rte_eth_bond_pmd.c -@@ -1780,7 +1780,14 @@ struct bwg_slave { +@@ -1815,7 +1815,14 @@ member_start(struct rte_eth_dev *bonding_eth_dev, - errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id, + errval = rte_eth_rx_queue_setup(member_port_id, q_id, bd_rx_q->nb_rx_desc, -- rte_eth_dev_socket_id(slave_eth_dev->data->port_id), +- rte_eth_dev_socket_id(member_port_id), + // In spite of performance problem, bonding slaves had better to support + // slaves from different numa nodes. 
Considering that numa node on which + // the resources of bonding port is allocated from is specified by @@ -29,11 +28,11 @@ index 0c9a1df..371c888 100644 &(bd_rx_q->rx_conf), bd_rx_q->mb_pool); if (errval != 0) { RTE_BOND_LOG(ERR, -@@ -1796,7 +1803,14 @@ struct bwg_slave { +@@ -1831,7 +1838,14 @@ member_start(struct rte_eth_dev *bonding_eth_dev, - errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id, + errval = rte_eth_tx_queue_setup(member_port_id, q_id, bd_tx_q->nb_tx_desc, -- rte_eth_dev_socket_id(slave_eth_dev->data->port_id), +- rte_eth_dev_socket_id(member_port_id), + // In spite of performance problem, bonding slaves had better to support + // slaves from different numa nodes. Considering that numa node on which + // the resources of bonding port is allocated from is specified by @@ -46,5 +45,5 @@ index 0c9a1df..371c888 100644 if (errval != 0) { RTE_BOND_LOG(ERR, -- -1.8.3.1 +2.31.1 diff --git a/patch/dpdk-stable-20.11.10/0006-bonding-fix-problem-in-mode-4-dropping-multicast-pac.patch b/patch/dpdk-24.11/0005-bonding-fix-problem-in-mode-4-dropping-multicast-pac.patch similarity index 58% rename from patch/dpdk-stable-20.11.10/0006-bonding-fix-problem-in-mode-4-dropping-multicast-pac.patch rename to patch/dpdk-24.11/0005-bonding-fix-problem-in-mode-4-dropping-multicast-pac.patch index c7f420f8d..be09aff66 100644 --- a/patch/dpdk-stable-20.11.10/0006-bonding-fix-problem-in-mode-4-dropping-multicast-pac.patch +++ b/patch/dpdk-24.11/0005-bonding-fix-problem-in-mode-4-dropping-multicast-pac.patch @@ -1,26 +1,26 @@ -From 2d3c711e48d4f09200096348be1286eec10301f6 Mon Sep 17 00:00:00 2001 -From: yuwenchao -Date: Fri, 2 Aug 2024 13:32:36 +0800 -Subject: [PATCH 6/7] bonding: fix problem in mode 4 dropping multicast packets +From 207f750e3c4deffe607a2bd792d3ca32ded31fbb Mon Sep 17 00:00:00 2001 +From: ywc689 +Date: Mon, 16 Dec 2024 15:20:44 +0800 +Subject: [PATCH 5/6] bonding: fix problem in mode 4 dropping multicast packets -Signed-off-by: yuwenchao 
+Signed-off-by: ywc689 --- - drivers/net/bonding/rte_eth_bond_pmd.c | 38 +++++++++++++++++++++------------- + drivers/net/bonding/rte_eth_bond_pmd.c | 38 ++++++++++++++++---------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/drivers/net/bonding/rte_eth_bond_pmd.c b/drivers/net/bonding/rte_eth_bond_pmd.c -index 371c888..f770f50 100644 +index eebb8d2..35e0bad 100644 --- a/drivers/net/bonding/rte_eth_bond_pmd.c +++ b/drivers/net/bonding/rte_eth_bond_pmd.c -@@ -309,7 +309,6 @@ +@@ -312,7 +312,6 @@ rx_burst_8023ad(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts, - uint8_t collecting; /* current slave collecting status */ + uint8_t collecting; /* current member collecting status */ const uint8_t promisc = rte_eth_promiscuous_get(internals->port_id); - const uint8_t allmulti = rte_eth_allmulticast_get(internals->port_id); uint8_t subtype; uint16_t i; uint16_t j; -@@ -352,20 +351,28 @@ +@@ -355,20 +354,28 @@ rx_burst_8023ad(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts, * - bonding interface is not in promiscuous mode and * packet address isn't in mac_addrs array: * - packet is unicast, @@ -37,15 +37,15 @@ index 371c888..f770f50 100644 - bufs[j])) || - !collecting || - (!promisc && -- !is_bond_mac_addr(&hdr->d_addr, bond_mac, +- !is_bond_mac_addr(&hdr->dst_addr, bond_mac, - BOND_MAX_MAC_ADDRS) && -- (rte_is_unicast_ether_addr(&hdr->d_addr) || +- (rte_is_unicast_ether_addr(&hdr->dst_addr) || - !allmulti)))) { + if (unlikely((is_lacp_packets(hdr->ether_type, subtype, bufs[j])) + || !collecting -+ || (!promisc && !is_bond_mac_addr(&hdr->d_addr, bond_mac, ++ || (!promisc && !is_bond_mac_addr(&hdr->dst_addr, bond_mac, + BOND_MAX_MAC_ADDRS) -+ && (rte_is_unicast_ether_addr(&hdr->d_addr))))) { ++ && (rte_is_unicast_ether_addr(&hdr->dst_addr))))) { if (hdr->ether_type == ether_type_slow_be) { + if (dedicated_rxq) { + /* Error! 
Lacp packets should never appear here if @@ -56,24 +56,24 @@ index 371c888..f770f50 100644 + * */ + RTE_BOND_LOG(WARNING, "receive lacp packets from queue %d " + "of port %d when dedicated queue enabled", -+ bd_rx_q->queue_id, slaves[idx]); ++ bd_rx_q->queue_id, members[idx]); + } bond_mode_8023ad_handle_slow_pkt( - internals, slaves[idx], bufs[j]); + internals, members[idx], bufs[j]); } else -@@ -1288,8 +1295,11 @@ struct bwg_slave { - slave_port_ids[i]; +@@ -1305,8 +1312,11 @@ tx_burst_8023ad(void *queue, struct rte_mbuf **bufs, uint16_t nb_bufs, + member_port_ids[i]; } -- if (unlikely(dist_slave_count < 1)) -+ if (unlikely(dist_slave_count < 1)) { -+ RTE_BOND_LOG(WARNING, "no distributing slaves on bonding port %d", +- if (unlikely(dist_member_count < 1)) ++ if (unlikely(dist_member_count < 1)) { ++ RTE_BOND_LOG(WARNING, "no distributing members on bonding port %d", + internals->port_id); return 0; + } - return tx_burst_balance(queue, bufs, nb_bufs, dist_slave_port_ids, - dist_slave_count); + return tx_burst_balance(queue, bufs, nb_bufs, dist_member_port_ids, + dist_member_count); -- -1.8.3.1 +2.31.1 diff --git a/patch/dpdk-24.11/0006-bonding-device-supports-sending-packets-from-user-sp.patch b/patch/dpdk-24.11/0006-bonding-device-supports-sending-packets-from-user-sp.patch new file mode 100644 index 000000000..6d6523375 --- /dev/null +++ b/patch/dpdk-24.11/0006-bonding-device-supports-sending-packets-from-user-sp.patch @@ -0,0 +1,86 @@ +From 158b5708203309449e0ac85d9b6034158ae8a351 Mon Sep 17 00:00:00 2001 +From: ywc689 +Date: Mon, 16 Dec 2024 18:06:55 +0800 +Subject: [PATCH 6/6] bonding device supports sending packets from user + specified member, which ensures protocols like LLDP work properly. 
+ +Signed-off-by: ywc689 +--- + drivers/net/bonding/rte_eth_bond_pmd.c | 26 ++++++++++++++++++++++++-- + lib/mbuf/rte_mbuf.h | 2 ++ + 2 files changed, 26 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/bonding/rte_eth_bond_pmd.c b/drivers/net/bonding/rte_eth_bond_pmd.c +index 35e0bad..d9efb2b 100644 +--- a/drivers/net/bonding/rte_eth_bond_pmd.c ++++ b/drivers/net/bonding/rte_eth_bond_pmd.c +@@ -586,6 +586,22 @@ bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) + return nb_recv_pkts; + } + ++static inline int ++bond_ethdev_populate_member_by_user(const struct rte_mbuf *mbuf, const uint16_t *members, ++ int num_member) ++{ ++ uint16_t i, pid = mbuf->hash.txadapter.reserved2; ++ ++ if (likely(pid == RTE_MBUF_PORT_INVALID)) ++ return -1; ++ ++ for (i = 0; i < num_member; i++) { ++ if (members[i] == pid) ++ return i; ++ } ++ return -1; ++} ++ + static uint16_t + bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs, + uint16_t nb_pkts) +@@ -618,7 +634,9 @@ bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs, + + /* Populate members mbuf with which packets are to be sent on it */ + for (i = 0; i < nb_pkts; i++) { +- cmember_idx = (member_idx + i) % num_of_members; ++ cmember_idx = bond_ethdev_populate_member_by_user(bufs[i], members, num_of_members); ++ if (likely(cmember_idx < 0)) ++ cmember_idx = (member_idx + i) % num_of_members; + member_bufs[cmember_idx][(member_nb_pkts[cmember_idx])++] = bufs[i]; + } + +@@ -1188,7 +1206,11 @@ tx_burst_balance(void *queue, struct rte_mbuf **bufs, uint16_t nb_bufs, + + for (i = 0; i < nb_bufs; i++) { + /* Populate member mbuf arrays with mbufs for that member. 
*/ +- uint16_t member_idx = bufs_member_port_idxs[i]; ++ int member_idx; ++ ++ member_idx = bond_ethdev_populate_member_by_user(bufs[i], member_port_ids, member_count); ++ if (likely(member_idx < 0)) ++ member_idx = bufs_member_port_idxs[i]; + + member_bufs[member_idx][member_nb_bufs[member_idx]++] = bufs[i]; + } +diff --git a/lib/mbuf/rte_mbuf.h b/lib/mbuf/rte_mbuf.h +index 0d2e0e6..3d0339f 100644 +--- a/lib/mbuf/rte_mbuf.h ++++ b/lib/mbuf/rte_mbuf.h +@@ -602,6 +602,7 @@ static inline struct rte_mbuf *rte_mbuf_raw_alloc(struct rte_mempool *mp) + + if (rte_mempool_get(mp, &ret.ptr) < 0) + return NULL; ++ ret.m->hash.txadapter.reserved2 = RTE_MBUF_PORT_INVALID; + __rte_mbuf_raw_sanity_check(ret.m); + return ret.m; + } +@@ -882,6 +883,7 @@ static inline void rte_pktmbuf_reset(struct rte_mbuf *m) + m->vlan_tci_outer = 0; + m->nb_segs = 1; + m->port = RTE_MBUF_PORT_INVALID; ++ m->hash.txadapter.reserved2 = RTE_MBUF_PORT_INVALID; + + m->ol_flags &= RTE_MBUF_F_EXTERNAL; + m->packet_type = 0; +-- +2.31.1 + diff --git a/patch/dpdk-stable-20.11.1/0001-kni-use-netlink-event-for-multicast-driver-part.patch b/patch/dpdk-stable-20.11.1/0001-kni-use-netlink-event-for-multicast-driver-part.patch deleted file mode 100644 index e39254c24..000000000 --- a/patch/dpdk-stable-20.11.1/0001-kni-use-netlink-event-for-multicast-driver-part.patch +++ /dev/null @@ -1,134 +0,0 @@ -From 5b032cc0d59f9fe2e9607423a92399254e30a8f7 Mon Sep 17 00:00:00 2001 -From: huangyichen -Date: Thu, 1 Jul 2021 21:21:16 +0800 -Subject: [PATCH 1/6] kni: use netlink event for multicast (driver part) - -Kni driver sends netlink event every time hw-multicast list updated by -kernel, the user kni app should capture the event and update multicast -to kni device. - -Original way is using rte_kni_request to pass hw-multicast to user kni -module. That method works but finally memory corruption found, which is -not easy to address. That's why we use netlink event instead. 
---- - kernel/linux/kni/kni_net.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 76 insertions(+) - -diff --git a/kernel/linux/kni/kni_net.c b/kernel/linux/kni/kni_net.c -index 4b75208..cde565e 100644 ---- a/kernel/linux/kni/kni_net.c -+++ b/kernel/linux/kni/kni_net.c -@@ -17,6 +17,8 @@ - #include - #include - #include -+#include -+#include - - #include - #include -@@ -128,6 +130,7 @@ kni_net_process_request(struct kni_dev *kni, struct rte_kni_request *req) - ret_val = wait_event_interruptible_timeout(kni->wq, - kni_fifo_count(kni->resp_q), 3 * HZ); - if (signal_pending(current) || ret_val <= 0) { -+ pr_err("%s: wait_event_interruptible timeout\n", __func__); - ret = -ETIME; - goto fail; - } -@@ -657,6 +660,77 @@ kni_net_change_mtu(struct net_device *dev, int new_mtu) - return (ret == 0) ? req.result : ret; - } - -+static size_t -+kni_nlmsg_size(void) -+{ -+ return NLMSG_ALIGN(sizeof(struct ifaddrmsg)) -+ + nla_total_size(4) /* IFA_ADDRESS */ -+ + nla_total_size(4) /* IFA_LOCAL */ -+ + nla_total_size(4) /* IFA_BROADCAST */ -+ + nla_total_size(IFNAMSIZ) /* IFA_LABEL */ -+ + nla_total_size(4) /* IFA_FLAGS */ -+ + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */ -+} -+ -+static void -+kni_net_set_rx_mode(struct net_device *dev) -+{ -+ /* -+ * send event to notify user (DPDK KNI app) that multicast list changed, -+ * so that it can monitor multicast join/leave and set HW mc-addrs to -+ * kni dev accordinglly. -+ * -+ * this event is just an notification, we do not save any mc-addr here -+ * (so attribute space for us). user kni app should get maddrs after -+ * receive this notification. -+ * -+ * I was expecting kernel send some rtnl event for multicast join/leave, -+ * but it doesn't. By checking the call-chain of SIOCADDMULTI (ip maddr, -+ * manages only hardware multicast) and IP_ADD_MEMBERSHIP (ip_mc_join_group, -+ * used to for IPv4 multicast), no rtnl event sent. 
-+ * -+ * so as workaround, modify kni driver here to send RTM_NEWADDR. -+ * it may not suitalbe to use this event for mcast, but that should works. -+ * hope that won't affect other listener to this event. -+ * -+ * previous solution was using rte_kni_request to pass hw-maddr list to user. -+ * it "works" for times but finally memory corruption found, which is -+ * not easy to address (lock was added and reviewed). That's why we use -+ * netlink event instead. -+ */ -+ struct sk_buff *skb; -+ struct net *net = dev_net(dev); -+ struct nlmsghdr *nlh; -+ struct ifaddrmsg *ifm; -+ -+ skb = nlmsg_new(kni_nlmsg_size(), GFP_ATOMIC); -+ if (!skb) -+ return; -+ -+ /* no other event for us ? */ -+ nlh = nlmsg_put(skb, 0, 0, RTM_NEWADDR, sizeof(*ifm), 0); -+ if (!nlh) { -+ kfree_skb(skb); -+ return; -+ } -+ -+ /* just send an notification so no other info */ -+ ifm = nlmsg_data(nlh); -+ memset(ifm, 0, sizeof(*ifm)); -+ ifm->ifa_family = AF_UNSPEC; -+ ifm->ifa_prefixlen = 0; -+ ifm->ifa_flags = 0; -+ ifm->ifa_scope = RT_SCOPE_NOWHERE; -+ ifm->ifa_index = 0; -+ -+ nlmsg_end(skb, nlh); -+ -+ /* other group ? */ -+ pr_debug("%s: rx-mode/multicast-list changed\n", __func__); -+ rtnl_notify(skb, net, 0, RTNLGRP_NOTIFY, NULL, GFP_ATOMIC); -+ return; -+} -+ - static void - kni_net_change_rx_flags(struct net_device *netdev, int flags) - { -@@ -757,6 +831,7 @@ kni_net_set_mac(struct net_device *netdev, void *p) - kni = netdev_priv(netdev); - ret = kni_net_process_request(kni, &req); - -+ pr_info("%s request returns %d!\n", __func__, ret); - return (ret == 0 ? 
req.result : ret); - } - -@@ -788,6 +863,7 @@ static const struct net_device_ops kni_net_netdev_ops = { - .ndo_change_rx_flags = kni_net_change_rx_flags, - .ndo_start_xmit = kni_net_tx, - .ndo_change_mtu = kni_net_change_mtu, -+ .ndo_set_rx_mode = kni_net_set_rx_mode, - .ndo_tx_timeout = kni_net_tx_timeout, - .ndo_set_mac_address = kni_net_set_mac, - #ifdef HAVE_CHANGE_CARRIER_CB --- -1.8.3.1 - diff --git a/patch/dpdk-stable-20.11.1/0002-pdump-change-dpdk-pdump-tool-for-dpvs.patch b/patch/dpdk-stable-20.11.1/0002-pdump-change-dpdk-pdump-tool-for-dpvs.patch deleted file mode 100644 index 89d3f4c47..000000000 --- a/patch/dpdk-stable-20.11.1/0002-pdump-change-dpdk-pdump-tool-for-dpvs.patch +++ /dev/null @@ -1,555 +0,0 @@ -From 8d1dc22740a315d62596445beba8b8737c45ffa4 Mon Sep 17 00:00:00 2001 -From: huangyichen -Date: Thu, 1 Jul 2021 21:23:50 +0800 -Subject: [PATCH 2/6] pdump: change dpdk-pdump tool for dpvs - ---- - app/pdump/main.c | 167 ++++++++++++++++++++++++++++++++++++++++--- - lib/librte_pdump/rte_pdump.c | 145 +++++++++++++++++++++++++++++++++++-- - lib/librte_pdump/rte_pdump.h | 27 +++++++ - 3 files changed, 327 insertions(+), 12 deletions(-) - -diff --git a/app/pdump/main.c b/app/pdump/main.c -index b34bf33..9d14474 100644 ---- a/app/pdump/main.c -+++ b/app/pdump/main.c -@@ -27,6 +27,7 @@ - #include - #include - #include -+#include - - #define CMD_LINE_OPT_PDUMP "pdump" - #define CMD_LINE_OPT_PDUMP_NUM 256 -@@ -42,6 +43,14 @@ - #define PDUMP_MSIZE_ARG "mbuf-size" - #define PDUMP_NUM_MBUFS_ARG "total-num-mbufs" - -+#define PDUMP_HOST_ARG "host" -+#define PDUMP_SRC_ARG "src-host" -+#define PDUMP_DST_ARG "dst-host" -+#define PDUMP_PROTO_PORT_AGE "proto-port" -+#define PDUMP_SPORT_ARG "src-port" -+#define PDUMP_DPORT_ARG "dst-port" -+#define PDUMP_PROTO_ARG "proto" -+ - #define VDEV_NAME_FMT "net_pcap_%s_%d" - #define VDEV_PCAP_ARGS_FMT "tx_pcap=%s" - #define VDEV_IFACE_ARGS_FMT "tx_iface=%s" -@@ -97,6 +106,13 @@ static const char * const valid_pdump_arguments[] 
= { - PDUMP_RING_SIZE_ARG, - PDUMP_MSIZE_ARG, - PDUMP_NUM_MBUFS_ARG, -+ PDUMP_HOST_ARG, -+ PDUMP_SRC_ARG, -+ PDUMP_DST_ARG, -+ PDUMP_PROTO_PORT_AGE, -+ PDUMP_SPORT_ARG, -+ PDUMP_DPORT_ARG, -+ PDUMP_PROTO_ARG, - NULL - }; - -@@ -130,6 +146,7 @@ struct pdump_tuples { - enum pcap_stream rx_vdev_stream_type; - enum pcap_stream tx_vdev_stream_type; - bool single_pdump_dev; -+ struct pdump_filter *filter; - - /* stats */ - struct pdump_stats stats; -@@ -158,6 +175,11 @@ pdump_usage(const char *prgname) - "(queue=)," - "(rx-dev= |" - " tx-dev=," -+ "[host= | src-host= |" -+ "dst-host=]," -+ "[proto=support:tcp/udp/icmp]," -+ "[proto-port= |src-port= |" -+ "dst-port=]," - "[ring-size=default:16384]," - "[mbuf-size=default:2176]," - "[total-num-mbufs=default:65535]'\n", -@@ -244,6 +266,64 @@ parse_uint_value(const char *key, const char *value, void *extra_args) - } - - static int -+parse_host(const char *key __rte_unused, const char *value, void *extra_args) -+{ -+ struct pdump_tuples *pt = extra_args; -+ struct in_addr inaddr; -+ struct in6_addr inaddr6; -+ union addr addr; -+ int af = 0; -+ -+ if (inet_pton(AF_INET6, value, &inaddr6) > 0) { -+ af = AF_INET6; -+ addr.in6 = inaddr6; -+ } else if (inet_pton(AF_INET, value, &inaddr) > 0){ -+ af = AF_INET; -+ addr.in = inaddr; -+ } else { -+ printf("IP address invaled\n"); -+ return -EINVAL; -+ } -+ -+ if (pt->filter && pt->filter->af != 0 && af != pt->filter->af) { -+ printf("IPv4 and IPv6 conflict\n"); -+ return -EINVAL; -+ } else { -+ pt->filter->af = af; -+ } -+ -+ if (!strcmp(key, PDUMP_HOST_ARG)) { -+ rte_memcpy(&pt->filter->host_addr, &addr, sizeof(addr)); -+ } else if (!strcmp(key, PDUMP_SRC_ARG)) { -+ rte_memcpy(&pt->filter->s_addr, &addr, sizeof(addr)); -+ } else if (!strcmp(key, PDUMP_DST_ARG)) { -+ rte_memcpy(&pt->filter->d_addr, &addr, sizeof(addr)); -+ } -+ -+ return 0; -+} -+ -+static int -+parse_proto(const char *key __rte_unused, const char *value, void *extra_args) -+{ -+ struct pdump_tuples *pt = extra_args; 
-+ -+ if (!strcmp(value, "tcp")) { -+ pt->filter->proto = IPPROTO_TCP; -+ } else if (!strcmp(value, "udp")) { -+ pt->filter->proto = IPPROTO_UDP; -+ } else if (!strcmp(value, "icmp")) { -+ pt->filter->proto = IPPROTO_ICMP; -+ } else { -+ printf("invalid value:\"%s\" for key:\"%s\", " -+ "value must be tcp/udp/icmp\n", value, key); -+ return -EINVAL; -+ } -+ -+ return 0; -+} -+ -+static int - parse_pdump(const char *optarg) - { - struct rte_kvargs *kvlist; -@@ -370,6 +450,75 @@ parse_pdump(const char *optarg) - } else - pt->total_num_mbufs = MBUFS_PER_POOL; - -+ /* filter parsing and validation */ -+ pt->filter = rte_zmalloc("pdump_filter", -+ sizeof(struct pdump_filter), 0); -+ cnt1 = rte_kvargs_count(kvlist, PDUMP_HOST_ARG); -+ if (cnt1 == 1) { -+ ret = rte_kvargs_process(kvlist, PDUMP_HOST_ARG, -+ &parse_host, pt); -+ if (ret < 0) -+ goto free_kvlist; -+ } -+ -+ cnt1 = rte_kvargs_count(kvlist, PDUMP_SRC_ARG); -+ if (cnt1 == 1) { -+ ret = rte_kvargs_process(kvlist, PDUMP_SRC_ARG, -+ &parse_host, pt); -+ if (ret < 0) -+ goto free_kvlist; -+ } -+ -+ cnt1 = rte_kvargs_count(kvlist, PDUMP_DST_ARG); -+ if (cnt1 == 1) { -+ ret = rte_kvargs_process(kvlist, PDUMP_DST_ARG, -+ &parse_host, pt); -+ if (ret < 0) -+ goto free_kvlist; -+ } -+ -+ -+ cnt1 = rte_kvargs_count(kvlist, PDUMP_PROTO_PORT_AGE); -+ if (cnt1 == 1) { -+ v.min = 1; -+ v.max = UINT16_MAX; -+ ret = rte_kvargs_process(kvlist, PDUMP_PROTO_PORT_AGE, -+ &parse_uint_value, &v); -+ if (ret < 0) -+ goto free_kvlist; -+ pt->filter->proto_port = (uint16_t) v.val; -+ } -+ -+ cnt1 = rte_kvargs_count(kvlist, PDUMP_SPORT_ARG); -+ if (cnt1 == 1) { -+ v.min = 1; -+ v.max = UINT16_MAX; -+ ret = rte_kvargs_process(kvlist, PDUMP_SPORT_ARG, -+ &parse_uint_value, &v); -+ if (ret < 0) -+ goto free_kvlist; -+ pt->filter->s_port = (uint16_t) v.val; -+ } -+ -+ cnt1 = rte_kvargs_count(kvlist, PDUMP_DPORT_ARG); -+ if (cnt1 == 1) { -+ v.min = 1; -+ v.max = UINT16_MAX; -+ ret = rte_kvargs_process(kvlist, PDUMP_DPORT_ARG, -+ 
&parse_uint_value, &v); -+ if (ret < 0) -+ goto free_kvlist; -+ pt->filter->d_port = (uint16_t) v.val; -+ } -+ -+ cnt1 = rte_kvargs_count(kvlist, PDUMP_PROTO_ARG); -+ if (cnt1 == 1) { -+ ret = rte_kvargs_process(kvlist, PDUMP_PROTO_ARG, -+ &parse_proto, pt); -+ if (ret < 0) -+ goto free_kvlist; -+ } -+ - num_tuples++; - - free_kvlist: -@@ -510,6 +659,8 @@ cleanup_rings(void) - rte_ring_free(pt->rx_ring); - if (pt->tx_ring) - rte_ring_free(pt->tx_ring); -+ if (pt->filter) -+ rte_free(pt->filter); - } - } - -@@ -837,20 +988,20 @@ enable_pdump(void) - pt->queue, - RTE_PDUMP_FLAG_RX, - pt->rx_ring, -- pt->mp, NULL); -+ pt->mp, pt->filter); - ret1 = rte_pdump_enable_by_deviceid( - pt->device_id, - pt->queue, - RTE_PDUMP_FLAG_TX, - pt->tx_ring, -- pt->mp, NULL); -+ pt->mp, pt->filter); - } else if (pt->dump_by_type == PORT_ID) { - ret = rte_pdump_enable(pt->port, pt->queue, - RTE_PDUMP_FLAG_RX, -- pt->rx_ring, pt->mp, NULL); -+ pt->rx_ring, pt->mp, pt->filter); - ret1 = rte_pdump_enable(pt->port, pt->queue, - RTE_PDUMP_FLAG_TX, -- pt->tx_ring, pt->mp, NULL); -+ pt->tx_ring, pt->mp, pt->filter); - } - } else if (pt->dir == RTE_PDUMP_FLAG_RX) { - if (pt->dump_by_type == DEVICE_ID) -@@ -858,22 +1009,22 @@ enable_pdump(void) - pt->device_id, - pt->queue, - pt->dir, pt->rx_ring, -- pt->mp, NULL); -+ pt->mp, pt->filter); - else if (pt->dump_by_type == PORT_ID) - ret = rte_pdump_enable(pt->port, pt->queue, - pt->dir, -- pt->rx_ring, pt->mp, NULL); -+ pt->rx_ring, pt->mp, pt->filter); - } else if (pt->dir == RTE_PDUMP_FLAG_TX) { - if (pt->dump_by_type == DEVICE_ID) - ret = rte_pdump_enable_by_deviceid( - pt->device_id, - pt->queue, - pt->dir, -- pt->tx_ring, pt->mp, NULL); -+ pt->tx_ring, pt->mp, pt->filter); - else if (pt->dump_by_type == PORT_ID) - ret = rte_pdump_enable(pt->port, pt->queue, - pt->dir, -- pt->tx_ring, pt->mp, NULL); -+ pt->tx_ring, pt->mp, pt->filter); - } - if (ret < 0 || ret1 < 0) { - cleanup_pdump_resources(); -diff --git a/lib/librte_pdump/rte_pdump.c 
b/lib/librte_pdump/rte_pdump.c -index b3c8d5c..b73fb8f 100644 ---- a/lib/librte_pdump/rte_pdump.c -+++ b/lib/librte_pdump/rte_pdump.c -@@ -9,6 +9,10 @@ - #include - #include - #include -+#include -+#include -+#include -+#include - - #include "rte_pdump.h" - -@@ -69,6 +73,132 @@ static struct pdump_rxtx_cbs { - } rx_cbs[RTE_MAX_ETHPORTS][RTE_MAX_QUEUES_PER_PORT], - tx_cbs[RTE_MAX_ETHPORTS][RTE_MAX_QUEUES_PER_PORT]; - -+static int -+inet_addr_equal(int af, const union addr *a1, -+ const union addr *a2) -+{ -+ switch (af) { -+ case AF_INET: -+ return a1->in.s_addr == a2->in.s_addr; -+ case AF_INET6: -+ return memcmp(a1->in6.s6_addr, a2->in6.s6_addr, 16) == 0; -+ default: -+ return memcmp(a1, a2, sizeof(union addr)) == 0; -+ } -+} -+ -+static int -+inet_is_addr_any(int af, const union addr *addr) -+{ -+ switch (af) { -+ case AF_INET: -+ return addr->in.s_addr == htonl(INADDR_ANY); -+ case AF_INET6: -+ return IN6_ARE_ADDR_EQUAL(&addr->in6, &in6addr_any); -+ default: -+ return -1; -+ } -+ -+ return -1; -+} -+static int -+pdump_filter(struct rte_mbuf *m, struct pdump_filter *filter) -+{ -+ struct rte_ether_hdr *eth_hdr; -+ struct vlan_eth_hdr *vlan_eth_hdr; -+ union addr s_addr, d_addr; -+ int prepend = 0; -+ uint16_t type = 0; -+ uint16_t iph_len = 0; -+ uint8_t proto = 0; -+ -+ int af; -+ -+ if (filter->af == 0 && filter->s_port == 0 && -+ filter->d_port == 0 && filter->proto == 0 && -+ filter->proto_port == 0) -+ return 0; -+ -+ eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); -+ -+ if (eth_hdr->ether_type == htons(ETH_P_8021Q)) { -+ prepend += sizeof(struct vlan_eth_hdr); -+ vlan_eth_hdr = rte_pktmbuf_mtod(m, struct vlan_eth_hdr *); -+ type = vlan_eth_hdr->h_vlan_encapsulated_proto; -+ } else { -+ prepend += sizeof(struct rte_ether_hdr); -+ eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); -+ type = eth_hdr->ether_type; -+ } -+ -+ if (rte_pktmbuf_adj(m, prepend) == NULL) -+ goto prepend; -+ -+ if (type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) { -+ struct 
rte_arp_hdr *arp = rte_pktmbuf_mtod(m, struct rte_arp_hdr *); -+ af = AF_INET; -+ s_addr.in.s_addr = arp->arp_data.arp_sip; -+ d_addr.in.s_addr = arp->arp_data.arp_tip; -+ } else if (type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4)) { -+ struct rte_ipv4_hdr *ip4 = rte_pktmbuf_mtod(m, struct rte_ipv4_hdr *); -+ af = AF_INET; -+ s_addr.in.s_addr = ip4->src_addr; -+ d_addr.in.s_addr = ip4->dst_addr; -+ proto = ip4->next_proto_id; -+ iph_len = (ip4->version_ihl & 0xf) << 2; -+ } else if (type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6)) { -+ struct rte_ipv6_hdr *ip6 = rte_pktmbuf_mtod(m, struct rte_ipv6_hdr *); -+ af = AF_INET6; -+ rte_memcpy(&s_addr.in6, &ip6->src_addr, 16); -+ rte_memcpy(&d_addr.in6, &ip6->dst_addr, 16); -+ proto = ip6->proto; -+ iph_len = sizeof(struct rte_ipv6_hdr); -+ } else { -+ goto prepend; -+ } -+ -+ /*filter*/ -+ if (!inet_is_addr_any(af, &filter->s_addr) && -+ !inet_addr_equal(af, &filter->s_addr, &s_addr)) -+ goto prepend; -+ if (!inet_is_addr_any(af, &filter->d_addr) && -+ !inet_addr_equal(af, &filter->d_addr, &d_addr)) -+ goto prepend; -+ if (!inet_is_addr_any(af, &filter->host_addr) && -+ !inet_addr_equal(af, &filter->host_addr, &s_addr) && -+ !inet_addr_equal(af, &filter->host_addr, &d_addr)) -+ goto prepend; -+ -+ if (filter->proto && filter->proto != proto) -+ goto prepend; -+ -+ if (filter->s_port || filter->d_port || filter->proto_port) { -+ if (proto != IPPROTO_TCP && proto != IPPROTO_UDP) -+ goto prepend; -+ struct rte_udp_hdr _uh; -+ const struct rte_udp_hdr *uh; -+ uh = rte_pktmbuf_read(m, iph_len, sizeof(_uh), &_uh); -+ if (uh == NULL) -+ goto prepend; -+ if (filter->s_port && filter->s_port != rte_cpu_to_be_16(uh->src_port)) -+ goto prepend; -+ -+ if (filter->d_port && filter->d_port != rte_cpu_to_be_16(uh->dst_port)) -+ goto prepend; -+ -+ if (filter->proto_port && -+ filter->proto_port != rte_cpu_to_be_16(uh->src_port) && -+ filter->proto_port != rte_cpu_to_be_16(uh->dst_port)) -+ goto prepend; -+ } -+ -+ rte_pktmbuf_prepend(m, 
prepend); -+ return 0; -+ -+prepend: -+ rte_pktmbuf_prepend(m, prepend); -+ return -1; -+} - - static inline void - pdump_copy(struct rte_mbuf **pkts, uint16_t nb_pkts, void *user_params) -@@ -86,6 +216,8 @@ pdump_copy(struct rte_mbuf **pkts, uint16_t nb_pkts, void *user_params) - ring = cbs->ring; - mp = cbs->mp; - for (i = 0; i < nb_pkts; i++) { -+ if (pdump_filter(pkts[i], cbs->filter) != 0) -+ continue; - p = rte_pktmbuf_copy(pkts[i], mp, 0, UINT32_MAX); - if (p) - dup_bufs[d_pkts++] = p; -@@ -122,7 +254,7 @@ pdump_tx(uint16_t port __rte_unused, uint16_t qidx __rte_unused, - static int - pdump_register_rx_callbacks(uint16_t end_q, uint16_t port, uint16_t queue, - struct rte_ring *ring, struct rte_mempool *mp, -- uint16_t operation) -+ struct pdump_filter *filter, uint16_t operation) - { - uint16_t qid; - struct pdump_rxtx_cbs *cbs = NULL; -@@ -140,6 +272,7 @@ pdump_register_rx_callbacks(uint16_t end_q, uint16_t port, uint16_t queue, - } - cbs->ring = ring; - cbs->mp = mp; -+ cbs->filter = filter; - cbs->cb = rte_eth_add_first_rx_callback(port, qid, - pdump_rx, cbs); - if (cbs->cb == NULL) { -@@ -176,7 +309,7 @@ pdump_register_rx_callbacks(uint16_t end_q, uint16_t port, uint16_t queue, - static int - pdump_register_tx_callbacks(uint16_t end_q, uint16_t port, uint16_t queue, - struct rte_ring *ring, struct rte_mempool *mp, -- uint16_t operation) -+ struct pdump_filter *filter, uint16_t operation) - { - - uint16_t qid; -@@ -195,6 +328,7 @@ pdump_register_tx_callbacks(uint16_t end_q, uint16_t port, uint16_t queue, - } - cbs->ring = ring; - cbs->mp = mp; -+ cbs->filter = filter; - cbs->cb = rte_eth_add_tx_callback(port, qid, pdump_tx, - cbs); - if (cbs->cb == NULL) { -@@ -238,6 +372,7 @@ set_pdump_rxtx_cbs(const struct pdump_request *p) - uint16_t operation; - struct rte_ring *ring; - struct rte_mempool *mp; -+ struct pdump_filter *filter; - - flags = p->flags; - operation = p->op; -@@ -253,6 +388,7 @@ set_pdump_rxtx_cbs(const struct pdump_request *p) - queue = 
p->data.en_v1.queue; - ring = p->data.en_v1.ring; - mp = p->data.en_v1.mp; -+ filter = p->data.en_v1.filter; - } else { - ret = rte_eth_dev_get_port_by_name(p->data.dis_v1.device, - &port); -@@ -265,6 +401,7 @@ set_pdump_rxtx_cbs(const struct pdump_request *p) - queue = p->data.dis_v1.queue; - ring = p->data.dis_v1.ring; - mp = p->data.dis_v1.mp; -+ filter = p->data.dis_v1.filter; - } - - /* validation if packet capture is for all queues */ -@@ -303,7 +440,7 @@ set_pdump_rxtx_cbs(const struct pdump_request *p) - if (flags & RTE_PDUMP_FLAG_RX) { - end_q = (queue == RTE_PDUMP_ALL_QUEUES) ? nb_rx_q : queue + 1; - ret = pdump_register_rx_callbacks(end_q, port, queue, ring, mp, -- operation); -+ filter, operation); - if (ret < 0) - return ret; - } -@@ -312,7 +449,7 @@ set_pdump_rxtx_cbs(const struct pdump_request *p) - if (flags & RTE_PDUMP_FLAG_TX) { - end_q = (queue == RTE_PDUMP_ALL_QUEUES) ? nb_tx_q : queue + 1; - ret = pdump_register_tx_callbacks(end_q, port, queue, ring, mp, -- operation); -+ filter, operation); - if (ret < 0) - return ret; - } -diff --git a/lib/librte_pdump/rte_pdump.h b/lib/librte_pdump/rte_pdump.h -index 6b00fc1..3986b07 100644 ---- a/lib/librte_pdump/rte_pdump.h -+++ b/lib/librte_pdump/rte_pdump.h -@@ -15,6 +15,8 @@ - #include - #include - #include -+#include -+#include - - #ifdef __cplusplus - extern "C" { -@@ -29,6 +31,31 @@ enum { - RTE_PDUMP_FLAG_RXTX = (RTE_PDUMP_FLAG_RX|RTE_PDUMP_FLAG_TX) - }; - -+union addr { -+ struct in_addr in; -+ struct in6_addr in6; -+}; -+ -+struct pdump_filter { -+ int af; -+ union addr s_addr; -+ union addr d_addr; -+ union addr host_addr; //s_addr or d_addr -+ -+ uint8_t proto; -+ uint16_t proto_port; //s_port or d_port -+ uint16_t s_port; -+ uint16_t d_port; -+}; -+ -+struct vlan_eth_hdr { -+ unsigned char h_dest[ETH_ALEN]; -+ unsigned char h_source[ETH_ALEN]; -+ unsigned short h_vlan_proto; -+ unsigned short h_vlan_TCI; -+ unsigned short h_vlan_encapsulated_proto; -+}; -+ - /** - * Initialize packet capturing 
handling - * --- -1.8.3.1 - diff --git a/patch/dpdk-stable-20.11.1/0003-debug-enable-dpdk-eal-memory-debug.patch b/patch/dpdk-stable-20.11.1/0003-debug-enable-dpdk-eal-memory-debug.patch deleted file mode 100644 index 03ff38ba7..000000000 --- a/patch/dpdk-stable-20.11.1/0003-debug-enable-dpdk-eal-memory-debug.patch +++ /dev/null @@ -1,59 +0,0 @@ -From e31fd685ced591060571375c70c69cd8ccf8dad9 Mon Sep 17 00:00:00 2001 -From: huangyichen -Date: Thu, 1 Jul 2021 21:24:47 +0800 -Subject: [PATCH 3/6] debug: enable dpdk eal memory debug - -The patch is used for memory debug. To use the patch, configure meson with option --Dc_args="-DRTE_MALLOC_DEBUG" when building dpdk. For example, - -meson -Dc_args="-DRTE_MALLOC_DEBUG" -Dbuildtype=debug -Dprefix=$(pwd)/dpdklib dpdkbuild -ninja -C dpdkbuild ---- - lib/librte_eal/common/rte_malloc.c | 4 ++++ - lib/librte_eal/include/rte_malloc.h | 15 +++++++++++++++ - 2 files changed, 19 insertions(+) - -diff --git a/lib/librte_eal/common/rte_malloc.c b/lib/librte_eal/common/rte_malloc.c -index 9d39e58..2b6d1ab 100644 ---- a/lib/librte_eal/common/rte_malloc.c -+++ b/lib/librte_eal/common/rte_malloc.c -@@ -30,6 +30,10 @@ - #include "eal_memcfg.h" - #include "eal_private.h" - -+int rte_memmory_ok(void *addr) -+{ -+ return malloc_elem_cookies_ok(RTE_PTR_SUB(addr, MALLOC_ELEM_HEADER_LEN)); -+} - - /* Free the memory space back to heap */ - static void -diff --git a/lib/librte_eal/include/rte_malloc.h b/lib/librte_eal/include/rte_malloc.h -index 3af64f8..671e4f2 100644 ---- a/lib/librte_eal/include/rte_malloc.h -+++ b/lib/librte_eal/include/rte_malloc.h -@@ -248,6 +248,21 @@ rte_calloc_socket(const char *type, size_t num, size_t size, unsigned align, int - __rte_alloc_size(2, 3); - - /** -+ * Check the header/tailer cookies of memory pointed to by the provided pointer. -+ * -+ * This pointer must have been returned by a previous call to -+ * rte_malloc(), rte_zmalloc(), rte_calloc() or rte_realloc(). 
-+ * -+ * @param ptr -+ * The pointer to memory to be checked. -+ * @return -+ * - true if the header/tailer cookies are OK. -+ * - Otherwise, false. -+ */ -+int -+rte_memmory_ok(void *ptr); -+ -+/** - * Frees the memory space pointed to by the provided pointer. - * - * This pointer must have been returned by a previous call to --- -1.8.3.1 - diff --git a/patch/dpdk-stable-20.11.1/0005-bonding-allow-slaves-from-different-numa-nodes.patch b/patch/dpdk-stable-20.11.1/0005-bonding-allow-slaves-from-different-numa-nodes.patch deleted file mode 100644 index 473bec74c..000000000 --- a/patch/dpdk-stable-20.11.1/0005-bonding-allow-slaves-from-different-numa-nodes.patch +++ /dev/null @@ -1,50 +0,0 @@ -From a6393a8d04f1c8a4b324782aa5e242e10043a197 Mon Sep 17 00:00:00 2001 -From: huangyichen -Date: Wed, 4 Aug 2021 15:16:04 +0800 -Subject: [PATCH 5/6] bonding: allow slaves from different numa nodes - -Note the patch may have a negative influnce on performance. -It's not a good practice to bonding slaves across numa nodes. ---- - drivers/net/bonding/rte_eth_bond_pmd.c | 18 ++++++++++++++++-- - 1 file changed, 16 insertions(+), 2 deletions(-) - -diff --git a/drivers/net/bonding/rte_eth_bond_pmd.c b/drivers/net/bonding/rte_eth_bond_pmd.c -index 057b1ad..53f8ba3 100644 ---- a/drivers/net/bonding/rte_eth_bond_pmd.c -+++ b/drivers/net/bonding/rte_eth_bond_pmd.c -@@ -1762,7 +1762,14 @@ slave_configure(struct rte_eth_dev *bonded_eth_dev, - - errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id, - bd_rx_q->nb_rx_desc, -- rte_eth_dev_socket_id(slave_eth_dev->data->port_id), -+ // In spite of performance problem, bonding slaves had better to support -+ // slaves from different numa nodes. 
Considering that numa node on which -+ // the resources of bonding port is allocated from is specified by -+ // rte_eth_bond_create() at bonding creation, the slave's queue_setup -+ // would fail if specified with the slave's numa node id that is different -+ // from the one of the bonding port. See rte_eth_dma_zone_reserve() for -+ // details. -+ SOCKET_ID_ANY, - &(bd_rx_q->rx_conf), bd_rx_q->mb_pool); - if (errval != 0) { - RTE_BOND_LOG(ERR, -@@ -1778,7 +1785,14 @@ slave_configure(struct rte_eth_dev *bonded_eth_dev, - - errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id, - bd_tx_q->nb_tx_desc, -- rte_eth_dev_socket_id(slave_eth_dev->data->port_id), -+ // In spite of performance problem, bonding slaves had better to support -+ // slaves from different numa nodes. Considering that numa node on which -+ // the resources of bonding port is allocated from is specified by -+ // rte_eth_bond_create() at bonding creation, the slave's queue_setup -+ // would fail if specified with the slave's numa node id that is different -+ // from the one of the bonding port. See rte_eth_dma_zone_reserve() for -+ // details. -+ SOCKET_ID_ANY, - &bd_tx_q->tx_conf); - if (errval != 0) { - RTE_BOND_LOG(ERR, --- -1.8.3.1 - diff --git a/patch/dpdk-stable-20.11.1/0006-bonding-fix-bonding-mode-4-problems.patch b/patch/dpdk-stable-20.11.1/0006-bonding-fix-bonding-mode-4-problems.patch deleted file mode 100644 index d2e53511a..000000000 --- a/patch/dpdk-stable-20.11.1/0006-bonding-fix-bonding-mode-4-problems.patch +++ /dev/null @@ -1,144 +0,0 @@ -From 38db21e38a36527a0e2e26f01a4b1f1bfd10c3d6 Mon Sep 17 00:00:00 2001 -From: huangyichen -Date: Wed, 4 Aug 2021 15:14:04 +0800 -Subject: [PATCH 6/6] bonding: fix bonding mode 4 problems - -1. Fix lacp packet receipt problem that is disscussed in issue [#725](https://github.com/iqiyi/dpvs/issues/725) of iqiyi/dpvs in detail. -2. Don't drop multicast/broadcast packets when all-multicast isn't enabled in rx_burst_8023ad. -3. 
Don't drop lacp packets received from worker queues when dedicated queue enabled. ---- - drivers/net/bonding/rte_eth_bond_8023ad.c | 20 ++++++++------ - drivers/net/bonding/rte_eth_bond_pmd.c | 46 +++++++++++++++++++------------ - 2 files changed, 40 insertions(+), 26 deletions(-) - -diff --git a/drivers/net/bonding/rte_eth_bond_8023ad.c b/drivers/net/bonding/rte_eth_bond_8023ad.c -index 5fe004e..52bd960 100644 ---- a/drivers/net/bonding/rte_eth_bond_8023ad.c -+++ b/drivers/net/bonding/rte_eth_bond_8023ad.c -@@ -831,7 +831,6 @@ bond_mode_8023ad_periodic_cb(void *arg) - struct port *port; - struct rte_eth_link link_info; - struct rte_ether_addr slave_addr; -- struct rte_mbuf *lacp_pkt = NULL; - uint16_t slave_id; - uint16_t i; - -@@ -903,6 +902,7 @@ bond_mode_8023ad_periodic_cb(void *arg) - /* Find LACP packet to this port. Do not check subtype, - * it is done in function that queued packet - */ -+ struct rte_mbuf *lacp_pkt = NULL; - int retval = rte_ring_dequeue(port->rx_ring, - (void **)&lacp_pkt); - -@@ -911,15 +911,17 @@ bond_mode_8023ad_periodic_cb(void *arg) - - rx_machine_update(internals, slave_id, lacp_pkt); - } else { -- uint16_t rx_count = rte_eth_rx_burst(slave_id, -- internals->mode4.dedicated_queues.rx_qid, -- &lacp_pkt, 1); -- -- if (rx_count == 1) -- bond_mode_8023ad_handle_slow_pkt(internals, -- slave_id, lacp_pkt); -- else -+ uint16_t rx_count, j; -+ struct rte_mbuf *lacp_pkt[16] = { NULL }; -+ -+ rx_count = rte_eth_rx_burst(slave_id, internals->mode4.dedicated_queues.rx_qid, -+ &lacp_pkt[0], sizeof(lacp_pkt)/sizeof(struct rte_mbuf *)); -+ if (rx_count > 0) { -+ for (j = 0; j < rx_count; j++) -+ bond_mode_8023ad_handle_slow_pkt(internals, slave_id, lacp_pkt[j]); -+ } else { - rx_machine_update(internals, slave_id, NULL); -+ } - } - - periodic_machine(internals, slave_id); -diff --git a/drivers/net/bonding/rte_eth_bond_pmd.c b/drivers/net/bonding/rte_eth_bond_pmd.c -index 53f8ba3..42e436c 100644 ---- a/drivers/net/bonding/rte_eth_bond_pmd.c -+++ 
b/drivers/net/bonding/rte_eth_bond_pmd.c -@@ -291,7 +291,6 @@ rx_burst_8023ad(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts, - - uint8_t collecting; /* current slave collecting status */ - const uint8_t promisc = rte_eth_promiscuous_get(internals->port_id); -- const uint8_t allmulti = rte_eth_allmulticast_get(internals->port_id); - uint8_t subtype; - uint16_t i; - uint16_t j; -@@ -322,6 +321,15 @@ rx_burst_8023ad(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts, - - /* Handle slow protocol packets. */ - while (j < num_rx_total) { -+ /* If packet is not pure L2 and is known: -+ * Such as OSPF protocol multcast packet, -+ * we want to handle it in user mode by ourselves, -+ * skip slow protocol flow */ -+ if ((bufs[j]->packet_type & ~RTE_PTYPE_L2_ETHER) != 0) { -+ j++; -+ continue; -+ } -+ - if (j + 3 < num_rx_total) - rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *)); - -@@ -331,24 +339,26 @@ rx_burst_8023ad(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts, - /* Remove packet from array if: - * - it is slow packet but no dedicated rxq is present, - * - slave is not in collecting state, -- * - bonding interface is not in promiscuous mode: -- * - packet is unicast and address does not match, -- * - packet is multicast and bonding interface -- * is not in allmulti, -+ * - bonding interface is not in promiscuous mode and -+ * packet is unicast and address does not match, - */ - if (unlikely( -- (!dedicated_rxq && -- is_lacp_packets(hdr->ether_type, subtype, -- bufs[j])) || -- !collecting || -- (!promisc && -- ((rte_is_unicast_ether_addr(&hdr->d_addr) && -- !rte_is_same_ether_addr(bond_mac, -- &hdr->d_addr)) || -- (!allmulti && -- rte_is_multicast_ether_addr(&hdr->d_addr)))))) { -- -+ (is_lacp_packets(hdr->ether_type, subtype, bufs[j])) || -+ !collecting || (!promisc && -+ (rte_is_unicast_ether_addr(&hdr->d_addr) && -+ !rte_is_same_ether_addr(bond_mac, &hdr->d_addr))))) { - if (hdr->ether_type == ether_type_slow_be) { -+ if (dedicated_rxq) { -+ /* 
Error! Lacp packets should never appear here if -+ * dedicated queue enabled. This can be caused by -+ * a lack of support for ethertype rte_flow. Just -+ * issue a warning rather than dropping the packets -+ * so that the lacp state machine can work properly. -+ */ -+ RTE_BOND_LOG(WARNING, "receive lacp packets from queue %d " -+ "of port %d when dedicated queue enabled", -+ bd_rx_q->queue_id, slaves[idx]); -+ } - bond_mode_8023ad_handle_slow_pkt( - internals, slaves[idx], bufs[j]); - } else -@@ -1271,8 +1281,10 @@ skip_tx_ring: - slave_port_ids[i]; - } - -- if (unlikely(dist_slave_count < 1)) -+ if (unlikely(dist_slave_count < 1)) { -+ RTE_BOND_LOG(WARNING, "no distributing slaves on bonding port %d", internals->port_id); - return 0; -+ } - - return tx_burst_balance(queue, bufs, nb_bufs, dist_slave_port_ids, - dist_slave_count); --- -1.8.3.1 - diff --git a/patch/dpdk-stable-20.11.1/0007-bonding-device-sends-packets-with-user-specified-sal.patch b/patch/dpdk-stable-20.11.1/0007-bonding-device-sends-packets-with-user-specified-sal.patch deleted file mode 100644 index d7e4e0c6d..000000000 --- a/patch/dpdk-stable-20.11.1/0007-bonding-device-sends-packets-with-user-specified-sal.patch +++ /dev/null @@ -1,91 +0,0 @@ -From 7024d80414e914a54c301dbcc9bb4cf6fb5f927b Mon Sep 17 00:00:00 2001 -From: yuwenchao -Date: Tue, 30 Jul 2024 15:39:28 +0800 -Subject: [PATCH] bonding device sends packets with user specified salve port - -The outgoing slave port is specified in mbuf field "hash.txadapter.reserved2". 
-Support the following 3 bonding mode: -- mode 0: round robin -- mode 2: balance -- mode 4: 8023ad - -Signed-off-by: yuwenchao ---- - drivers/net/bonding/rte_eth_bond_pmd.c | 26 ++++++++++++++++++++++++-- - lib/librte_mbuf/rte_mbuf.h | 2 ++ - 2 files changed, 26 insertions(+), 2 deletions(-) - -diff --git a/drivers/net/bonding/rte_eth_bond_pmd.c b/drivers/net/bonding/rte_eth_bond_pmd.c -index 42e436c..a35422c 100644 ---- a/drivers/net/bonding/rte_eth_bond_pmd.c -+++ b/drivers/net/bonding/rte_eth_bond_pmd.c -@@ -573,6 +573,22 @@ struct client_stats_t { - return nb_recv_pkts; - } - -+static inline int -+bond_ethdev_populate_slave_by_user(const struct rte_mbuf *mbuf, const uint16_t *slaves, -+ int num_slave) -+{ -+ uint16_t i, pid = mbuf->hash.txadapter.reserved2; -+ -+ if (likely(pid == RTE_MBUF_PORT_INVALID)) -+ return -1; -+ -+ for (i = 0; i < num_slave; i++) { -+ if (slaves[i] == pid) -+ return i; -+ } -+ return -1; -+} -+ - static uint16_t - bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs, - uint16_t nb_pkts) -@@ -605,7 +621,9 @@ struct client_stats_t { - - /* Populate slaves mbuf with which packets are to be sent on it */ - for (i = 0; i < nb_pkts; i++) { -- cslave_idx = (slave_idx + i) % num_of_slaves; -+ cslave_idx = bond_ethdev_populate_slave_by_user(bufs[i], slaves, num_of_slaves); -+ if (likely(cslave_idx < 0)) -+ cslave_idx = (slave_idx + i) % num_of_slaves; - slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i]; - } - -@@ -1162,7 +1180,11 @@ struct bwg_slave { - - for (i = 0; i < nb_bufs; i++) { - /* Populate slave mbuf arrays with mbufs for that slave. 
*/ -- uint16_t slave_idx = bufs_slave_port_idxs[i]; -+ int slave_idx; -+ -+ slave_idx = bond_ethdev_populate_slave_by_user(bufs[i], slave_port_ids, slave_count); -+ if (likely(slave_idx < 0)) -+ slave_idx = bufs_slave_port_idxs[i]; - - slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i]; - } -diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h -index c4c9ebf..130b99d 100644 ---- a/lib/librte_mbuf/rte_mbuf.h -+++ b/lib/librte_mbuf/rte_mbuf.h -@@ -589,6 +589,7 @@ static inline struct rte_mbuf *rte_mbuf_raw_alloc(struct rte_mempool *mp) - - if (rte_mempool_get(mp, (void **)&m) < 0) - return NULL; -+ m->hash.txadapter.reserved2 = RTE_MBUF_PORT_INVALID; - __rte_mbuf_raw_sanity_check(m); - return m; - } -@@ -867,6 +868,7 @@ static inline void rte_pktmbuf_reset(struct rte_mbuf *m) - m->vlan_tci_outer = 0; - m->nb_segs = 1; - m->port = RTE_MBUF_PORT_INVALID; -+ m->hash.txadapter.reserved2 = RTE_MBUF_PORT_INVALID; - - m->ol_flags &= EXT_ATTACHED_MBUF; - m->packet_type = 0; --- -1.8.3.1 - diff --git a/patch/dpdk-stable-20.11.10/0001-kni-use-netlink-event-for-multicast-driver-part.patch b/patch/dpdk-stable-20.11.10/0001-kni-use-netlink-event-for-multicast-driver-part.patch deleted file mode 100644 index 8f9c28635..000000000 --- a/patch/dpdk-stable-20.11.10/0001-kni-use-netlink-event-for-multicast-driver-part.patch +++ /dev/null @@ -1,128 +0,0 @@ -From 3e182c106d61863a55e35425e2afefcc222f8f92 Mon Sep 17 00:00:00 2001 -From: yuwenchao -Date: Thu, 1 Aug 2024 17:18:30 +0800 -Subject: [PATCH 1/7] kni: use netlink event for multicast (driver part) - -Signed-off-by: yuwenchao ---- - kernel/linux/kni/kni_net.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 76 insertions(+) - -diff --git a/kernel/linux/kni/kni_net.c b/kernel/linux/kni/kni_net.c -index 779ee34..31e9e39 100644 ---- a/kernel/linux/kni/kni_net.c -+++ b/kernel/linux/kni/kni_net.c -@@ -17,6 +17,8 @@ - #include - #include - #include -+#include -+#include - #include - - #include 
-@@ -147,6 +149,7 @@ - ret_val = wait_event_interruptible_timeout(kni->wq, - kni_fifo_count(kni->resp_q), 3 * HZ); - if (signal_pending(current) || ret_val <= 0) { -+ pr_err("%s: wait_event_interruptible timeout\n", __func__); - ret = -ETIME; - goto fail; - } -@@ -690,6 +693,77 @@ void kni_net_release_fifo_phy(struct kni_dev *kni) - return (ret == 0) ? req.result : ret; - } - -+static size_t -+kni_nlmsg_size(void) -+{ -+ return NLMSG_ALIGN(sizeof(struct ifaddrmsg)) -+ + nla_total_size(4) /* IFA_ADDRESS */ -+ + nla_total_size(4) /* IFA_LOCAL */ -+ + nla_total_size(4) /* IFA_BROADCAST */ -+ + nla_total_size(IFNAMSIZ) /* IFA_LABEL */ -+ + nla_total_size(4) /* IFA_FLAGS */ -+ + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */ -+} -+ -+static void -+kni_net_set_rx_mode(struct net_device *dev) -+{ -+ /* -+ * send event to notify user (DPDK KNI app) that multicast list changed, -+ * so that it can monitor multicast join/leave and set HW mc-addrs to -+ * kni dev accordinglly. -+ * -+ * this event is just an notification, we do not save any mc-addr here -+ * (so attribute space for us). user kni app should get maddrs after -+ * receive this notification. -+ * -+ * I was expecting kernel send some rtnl event for multicast join/leave, -+ * but it doesn't. By checking the call-chain of SIOCADDMULTI (ip maddr, -+ * manages only hardware multicast) and IP_ADD_MEMBERSHIP (ip_mc_join_group, -+ * used to for IPv4 multicast), no rtnl event sent. -+ * -+ * so as workaround, modify kni driver here to send RTM_NEWADDR. -+ * it may not suitalbe to use this event for mcast, but that should works. -+ * hope that won't affect other listener to this event. -+ * -+ * previous solution was using rte_kni_request to pass hw-maddr list to user. -+ * it "works" for times but finally memory corruption found, which is -+ * not easy to address (lock was added and reviewed). That's why we use -+ * netlink event instead. 
-+ */ -+ struct sk_buff *skb; -+ struct net *net = dev_net(dev); -+ struct nlmsghdr *nlh; -+ struct ifaddrmsg *ifm; -+ -+ skb = nlmsg_new(kni_nlmsg_size(), GFP_ATOMIC); -+ if (!skb) -+ return; -+ -+ /* no other event for us ? */ -+ nlh = nlmsg_put(skb, 0, 0, RTM_NEWADDR, sizeof(*ifm), 0); -+ if (!nlh) { -+ kfree_skb(skb); -+ return; -+ } -+ -+ /* just send an notification so no other info */ -+ ifm = nlmsg_data(nlh); -+ memset(ifm, 0, sizeof(*ifm)); -+ ifm->ifa_family = AF_UNSPEC; -+ ifm->ifa_prefixlen = 0; -+ ifm->ifa_flags = 0; -+ ifm->ifa_scope = RT_SCOPE_NOWHERE; -+ ifm->ifa_index = 0; -+ -+ nlmsg_end(skb, nlh); -+ -+ /* other group ? */ -+ pr_debug("%s: rx-mode/multicast-list changed\n", __func__); -+ rtnl_notify(skb, net, 0, RTNLGRP_NOTIFY, NULL, GFP_ATOMIC); -+ return; -+} -+ - static void - kni_net_change_rx_flags(struct net_device *netdev, int flags) - { -@@ -791,6 +865,7 @@ void kni_net_release_fifo_phy(struct kni_dev *kni) - - ret = kni_net_process_request(netdev, &req); - -+ pr_info("%s request returns %d!\n", __func__, ret); - return (ret == 0 ? 
req.result : ret); - } - -@@ -822,6 +897,7 @@ void kni_net_release_fifo_phy(struct kni_dev *kni) - .ndo_change_rx_flags = kni_net_change_rx_flags, - .ndo_start_xmit = kni_net_tx, - .ndo_change_mtu = kni_net_change_mtu, -+ .ndo_set_rx_mode = kni_net_set_rx_mode, - .ndo_tx_timeout = kni_net_tx_timeout, - .ndo_set_mac_address = kni_net_set_mac, - #ifdef HAVE_CHANGE_CARRIER_CB --- -1.8.3.1 - diff --git a/patch/dpdk-stable-20.11.10/0002-pdump-change-dpdk-pdump-tool-for-dpvs.patch b/patch/dpdk-stable-20.11.10/0002-pdump-change-dpdk-pdump-tool-for-dpvs.patch deleted file mode 100644 index 5d71abd55..000000000 --- a/patch/dpdk-stable-20.11.10/0002-pdump-change-dpdk-pdump-tool-for-dpvs.patch +++ /dev/null @@ -1,555 +0,0 @@ -From 288a252c8b65ea6c811100b3472367891f298f7d Mon Sep 17 00:00:00 2001 -From: huangyichen -Date: Thu, 1 Jul 2021 21:23:50 +0800 -Subject: [PATCH 2/7] pdump: change dpdk-pdump tool for dpvs - ---- - app/pdump/main.c | 167 ++++++++++++++++++++++++++++++++++++++++--- - lib/librte_pdump/rte_pdump.c | 145 +++++++++++++++++++++++++++++++++++-- - lib/librte_pdump/rte_pdump.h | 27 +++++++ - 3 files changed, 327 insertions(+), 12 deletions(-) - -diff --git a/app/pdump/main.c b/app/pdump/main.c -index 36b14fa..5b4217e 100644 ---- a/app/pdump/main.c -+++ b/app/pdump/main.c -@@ -27,6 +27,7 @@ - #include - #include - #include -+#include - - #define CMD_LINE_OPT_PDUMP "pdump" - #define CMD_LINE_OPT_PDUMP_NUM 256 -@@ -42,6 +43,14 @@ - #define PDUMP_MSIZE_ARG "mbuf-size" - #define PDUMP_NUM_MBUFS_ARG "total-num-mbufs" - -+#define PDUMP_HOST_ARG "host" -+#define PDUMP_SRC_ARG "src-host" -+#define PDUMP_DST_ARG "dst-host" -+#define PDUMP_PROTO_PORT_AGE "proto-port" -+#define PDUMP_SPORT_ARG "src-port" -+#define PDUMP_DPORT_ARG "dst-port" -+#define PDUMP_PROTO_ARG "proto" -+ - #define VDEV_NAME_FMT "net_pcap_%s_%d" - #define VDEV_PCAP_ARGS_FMT "tx_pcap=%s" - #define VDEV_IFACE_ARGS_FMT "tx_iface=%s" -@@ -97,6 +106,13 @@ enum pdump_by { - PDUMP_RING_SIZE_ARG, - 
PDUMP_MSIZE_ARG, - PDUMP_NUM_MBUFS_ARG, -+ PDUMP_HOST_ARG, -+ PDUMP_SRC_ARG, -+ PDUMP_DST_ARG, -+ PDUMP_PROTO_PORT_AGE, -+ PDUMP_SPORT_ARG, -+ PDUMP_DPORT_ARG, -+ PDUMP_PROTO_ARG, - NULL - }; - -@@ -130,6 +146,7 @@ struct pdump_tuples { - enum pcap_stream rx_vdev_stream_type; - enum pcap_stream tx_vdev_stream_type; - bool single_pdump_dev; -+ struct pdump_filter *filter; - - /* stats */ - struct pdump_stats stats; -@@ -158,6 +175,11 @@ struct parse_val { - "(queue=)," - "(rx-dev= |" - " tx-dev=," -+ "[host= | src-host= |" -+ "dst-host=]," -+ "[proto=support:tcp/udp/icmp]," -+ "[proto-port= |src-port= |" -+ "dst-port=]," - "[ring-size=default:16384]," - "[mbuf-size=default:2176]," - "[total-num-mbufs=default:65535]'\n", -@@ -244,6 +266,64 @@ struct parse_val { - } - - static int -+parse_host(const char *key __rte_unused, const char *value, void *extra_args) -+{ -+ struct pdump_tuples *pt = extra_args; -+ struct in_addr inaddr; -+ struct in6_addr inaddr6; -+ union addr addr; -+ int af = 0; -+ -+ if (inet_pton(AF_INET6, value, &inaddr6) > 0) { -+ af = AF_INET6; -+ addr.in6 = inaddr6; -+ } else if (inet_pton(AF_INET, value, &inaddr) > 0){ -+ af = AF_INET; -+ addr.in = inaddr; -+ } else { -+ printf("IP address invaled\n"); -+ return -EINVAL; -+ } -+ -+ if (pt->filter && pt->filter->af != 0 && af != pt->filter->af) { -+ printf("IPv4 and IPv6 conflict\n"); -+ return -EINVAL; -+ } else { -+ pt->filter->af = af; -+ } -+ -+ if (!strcmp(key, PDUMP_HOST_ARG)) { -+ rte_memcpy(&pt->filter->host_addr, &addr, sizeof(addr)); -+ } else if (!strcmp(key, PDUMP_SRC_ARG)) { -+ rte_memcpy(&pt->filter->s_addr, &addr, sizeof(addr)); -+ } else if (!strcmp(key, PDUMP_DST_ARG)) { -+ rte_memcpy(&pt->filter->d_addr, &addr, sizeof(addr)); -+ } -+ -+ return 0; -+} -+ -+static int -+parse_proto(const char *key __rte_unused, const char *value, void *extra_args) -+{ -+ struct pdump_tuples *pt = extra_args; -+ -+ if (!strcmp(value, "tcp")) { -+ pt->filter->proto = IPPROTO_TCP; -+ } else if 
(!strcmp(value, "udp")) { -+ pt->filter->proto = IPPROTO_UDP; -+ } else if (!strcmp(value, "icmp")) { -+ pt->filter->proto = IPPROTO_ICMP; -+ } else { -+ printf("invalid value:\"%s\" for key:\"%s\", " -+ "value must be tcp/udp/icmp\n", value, key); -+ return -EINVAL; -+ } -+ -+ return 0; -+} -+ -+static int - parse_pdump(const char *optarg) - { - struct rte_kvargs *kvlist; -@@ -370,6 +450,75 @@ struct parse_val { - } else - pt->total_num_mbufs = MBUFS_PER_POOL; - -+ /* filter parsing and validation */ -+ pt->filter = rte_zmalloc("pdump_filter", -+ sizeof(struct pdump_filter), 0); -+ cnt1 = rte_kvargs_count(kvlist, PDUMP_HOST_ARG); -+ if (cnt1 == 1) { -+ ret = rte_kvargs_process(kvlist, PDUMP_HOST_ARG, -+ &parse_host, pt); -+ if (ret < 0) -+ goto free_kvlist; -+ } -+ -+ cnt1 = rte_kvargs_count(kvlist, PDUMP_SRC_ARG); -+ if (cnt1 == 1) { -+ ret = rte_kvargs_process(kvlist, PDUMP_SRC_ARG, -+ &parse_host, pt); -+ if (ret < 0) -+ goto free_kvlist; -+ } -+ -+ cnt1 = rte_kvargs_count(kvlist, PDUMP_DST_ARG); -+ if (cnt1 == 1) { -+ ret = rte_kvargs_process(kvlist, PDUMP_DST_ARG, -+ &parse_host, pt); -+ if (ret < 0) -+ goto free_kvlist; -+ } -+ -+ -+ cnt1 = rte_kvargs_count(kvlist, PDUMP_PROTO_PORT_AGE); -+ if (cnt1 == 1) { -+ v.min = 1; -+ v.max = UINT16_MAX; -+ ret = rte_kvargs_process(kvlist, PDUMP_PROTO_PORT_AGE, -+ &parse_uint_value, &v); -+ if (ret < 0) -+ goto free_kvlist; -+ pt->filter->proto_port = (uint16_t) v.val; -+ } -+ -+ cnt1 = rte_kvargs_count(kvlist, PDUMP_SPORT_ARG); -+ if (cnt1 == 1) { -+ v.min = 1; -+ v.max = UINT16_MAX; -+ ret = rte_kvargs_process(kvlist, PDUMP_SPORT_ARG, -+ &parse_uint_value, &v); -+ if (ret < 0) -+ goto free_kvlist; -+ pt->filter->s_port = (uint16_t) v.val; -+ } -+ -+ cnt1 = rte_kvargs_count(kvlist, PDUMP_DPORT_ARG); -+ if (cnt1 == 1) { -+ v.min = 1; -+ v.max = UINT16_MAX; -+ ret = rte_kvargs_process(kvlist, PDUMP_DPORT_ARG, -+ &parse_uint_value, &v); -+ if (ret < 0) -+ goto free_kvlist; -+ pt->filter->d_port = (uint16_t) v.val; -+ } 
-+ -+ cnt1 = rte_kvargs_count(kvlist, PDUMP_PROTO_ARG); -+ if (cnt1 == 1) { -+ ret = rte_kvargs_process(kvlist, PDUMP_PROTO_ARG, -+ &parse_proto, pt); -+ if (ret < 0) -+ goto free_kvlist; -+ } -+ - num_tuples++; - - free_kvlist: -@@ -510,6 +659,8 @@ struct parse_val { - rte_ring_free(pt->rx_ring); - if (pt->tx_ring) - rte_ring_free(pt->tx_ring); -+ if (pt->filter) -+ rte_free(pt->filter); - } - } - -@@ -837,20 +988,20 @@ struct parse_val { - pt->queue, - RTE_PDUMP_FLAG_RX, - pt->rx_ring, -- pt->mp, NULL); -+ pt->mp, pt->filter); - ret1 = rte_pdump_enable_by_deviceid( - pt->device_id, - pt->queue, - RTE_PDUMP_FLAG_TX, - pt->tx_ring, -- pt->mp, NULL); -+ pt->mp, pt->filter); - } else if (pt->dump_by_type == PORT_ID) { - ret = rte_pdump_enable(pt->port, pt->queue, - RTE_PDUMP_FLAG_RX, -- pt->rx_ring, pt->mp, NULL); -+ pt->rx_ring, pt->mp, pt->filter); - ret1 = rte_pdump_enable(pt->port, pt->queue, - RTE_PDUMP_FLAG_TX, -- pt->tx_ring, pt->mp, NULL); -+ pt->tx_ring, pt->mp, pt->filter); - } - } else if (pt->dir == RTE_PDUMP_FLAG_RX) { - if (pt->dump_by_type == DEVICE_ID) -@@ -858,22 +1009,22 @@ struct parse_val { - pt->device_id, - pt->queue, - pt->dir, pt->rx_ring, -- pt->mp, NULL); -+ pt->mp, pt->filter); - else if (pt->dump_by_type == PORT_ID) - ret = rte_pdump_enable(pt->port, pt->queue, - pt->dir, -- pt->rx_ring, pt->mp, NULL); -+ pt->rx_ring, pt->mp, pt->filter); - } else if (pt->dir == RTE_PDUMP_FLAG_TX) { - if (pt->dump_by_type == DEVICE_ID) - ret = rte_pdump_enable_by_deviceid( - pt->device_id, - pt->queue, - pt->dir, -- pt->tx_ring, pt->mp, NULL); -+ pt->tx_ring, pt->mp, pt->filter); - else if (pt->dump_by_type == PORT_ID) - ret = rte_pdump_enable(pt->port, pt->queue, - pt->dir, -- pt->tx_ring, pt->mp, NULL); -+ pt->tx_ring, pt->mp, pt->filter); - } - if (ret < 0 || ret1 < 0) { - cleanup_pdump_resources(); -diff --git a/lib/librte_pdump/rte_pdump.c b/lib/librte_pdump/rte_pdump.c -index 746005a..8a252d5 100644 ---- a/lib/librte_pdump/rte_pdump.c -+++ 
b/lib/librte_pdump/rte_pdump.c -@@ -9,6 +9,10 @@ - #include - #include - #include -+#include -+#include -+#include -+#include - - #include "rte_pdump.h" - -@@ -69,6 +73,132 @@ struct pdump_response { - } rx_cbs[RTE_MAX_ETHPORTS][RTE_MAX_QUEUES_PER_PORT], - tx_cbs[RTE_MAX_ETHPORTS][RTE_MAX_QUEUES_PER_PORT]; - -+static int -+inet_addr_equal(int af, const union addr *a1, -+ const union addr *a2) -+{ -+ switch (af) { -+ case AF_INET: -+ return a1->in.s_addr == a2->in.s_addr; -+ case AF_INET6: -+ return memcmp(a1->in6.s6_addr, a2->in6.s6_addr, 16) == 0; -+ default: -+ return memcmp(a1, a2, sizeof(union addr)) == 0; -+ } -+} -+ -+static int -+inet_is_addr_any(int af, const union addr *addr) -+{ -+ switch (af) { -+ case AF_INET: -+ return addr->in.s_addr == htonl(INADDR_ANY); -+ case AF_INET6: -+ return IN6_ARE_ADDR_EQUAL(&addr->in6, &in6addr_any); -+ default: -+ return -1; -+ } -+ -+ return -1; -+} -+static int -+pdump_filter(struct rte_mbuf *m, struct pdump_filter *filter) -+{ -+ struct rte_ether_hdr *eth_hdr; -+ struct vlan_eth_hdr *vlan_eth_hdr; -+ union addr s_addr, d_addr; -+ int prepend = 0; -+ uint16_t type = 0; -+ uint16_t iph_len = 0; -+ uint8_t proto = 0; -+ -+ int af; -+ -+ if (filter->af == 0 && filter->s_port == 0 && -+ filter->d_port == 0 && filter->proto == 0 && -+ filter->proto_port == 0) -+ return 0; -+ -+ eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); -+ -+ if (eth_hdr->ether_type == htons(ETH_P_8021Q)) { -+ prepend += sizeof(struct vlan_eth_hdr); -+ vlan_eth_hdr = rte_pktmbuf_mtod(m, struct vlan_eth_hdr *); -+ type = vlan_eth_hdr->h_vlan_encapsulated_proto; -+ } else { -+ prepend += sizeof(struct rte_ether_hdr); -+ eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); -+ type = eth_hdr->ether_type; -+ } -+ -+ if (rte_pktmbuf_adj(m, prepend) == NULL) -+ goto prepend; -+ -+ if (type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) { -+ struct rte_arp_hdr *arp = rte_pktmbuf_mtod(m, struct rte_arp_hdr *); -+ af = AF_INET; -+ s_addr.in.s_addr = 
arp->arp_data.arp_sip; -+ d_addr.in.s_addr = arp->arp_data.arp_tip; -+ } else if (type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4)) { -+ struct rte_ipv4_hdr *ip4 = rte_pktmbuf_mtod(m, struct rte_ipv4_hdr *); -+ af = AF_INET; -+ s_addr.in.s_addr = ip4->src_addr; -+ d_addr.in.s_addr = ip4->dst_addr; -+ proto = ip4->next_proto_id; -+ iph_len = (ip4->version_ihl & 0xf) << 2; -+ } else if (type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6)) { -+ struct rte_ipv6_hdr *ip6 = rte_pktmbuf_mtod(m, struct rte_ipv6_hdr *); -+ af = AF_INET6; -+ rte_memcpy(&s_addr.in6, &ip6->src_addr, 16); -+ rte_memcpy(&d_addr.in6, &ip6->dst_addr, 16); -+ proto = ip6->proto; -+ iph_len = sizeof(struct rte_ipv6_hdr); -+ } else { -+ goto prepend; -+ } -+ -+ /*filter*/ -+ if (!inet_is_addr_any(af, &filter->s_addr) && -+ !inet_addr_equal(af, &filter->s_addr, &s_addr)) -+ goto prepend; -+ if (!inet_is_addr_any(af, &filter->d_addr) && -+ !inet_addr_equal(af, &filter->d_addr, &d_addr)) -+ goto prepend; -+ if (!inet_is_addr_any(af, &filter->host_addr) && -+ !inet_addr_equal(af, &filter->host_addr, &s_addr) && -+ !inet_addr_equal(af, &filter->host_addr, &d_addr)) -+ goto prepend; -+ -+ if (filter->proto && filter->proto != proto) -+ goto prepend; -+ -+ if (filter->s_port || filter->d_port || filter->proto_port) { -+ if (proto != IPPROTO_TCP && proto != IPPROTO_UDP) -+ goto prepend; -+ struct rte_udp_hdr _uh; -+ const struct rte_udp_hdr *uh; -+ uh = rte_pktmbuf_read(m, iph_len, sizeof(_uh), &_uh); -+ if (uh == NULL) -+ goto prepend; -+ if (filter->s_port && filter->s_port != rte_cpu_to_be_16(uh->src_port)) -+ goto prepend; -+ -+ if (filter->d_port && filter->d_port != rte_cpu_to_be_16(uh->dst_port)) -+ goto prepend; -+ -+ if (filter->proto_port && -+ filter->proto_port != rte_cpu_to_be_16(uh->src_port) && -+ filter->proto_port != rte_cpu_to_be_16(uh->dst_port)) -+ goto prepend; -+ } -+ -+ rte_pktmbuf_prepend(m, prepend); -+ return 0; -+ -+prepend: -+ rte_pktmbuf_prepend(m, prepend); -+ return -1; -+} - - static 
inline void - pdump_copy(struct rte_mbuf **pkts, uint16_t nb_pkts, void *user_params) -@@ -86,6 +216,8 @@ struct pdump_response { - ring = cbs->ring; - mp = cbs->mp; - for (i = 0; i < nb_pkts; i++) { -+ if (pdump_filter(pkts[i], cbs->filter) != 0) -+ continue; - p = rte_pktmbuf_copy(pkts[i], mp, 0, UINT32_MAX); - if (p) - dup_bufs[d_pkts++] = p; -@@ -122,7 +254,7 @@ struct pdump_response { - static int - pdump_register_rx_callbacks(uint16_t end_q, uint16_t port, uint16_t queue, - struct rte_ring *ring, struct rte_mempool *mp, -- uint16_t operation) -+ struct pdump_filter *filter, uint16_t operation) - { - uint16_t qid; - struct pdump_rxtx_cbs *cbs = NULL; -@@ -140,6 +272,7 @@ struct pdump_response { - } - cbs->ring = ring; - cbs->mp = mp; -+ cbs->filter = filter; - cbs->cb = rte_eth_add_first_rx_callback(port, qid, - pdump_rx, cbs); - if (cbs->cb == NULL) { -@@ -176,7 +309,7 @@ struct pdump_response { - static int - pdump_register_tx_callbacks(uint16_t end_q, uint16_t port, uint16_t queue, - struct rte_ring *ring, struct rte_mempool *mp, -- uint16_t operation) -+ struct pdump_filter *filter, uint16_t operation) - { - - uint16_t qid; -@@ -195,6 +328,7 @@ struct pdump_response { - } - cbs->ring = ring; - cbs->mp = mp; -+ cbs->filter = filter; - cbs->cb = rte_eth_add_tx_callback(port, qid, pdump_tx, - cbs); - if (cbs->cb == NULL) { -@@ -238,6 +372,7 @@ struct pdump_response { - uint16_t operation; - struct rte_ring *ring; - struct rte_mempool *mp; -+ struct pdump_filter *filter; - - flags = p->flags; - operation = p->op; -@@ -253,6 +388,7 @@ struct pdump_response { - queue = p->data.en_v1.queue; - ring = p->data.en_v1.ring; - mp = p->data.en_v1.mp; -+ filter = p->data.en_v1.filter; - } else { - ret = rte_eth_dev_get_port_by_name(p->data.dis_v1.device, - &port); -@@ -265,6 +401,7 @@ struct pdump_response { - queue = p->data.dis_v1.queue; - ring = p->data.dis_v1.ring; - mp = p->data.dis_v1.mp; -+ filter = p->data.dis_v1.filter; - } - - /* validation if packet capture is 
for all queues */ -@@ -303,7 +440,7 @@ struct pdump_response { - if (flags & RTE_PDUMP_FLAG_RX) { - end_q = (queue == RTE_PDUMP_ALL_QUEUES) ? nb_rx_q : queue + 1; - ret = pdump_register_rx_callbacks(end_q, port, queue, ring, mp, -- operation); -+ filter, operation); - if (ret < 0) - return ret; - } -@@ -312,7 +449,7 @@ struct pdump_response { - if (flags & RTE_PDUMP_FLAG_TX) { - end_q = (queue == RTE_PDUMP_ALL_QUEUES) ? nb_tx_q : queue + 1; - ret = pdump_register_tx_callbacks(end_q, port, queue, ring, mp, -- operation); -+ filter, operation); - if (ret < 0) - return ret; - } -diff --git a/lib/librte_pdump/rte_pdump.h b/lib/librte_pdump/rte_pdump.h -index 6b00fc1..3986b07 100644 ---- a/lib/librte_pdump/rte_pdump.h -+++ b/lib/librte_pdump/rte_pdump.h -@@ -15,6 +15,8 @@ - #include - #include - #include -+#include -+#include - - #ifdef __cplusplus - extern "C" { -@@ -29,6 +31,31 @@ enum { - RTE_PDUMP_FLAG_RXTX = (RTE_PDUMP_FLAG_RX|RTE_PDUMP_FLAG_TX) - }; - -+union addr { -+ struct in_addr in; -+ struct in6_addr in6; -+}; -+ -+struct pdump_filter { -+ int af; -+ union addr s_addr; -+ union addr d_addr; -+ union addr host_addr; //s_addr or d_addr -+ -+ uint8_t proto; -+ uint16_t proto_port; //s_port or d_port -+ uint16_t s_port; -+ uint16_t d_port; -+}; -+ -+struct vlan_eth_hdr { -+ unsigned char h_dest[ETH_ALEN]; -+ unsigned char h_source[ETH_ALEN]; -+ unsigned short h_vlan_proto; -+ unsigned short h_vlan_TCI; -+ unsigned short h_vlan_encapsulated_proto; -+}; -+ - /** - * Initialize packet capturing handling - * --- -1.8.3.1 - diff --git a/patch/dpdk-stable-20.11.10/0003-debug-enable-dpdk-eal-memory-debug.patch b/patch/dpdk-stable-20.11.10/0003-debug-enable-dpdk-eal-memory-debug.patch deleted file mode 100644 index 77a18998e..000000000 --- a/patch/dpdk-stable-20.11.10/0003-debug-enable-dpdk-eal-memory-debug.patch +++ /dev/null @@ -1,59 +0,0 @@ -From 3263fcc900f9e97cf777cb1ad2d84408f6fe7bcf Mon Sep 17 00:00:00 2001 -From: huangyichen -Date: Thu, 1 Jul 2021 21:24:47 +0800 
-Subject: [PATCH 3/7] debug: enable dpdk eal memory debug - -The patch is used for memory debug. To use the patch, configure meson with option --Dc_args="-DRTE_MALLOC_DEBUG" when building dpdk. For example, - -meson -Dc_args="-DRTE_MALLOC_DEBUG" -Dbuildtype=debug -Dprefix=$(pwd)/dpdklib dpdkbuild -ninja -C dpdkbuild ---- - lib/librte_eal/common/rte_malloc.c | 4 ++++ - lib/librte_eal/include/rte_malloc.h | 15 +++++++++++++++ - 2 files changed, 19 insertions(+) - -diff --git a/lib/librte_eal/common/rte_malloc.c b/lib/librte_eal/common/rte_malloc.c -index 684af4e..cc7ebb6 100644 ---- a/lib/librte_eal/common/rte_malloc.c -+++ b/lib/librte_eal/common/rte_malloc.c -@@ -30,6 +30,10 @@ - #include "eal_memcfg.h" - #include "eal_private.h" - -+int rte_memmory_ok(void *addr) -+{ -+ return malloc_elem_cookies_ok(RTE_PTR_SUB(addr, MALLOC_ELEM_HEADER_LEN)); -+} - - /* Free the memory space back to heap */ - static void -diff --git a/lib/librte_eal/include/rte_malloc.h b/lib/librte_eal/include/rte_malloc.h -index c8da894..3756d0d 100644 ---- a/lib/librte_eal/include/rte_malloc.h -+++ b/lib/librte_eal/include/rte_malloc.h -@@ -248,6 +248,21 @@ struct rte_malloc_socket_stats { - __rte_alloc_size(2, 3); - - /** -+ * Check the header/tailer cookies of memory pointed to by the provided pointer. -+ * -+ * This pointer must have been returned by a previous call to -+ * rte_malloc(), rte_zmalloc(), rte_calloc() or rte_realloc(). -+ * -+ * @param ptr -+ * The pointer to memory to be checked. -+ * @return -+ * - true if the header/tailer cookies are OK. -+ * - Otherwise, false. -+ */ -+int -+rte_memmory_ok(void *ptr); -+ -+/** - * Frees the memory space pointed to by the provided pointer. 
- * - * This pointer must have been returned by a previous call to --- -1.8.3.1 - diff --git a/patch/dpdk-stable-20.11.10/0004-ixgbe_flow-patch-ixgbe-fdir-rte_flow-for-dpvs.patch b/patch/dpdk-stable-20.11.10/0004-ixgbe_flow-patch-ixgbe-fdir-rte_flow-for-dpvs.patch deleted file mode 100644 index 136ba76a9..000000000 --- a/patch/dpdk-stable-20.11.10/0004-ixgbe_flow-patch-ixgbe-fdir-rte_flow-for-dpvs.patch +++ /dev/null @@ -1,256 +0,0 @@ -From 4b9735e0d479916ec0e7636e5440d4538b349148 Mon Sep 17 00:00:00 2001 -From: huangyichen -Date: Fri, 2 Jul 2021 11:55:47 +0800 -Subject: [PATCH 4/7] ixgbe_flow: patch ixgbe fdir rte_flow for dpvs - -1. Ignore fdir flow rule priority attribute. -2. Use different fdir soft-id for flow rules configured for the same queue. -3. Disable fdir mask settings by rte_flow. -4. Allow IPv6 to pass flow rule ETH item validation. -5. TCP & UDP flow item dest port = 0 is invalid of ixgbe_parse_ntuple_filter() -6. Safe free ixgbe_flow_list item of MARCO RTE_MALLOC_DEBUG is define (configure meson with option -Dc_args="-DRTE_MALLOC_DEBUG") ---- - drivers/net/ixgbe/ixgbe_flow.c | 119 ++++++++++++++++++++++++++++++++++++----- - 1 file changed, 105 insertions(+), 14 deletions(-) - -diff --git a/drivers/net/ixgbe/ixgbe_flow.c b/drivers/net/ixgbe/ixgbe_flow.c -index 7e5b684..e9bc402 100644 ---- a/drivers/net/ixgbe/ixgbe_flow.c -+++ b/drivers/net/ixgbe/ixgbe_flow.c -@@ -2,7 +2,6 @@ - * Copyright(c) 2010-2016 Intel Corporation - */ - --#include - #include - #include - #include -@@ -15,6 +14,7 @@ - #include - #include - -+#include - #include - #include - #include -@@ -468,6 +468,29 @@ const struct rte_flow_action *next_no_void_action( - } - - tcp_spec = item->spec; -+ /* -+ * DPVS filted by fdir is expected, -+ * With dpvs single worker mode pattern had set: -+ * ----------------------------------------------- -+ * ITEM Spec Mask -+ * ETH NULL NULL -+ * IPV4|6 src_addr 0 0 -+ * dst_addr laddr 0xFFFFFFFF -+ * UDP|TCP src_port 0 0 -+ * dst_port 0 0 -+ * END -+ 
* ----------------------------------------------- -+ * It should return error here -+ * And continue by ixgbe_parse_fdir_filter() -+ * */ -+ if (tcp_spec->hdr.dst_port == 0 && -+ tcp_mask->hdr.dst_port == 0) { -+ memset(filter, 0, sizeof(struct rte_eth_ntuple_filter)); -+ rte_flow_error_set(error, EINVAL, -+ RTE_FLOW_ERROR_TYPE_ITEM, -+ item, "Not supported by ntuple filter"); -+ return -rte_errno; -+ } - filter->dst_port = tcp_spec->hdr.dst_port; - filter->src_port = tcp_spec->hdr.src_port; - filter->tcp_flags = tcp_spec->hdr.tcp_flags; -@@ -501,6 +524,30 @@ const struct rte_flow_action *next_no_void_action( - filter->src_port_mask = udp_mask->hdr.src_port; - - udp_spec = item->spec; -+ /* -+ * DPVS filted by fdir is expected, -+ * With dpvs single worker mode pattern had set: -+ * ----------------------------------------------- -+ * ITEM Spec Mask -+ * ETH NULL NULL -+ * IPV4|6 src_addr 0 0 -+ * dst_addr laddr 0xFFFFFFFF -+ * UDP|TCP src_port 0 0 -+ * dst_port 0 0 -+ * END -+ * ----------------------------------------------- -+ * It should return error here -+ * And continue by ixgbe_parse_fdir_filter() -+ * */ -+ -+ if (udp_spec->hdr.dst_port == 0 && -+ udp_mask->hdr.dst_port == 0) { -+ memset(filter, 0, sizeof(struct rte_eth_ntuple_filter)); -+ rte_flow_error_set(error, EINVAL, -+ RTE_FLOW_ERROR_TYPE_ITEM, -+ item, "Not supported by ntuple filter"); -+ return -rte_errno; -+ } - filter->dst_port = udp_spec->hdr.dst_port; - filter->src_port = udp_spec->hdr.src_port; - } else if (item->type == RTE_FLOW_ITEM_TYPE_SCTP) { -@@ -1419,11 +1466,8 @@ const struct rte_flow_action *next_no_void_action( - - /* not supported */ - if (attr->priority) { -- memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); -- rte_flow_error_set(error, EINVAL, -- RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY, -- attr, "Not support priority."); -- return -rte_errno; -+ PMD_DRV_LOG(INFO, "ixgbe flow doesn't support priority %d " -+ "(priority must be 0), ignore and continue....\n", attr->priority); - } - - /* 
check if the first not void action is QUEUE or DROP. */ -@@ -1642,7 +1686,7 @@ static inline uint8_t signature_match(const struct rte_flow_item pattern[]) - * value. So, we need not do anything for the not provided fields later. - */ - memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); -- memset(&rule->mask, 0xFF, sizeof(struct ixgbe_hw_fdir_mask)); -+ memset(&rule->mask, 0, sizeof(struct ixgbe_hw_fdir_mask)); /* mask default zero */ - rule->mask.vlan_tci_mask = 0; - rule->mask.flex_bytes_mask = 0; - -@@ -1760,6 +1804,8 @@ static inline uint8_t signature_match(const struct rte_flow_item pattern[]) - } - } else { - if (item->type != RTE_FLOW_ITEM_TYPE_IPV4 && -+ /* Signature mode supports IPv6. */ -+ item->type != RTE_FLOW_ITEM_TYPE_IPV6 && - item->type != RTE_FLOW_ITEM_TYPE_VLAN) { - memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); - rte_flow_error_set(error, EINVAL, -@@ -1815,6 +1861,10 @@ static inline uint8_t signature_match(const struct rte_flow_item pattern[]) - */ - rule->ixgbe_fdir.formatted.flow_type = - IXGBE_ATR_FLOW_TYPE_IPV4; -+ -+ /* Update flow rule mode by global param. */ -+ rule->mode = dev->data->dev_conf.fdir_conf.mode; -+ - /*Not supported last point for range*/ - if (item->last) { - rte_flow_error_set(error, EINVAL, -@@ -1888,6 +1938,9 @@ static inline uint8_t signature_match(const struct rte_flow_item pattern[]) - rule->ixgbe_fdir.formatted.flow_type = - IXGBE_ATR_FLOW_TYPE_IPV6; - -+ /* Update flow rule mode by global param. */ -+ rule->mode = dev->data->dev_conf.fdir_conf.mode; -+ - /** - * 1. must signature match - * 2. 
not support last -@@ -2748,12 +2801,45 @@ static inline uint8_t signature_match(const struct rte_flow_item pattern[]) - return ixgbe_parse_fdir_act_attr(attr, actions, rule, error); - } - -+static inline int -+ixgbe_fdir_rule_patch(struct rte_eth_dev *dev, struct ixgbe_fdir_rule *rule) -+{ -+ static uint32_t softid[IXGBE_MAX_RX_QUEUE_NUM] = { 0 }; -+ -+ if (!rule) -+ return 0; -+ -+ if (!dev || !dev->data) -+ return -EINVAL; -+ if (rule->queue >= IXGBE_MAX_RX_QUEUE_NUM) -+ return -EINVAL; -+ -+ /* Soft-id for different rx-queue should be different. */ -+ rule->soft_id = softid[rule->queue]++; -+ -+ /* Disable mask config from rte_flow. -+ * FIXME: -+ * Ixgbe only supports one global mask, all the masks should be the same. -+ * Generally, fdir masks should be configured globally before port start. -+ * But the rte_flow configures masks at flow creation. So we disable fdir -+ * mask configs in rte_flow and configure it globally when port start. -+ * Refer to `ixgbe_dev_start/ixgbe_fdir_configure` for details. The global -+ * masks are configured into device initially with user specified params. -+ */ -+ rule->b_mask = 0; -+ -+ /* Use user-defined mode. 
*/ -+ rule->mode = dev->data->dev_conf.fdir_conf.mode; -+ -+ return 0; -+} -+ - static int - ixgbe_parse_fdir_filter(struct rte_eth_dev *dev, - const struct rte_flow_attr *attr, - const struct rte_flow_item pattern[], - const struct rte_flow_action actions[], -- struct ixgbe_fdir_rule *rule, -+ struct ixgbe_fdir_rule *rule, bool b_patch, - struct rte_flow_error *error) - { - int ret; -@@ -2787,13 +2873,18 @@ static inline uint8_t signature_match(const struct rte_flow_item pattern[]) - rule->ixgbe_fdir.formatted.dst_port != 0)) - return -ENOTSUP; - -- if (fdir_mode == RTE_FDIR_MODE_NONE || -- fdir_mode != rule->mode) -+ if (fdir_mode == RTE_FDIR_MODE_NONE) - return -ENOTSUP; - - if (rule->queue >= dev->data->nb_rx_queues) - return -ENOTSUP; - -+ if (ret) -+ return ret; -+ -+ if (b_patch) -+ return ixgbe_fdir_rule_patch(dev, rule); -+ - return ret; - } - -@@ -3128,7 +3219,7 @@ static inline uint8_t signature_match(const struct rte_flow_item pattern[]) - - memset(&fdir_rule, 0, sizeof(struct ixgbe_fdir_rule)); - ret = ixgbe_parse_fdir_filter(dev, attr, pattern, -- actions, &fdir_rule, error); -+ actions, &fdir_rule, true, error); - if (!ret) { - /* A mask cannot be deleted. 
*/ - if (fdir_rule.b_mask) { -@@ -3299,7 +3390,7 @@ static inline uint8_t signature_match(const struct rte_flow_item pattern[]) - - memset(&fdir_rule, 0, sizeof(struct ixgbe_fdir_rule)); - ret = ixgbe_parse_fdir_filter(dev, attr, pattern, -- actions, &fdir_rule, error); -+ actions, &fdir_rule, false, error); - if (!ret) - return 0; - -@@ -3335,7 +3426,7 @@ static inline uint8_t signature_match(const struct rte_flow_item pattern[]) - struct ixgbe_eth_syn_filter_ele *syn_filter_ptr; - struct ixgbe_eth_l2_tunnel_conf_ele *l2_tn_filter_ptr; - struct ixgbe_fdir_rule_ele *fdir_rule_ptr; -- struct ixgbe_flow_mem *ixgbe_flow_mem_ptr; -+ struct ixgbe_flow_mem *ixgbe_flow_mem_ptr, *next_ptr; - struct ixgbe_hw_fdir_info *fdir_info = - IXGBE_DEV_PRIVATE_TO_FDIR_INFO(dev->data->dev_private); - struct ixgbe_rss_conf_ele *rss_filter_ptr; -@@ -3432,7 +3523,7 @@ static inline uint8_t signature_match(const struct rte_flow_item pattern[]) - return ret; - } - -- TAILQ_FOREACH(ixgbe_flow_mem_ptr, &ixgbe_flow_list, entries) { -+ TAILQ_FOREACH_SAFE(ixgbe_flow_mem_ptr, &ixgbe_flow_list, entries, next_ptr) { - if (ixgbe_flow_mem_ptr->flow == pmd_flow) { - TAILQ_REMOVE(&ixgbe_flow_list, - ixgbe_flow_mem_ptr, entries); --- -1.8.3.1 - diff --git a/patch/dpdk-stable-20.11.10/0007-bonding-device-sends-packets-with-user-specified-sal.patch b/patch/dpdk-stable-20.11.10/0007-bonding-device-sends-packets-with-user-specified-sal.patch deleted file mode 100644 index 50b5ebf82..000000000 --- a/patch/dpdk-stable-20.11.10/0007-bonding-device-sends-packets-with-user-specified-sal.patch +++ /dev/null @@ -1,92 +0,0 @@ -From 69849e246e15f3e202e539809cefd18fe7083575 Mon Sep 17 00:00:00 2001 -From: yuwenchao -Date: Tue, 30 Jul 2024 15:39:28 +0800 -Subject: [PATCH 7/7] bonding device sends packets with user specified salve - port - -The outgoing slave port is specified in mbuf field "hash.txadapter.reserved2". 
-Support the following 3 bonding mode: -- mode 0: round robin -- mode 2: balance -- mode 4: 8023ad - -Signed-off-by: yuwenchao ---- - drivers/net/bonding/rte_eth_bond_pmd.c | 26 ++++++++++++++++++++++++-- - lib/librte_mbuf/rte_mbuf.h | 2 ++ - 2 files changed, 26 insertions(+), 2 deletions(-) - -diff --git a/drivers/net/bonding/rte_eth_bond_pmd.c b/drivers/net/bonding/rte_eth_bond_pmd.c -index f770f50..3c93365 100644 ---- a/drivers/net/bonding/rte_eth_bond_pmd.c -+++ b/drivers/net/bonding/rte_eth_bond_pmd.c -@@ -587,6 +587,22 @@ struct client_stats_t { - return nb_recv_pkts; - } - -+static inline int -+bond_ethdev_populate_slave_by_user(const struct rte_mbuf *mbuf, const uint16_t *slaves, -+ int num_slave) -+{ -+ uint16_t i, pid = mbuf->hash.txadapter.reserved2; -+ -+ if (likely(pid == RTE_MBUF_PORT_INVALID)) -+ return -1; -+ -+ for (i = 0; i < num_slave; i++) { -+ if (slaves[i] == pid) -+ return i; -+ } -+ return -1; -+} -+ - static uint16_t - bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs, - uint16_t nb_pkts) -@@ -619,7 +635,9 @@ struct client_stats_t { - - /* Populate slaves mbuf with which packets are to be sent on it */ - for (i = 0; i < nb_pkts; i++) { -- cslave_idx = (slave_idx + i) % num_of_slaves; -+ cslave_idx = bond_ethdev_populate_slave_by_user(bufs[i], slaves, num_of_slaves); -+ if (likely(cslave_idx < 0)) -+ cslave_idx = (slave_idx + i) % num_of_slaves; - slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i]; - } - -@@ -1176,7 +1194,11 @@ struct bwg_slave { - - for (i = 0; i < nb_bufs; i++) { - /* Populate slave mbuf arrays with mbufs for that slave. 
*/ -- uint16_t slave_idx = bufs_slave_port_idxs[i]; -+ int slave_idx; -+ -+ slave_idx = bond_ethdev_populate_slave_by_user(bufs[i], slave_port_ids, slave_count); -+ if (likely(slave_idx < 0)) -+ slave_idx = bufs_slave_port_idxs[i]; - - slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i]; - } -diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h -index d079462..d6072ea 100644 ---- a/lib/librte_mbuf/rte_mbuf.h -+++ b/lib/librte_mbuf/rte_mbuf.h -@@ -589,6 +589,7 @@ static inline struct rte_mbuf *rte_mbuf_raw_alloc(struct rte_mempool *mp) - - if (rte_mempool_get(mp, (void **)&m) < 0) - return NULL; -+ m->hash.txadapter.reserved2 = RTE_MBUF_PORT_INVALID; - __rte_mbuf_raw_sanity_check(m); - return m; - } -@@ -864,6 +865,7 @@ static inline void rte_pktmbuf_reset(struct rte_mbuf *m) - m->vlan_tci_outer = 0; - m->nb_segs = 1; - m->port = RTE_MBUF_PORT_INVALID; -+ m->hash.txadapter.reserved2 = RTE_MBUF_PORT_INVALID; - - m->ol_flags &= EXT_ATTACHED_MBUF; - m->packet_type = 0; --- -1.8.3.1 - diff --git a/patch/ubuntu_keepalived.patch b/patch/ubuntu_keepalived.patch deleted file mode 100644 index 7d13dbaa5..000000000 --- a/patch/ubuntu_keepalived.patch +++ /dev/null @@ -1,32 +0,0 @@ ---- tools/keepalived/configure-origin.ac 2020-07-29 14:31:45.282997622 +0800 -+++ tools/keepalived/configure.ac 2020-07-29 14:30:38.712993278 +0800 -@@ -1070,29 +1070,6 @@ AM_CONDITIONAL([BUILD_GENHASH], [test $B - - dnl ----[ Check for IPv4 devconf netlink support ]---- - IPV4_DEVCONF=No --if test .$enable_vrrp != .no; then -- dnl ----[Check have IPV4_DEVCONF defines - since Linux 3.11]---- -- SAV_CPPFLAGS="$CPPFLAGS" -- CPPFLAGS="$CPP_FLAGS $kernelinc" -- IPV4_DEVCONF=Yes -- AC_CHECK_DECLS([ -- IPV4_DEVCONF_ARP_IGNORE, -- IPV4_DEVCONF_ACCEPT_LOCAL, -- IPV4_DEVCONF_RP_FILTER, -- IPV4_DEVCONF_ARPFILTER], -- [], -- [ -- IPV4_DEVCONF=No -- break -- ], -- [[#include ]]) -- if test $IPV4_DEVCONF = Yes; then -- AC_DEFINE([_HAVE_IPV4_DEVCONF_], [ 1 ], [Define to 1 if have IPv4 
netlink device configuration]) -- add_system_opt([IPV4_DEVCONF]) -- fi -- AC_CHECK_HEADERS([linux/rtnetlink.h], [], [AC_MSG_ERROR([Unusable linux/rtnetlink.h])], [$RTNETLINK_EXTRA_INCLUDE]) -- CPPFLAGS="$SAV_CPPFLAGS" --fi - - dnl ----[ Check for IPv6 Advanced API (RFC3542) - since Linux 2.6.14 ]---- - IPV6_ADVANCED_API=No diff --git a/scripts/dpdk-build.sh b/scripts/dpdk-build.sh index 600a263d4..a43653dc9 100755 --- a/scripts/dpdk-build.sh +++ b/scripts/dpdk-build.sh @@ -5,85 +5,81 @@ build_options="-Denable_kmods=true" debug_options="-Dbuildtype=debug -Dc_args=-DRTE_MALLOC_DEBUG" -dpdkver=20.11.10 # default dpdk version (use stable version) -tarball=dpdk-${dpdkver}.tar.xz -srcdir=dpdk-stable-$dpdkver - -workdir="" +dpdkver=24.11 # default dpdk version (use stable version) +workdir=$(pwd)/dpdk/ patchdir="" function help() { + local default_patchdir=$(realpath ./patch/dpdk-stable-${dpdkver}) echo -e "\033[31musage: $0 [-d] [-w work-directory] [-p patch-directory]\033[0m" echo -e "\033[31mOPTIONS:\033[0m" - echo -e "\033[31m -v specify the dpdk version, default $dpdkver\033[0m" + echo -e "\033[31m -v specify the dpdk version, default ${dpdkver}\033[0m" echo -e "\033[31m -d build dpdk libary with debug info\033[0m" - echo -e "\033[31m -w specify the work directory prefix, default $(pwd)\033[0m" - echo -e "\033[31m -p specify the dpdk patch directory, default $(pwd)/patch/dpdk-stable-$dpdkver\033[0m" + echo -e "\033[31m -w specify the work directory prefix, default \"${workdir}\"\033[0m" + echo -e "\033[31m -p specify the dpdk patch directory, default \"${default_patchdir}\"\033[0m" } -function set_dpdk_version() +function dpdk_version2tarball() { - dpdkver=$1 - tarball=dpdk-${dpdkver}.tar.xz - srcdir=dpdk-stable-$dpdkver + local version=$1 + echo dpdk-${version}.tar.xz } -function set_work_directory() +function dpdk_tarball_source_directory() { - [ ! -d $1 ] && return 1 - workdir=$(realpath $1)/dpdk -} + local tarball=$1 -function set_patch_directory() -{ - [ ! 
-d $1 ] && return 1 - patchdir=$(realpath $1) + [ ! -f "$tarball" ] && return 1 + tar -tf ${tarball} | grep '/$' | head -n 1 | sed 's/\///' } ## parse args while getopts "hw:p:dv:" OPT; do case $OPT in - v) set_dpdk_version $OPTARG;; - w) set_work_directory $OPTARG ;; - p) set_patch_directory $OPTARG;; + v) dpdkver=$OPTARG;; + w) workdir=$OPTARG ;; + p) patchdir=$(realpath $OPTARG);; d) build_options="${build_options} ${debug_options}";; - ?) help && exit 1;; + ?) help;exit 1;; esac done -[ -z "$workdir" ] && workdir=$(pwd)/dpdk # use default work directory -[ -z "$patchdir" ] && patchdir=$(pwd)/patch/dpdk-stable-$dpdkver # use default dpdk patch directory - -[ ! -d $workdir ] && mkdir $workdir -echo -e "\033[32mwork directory: $workdir\033[0m" - -[ ! -d $patchdir ] && echo -e "\033[31mdpdk patch file directory doesn't exist: $patchdir\033[0m" && exit 1 -echo -e "\033[32mdpdk patch directory: $patchdir\033[0m" +[ -f $workdir ] && echo -e "\033[31mError: work diretory \"${workdir}\" is a regular file\033[0m" && exit 1 +[ ! -d $workdir ] && mkdir -p $workdir +workdir=$(realpath $workdir) +echo -e "\033[32musing work directory: $workdir\033[0m" -echo -e "\033[32mbuild options: $build_options\033[0m" +predir=$(pwd) +pushd $workdir ## prepare dpdk sources -cd $workdir +tarball=$(dpdk_version2tarball $dpdkver) if [ ! -f $tarball ]; then wget https://fast.dpdk.org/rel/$tarball -P $workdir - [ ! -f $tarball ] && echo -e "\033[31mfail to download $tarball\033[0m" && exit 1 + [ ! -f $tarball ] && echo -e "\033[31mfailed to download \"$tarball\"\033[0m" && exit 1 fi +srcdir=$(dpdk_tarball_source_directory $tarball) [ -d $workdir/$srcdir ] && echo -e "\033[33mremoving old source directory: $workdir/$srcdir\033[0m" && rm -rf $workdir/$srcdir tar xf $tarball -C $workdir -echo "$(pwd), $workdir, $srcdir" -[ ! -d $workdir/$srcdir ] && echo -e "\033[31m$workdir/$srcdir directory is missing\033[0m" && exit 1 +[ ! 
-d $workdir/$srcdir ] && echo -e "\033[31m dpdk source diretory \"$workdir/$srcdir\" not found\033[0m" && exit 1 ## patch dpdk +[ -z "$patchdir" ] && patchdir=$(realpath "$predir/patch/$srcdir") +[ ! -d "$patchdir" ] && echo -e "\033[31mError: dpdk patch directory \"${patchdir}\" not exist\033[0m" && exit 1 +echo -e "\033[32musing dpdk patch directory: $patchdir\033[0m" + for patchfile in $(ls $patchdir) do patch -p1 -d $workdir/$srcdir < $patchdir/$patchfile - [ $? -ne 0 ] && echo -e "\033[31mfail to patch: $patchfile\033[0m" && exit 1 + [ $? -ne 0 ] && echo -e "\033[31mfailed to patch: $patchfile\033[0m" && exit 1 echo -e "\033[32msucceed to patch: $patchfile\033[0m" done ## build dpdk and install +echo -e "\033[32mbuild options: $build_options\033[0m" + [ -d dpdkbuild ] && rm -rf dpdkbuild/* || mkdir dpdkbuild [ -d dpdklib ] && rm -rf dpdklib/* || mkdir dpdklib @@ -97,10 +93,12 @@ ninja -C dpdkbuild install kni=dpdkbuild/kernel/linux/kni/rte_kni.ko [ -f $kni ] && install -m 644 $kni dpdklib -echo -e "DPDK library installed successfully into directory: \033[32m$(pwd)/dpdklib\033[0m" +echo -e "DPDK library installed successfully into directory: \033[32m${workdir}/dpdklib\033[0m" ## export dpdk lib echo -e "You can use this library in dpvs by running the command below:" echo -e "\033[32m" echo -e "export PKG_CONFIG_PATH=$(find $(pwd) -name pkgconfig)" echo -e "\033[0m" + +popd diff --git a/src/VERSION b/src/VERSION index f97d35bb7..d2eb1902b 100755 --- a/src/VERSION +++ b/src/VERSION @@ -1,37 +1,23 @@ #!/bin/sh # program: dpvs -# Sep 19, 2024 # +# Jan 9, 2025 # ## # Features -# - dpvs: Support QUIC/HTTP3, add nginx patches and facilitating code snippets for use of quic. -# - dpvs: Support SCTP forwarding implementation. -# - dpvs: Support LLDP protocol. -# - dpvs: Update default dpdk version to dpdk-stable-20.11.10. -# - dpvs: IPVS supports ipset based allow/deny list which allows for cidr acl rule. -# - dpvs: Support IPv6 link-local address auto configuration. 
-# - tools: Add ipset supports in dpvs-agent. -# - tools: Add snapshot caches for dpvs-agent virtual server apis. -# - doc: Update README.md. +# - dpvs: Upgrade dpdk from 20.11 to 24.11. +# - dpvs: Support virtio-user kni implement. +# - dpvs: Remove flow director and replace it with rte_flow completely. +# - dpvs: IPv6 routes support flush and lpm differentiates identical prefix routes on different ports. # # Bugfixes -# - dpvs: Fix multicast address sync problems and add dpip supports for multicast address lookup. -# - dpvs: Fix build errors and warnings with gcc verison 8.0+. -# - dpvs: Fix coredump problem when starting dpvs with insufficient memory. -# - dpvs: Use dpdk random generator in critical datapath for performance enhancement. -# - dpvs: Fix ipset default address family problem. -# - dpvs: Fix segmentation fault problem when running on machines whose cpu number is over DPVS_MAX_LCORE. -# - dpvs: Refactor netif_rte_port_alloc with netif_alloc. -# - dpvs: Fix prolems in IPv6 all-nodes and all-routers address initialization. -# - dpvs: Fix memory corruption problem when retrieving nic's xstats. -# - tools: Fix concurrency racing problem when dpvs-agent and healthcheck changing rs simultaneously. -# - tools: Fix healthchech bad icmp checksum problem ocasionally appeared in udp and udpping checkers. -# - tools: Fix keepalived quorum up script not excuted problem when old rs removed and new ones added in a reload. -# - tools: Fix ipvsadm local IP won't remove problem. -# - tools: Fix ipset list-all problem and improve efficiency. -# - tools: Fix dpip delay problem when list empty ipset with sorting enabled. +# - dpvs: Fix packet reception problem caused by TX_OFFLOAD_MBUF_FAST_FREE. +# - dpvs: Fix dropped packet accounting problem caused by ARP replies from kni devices. +# - dpvs: Fix some logging and header including problems. +# - dpvs: Flush addrs and routes when vlan device removed. +# - conf: Fix init attribute for serveral config items. 
+# - script: Fix directory problems in dpdk build script dpdk-build.sh. # -export VERSION=1.9 -export RELEASE=8 +export VERSION=1.10 +export RELEASE=1 echo $VERSION-$RELEASE diff --git a/src/config.mk b/src/config.mk index e3a0007ec..3307a5f93 100644 --- a/src/config.mk +++ b/src/config.mk @@ -14,6 +14,11 @@ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # +# +# Notice: +# Do NOT configure DPVS features by modifying this file. +# Instead, use file `config.mk` in DPVS's home directory. +# CFLAGS += -D DPVS_MAX_SOCKET=$(CONFIG_DPVS_MAX_SOCKET) CFLAGS += -D DPVS_MAX_LCORE=$(CONFIG_DPVS_MAX_LCORE) @@ -22,11 +27,6 @@ ifeq ($(CONFIG_DPVS_AGENT), y) CFLAGS += -D CONFIG_DPVS_AGENT endif -# for ixgbe nic -ifeq ($(CONFIG_IXGEB_PMD), y) -CFLAGS += -D CONFIG_DPVS_FDIR -endif - ifeq ($(CONFIG_DPVS_LOG), y) CFLAGS += -D CONFIG_DPVS_LOG endif @@ -39,9 +39,9 @@ ifeq ($(CONFIG_ICMP_REDIRECT_CORE), y) CFLAGS += -D CONFIG_ICMP_REDIRECT_CORE endif -ifeq ($(CONFIG_KNI_VIRTIO_USER), y) +## CONFIG_KNI_VIRTIO_USER should always be ON from DPVS v1.10 +## since only virtio-user kni supported from then. 
CFLAGS += -D CONFIG_KNI_VIRTIO_USER -endif ifeq ($(CONFIG_DPVS_NEIGH_DEBUG), y) CFLAGS += -D CONFIG_DPVS_NEIGH_DEBUG diff --git a/src/eal_mem.c b/src/eal_mem.c index ce7e8e6bf..baff86f03 100644 --- a/src/eal_mem.c +++ b/src/eal_mem.c @@ -31,7 +31,12 @@ static uint64_t s_all_socket_heap_freesz[RTE_MAX_NUMA_NODES]; #else #define MAX_SEGMENT_NUM RTE_MAX_MEMSEG #endif + +#if RTE_VERSION >= RTE_VERSION_NUM(24, 11, 0, 0) +#define MAX_MEMZONE_NUM 2560 +#else #define MAX_MEMZONE_NUM RTE_MAX_MEMZONE +#endif #if RTE_VERSION >= RTE_VERSION_NUM(18, 11, 0, 0) static int dp_vs_get_all_socket_heap_freesz_stats(void) diff --git a/src/icmp.c b/src/icmp.c index 67de0ea35..04c952cab 100644 --- a/src/icmp.c +++ b/src/icmp.c @@ -57,7 +57,7 @@ static int icmp_echo(struct rte_mbuf *mbuf) uint16_t csum; struct flow4 fl4; - if (ich->icmp_type != RTE_IP_ICMP_ECHO_REQUEST || ich->icmp_code != 0) { + if (ich->icmp_type != RTE_ICMP_TYPE_ECHO_REQUEST || ich->icmp_code != 0) { RTE_LOG(WARNING, ICMP, "%s: not echo-request\n", __func__); goto errout; } @@ -79,7 +79,7 @@ static int icmp_echo(struct rte_mbuf *mbuf) goto errout; } - ich->icmp_type = RTE_IP_ICMP_ECHO_REPLY; + ich->icmp_type = RTE_ICMP_TYPE_ECHO_REPLY; /* recalc the checksum */ ich->icmp_cksum = 0; csum = rte_raw_cksum(ich, mbuf->pkt_len); diff --git a/src/ip_tunnel.c b/src/ip_tunnel.c index e26f3eee7..e99392502 100644 --- a/src/ip_tunnel.c +++ b/src/ip_tunnel.c @@ -894,9 +894,9 @@ int ip_tunnel_pull_header(struct rte_mbuf *mbuf, int hlen, __be16 in_proto) return EDPVS_INVPKT; /* clean up vlan info, it should be cleared by vlan module. 
*/ - if (unlikely(mbuf->ol_flags & PKT_RX_VLAN_STRIPPED)) { + if (unlikely(mbuf->ol_flags & RTE_MBUF_F_RX_VLAN_STRIPPED)) { mbuf->vlan_tci = 0; - mbuf->ol_flags &= (~PKT_RX_VLAN_STRIPPED); + mbuf->ol_flags &= (~RTE_MBUF_F_RX_VLAN_STRIPPED); } return EDPVS_OK; diff --git a/src/ipset/ipset_bitmap_ipmac.c b/src/ipset/ipset_bitmap_ipmac.c index 554ec03ba..85757a24b 100644 --- a/src/ipset/ipset_bitmap_ipmac.c +++ b/src/ipset/ipset_bitmap_ipmac.c @@ -158,10 +158,10 @@ bitmap_ipmac_test(struct ipset *set, struct rte_mbuf *mbuf, bool dst_match) if (dst_match) { e.id = ip_to_id(map, ntohl(ip4hdr->dst_addr)); - e.mac = &ehdr->d_addr.addr_bytes[0]; + e.mac = &ehdr->dst_addr.addr_bytes[0]; } else { e.id = ip_to_id(map, ntohl(ip4hdr->src_addr)); - e.mac = &ehdr->s_addr.addr_bytes[0]; + e.mac = &ehdr->src_addr.addr_bytes[0]; } return set->type->adtfn[IPSET_OP_TEST](set, &e, 0); diff --git a/src/ipset/ipset_hash_ip.c b/src/ipset/ipset_hash_ip.c index b0bc670fb..fd588754e 100644 --- a/src/ipset/ipset_hash_ip.c +++ b/src/ipset/ipset_hash_ip.c @@ -169,9 +169,9 @@ hash_ip_test6(struct ipset *set, struct rte_mbuf *mbuf, bool dst_match) memset(&e, 0, sizeof(e)); if (dst_match) - memcpy(&e.ip, ip6hdr->dst_addr, sizeof(e.ip)); + memcpy(&e.ip, &ip6hdr->dst_addr, sizeof(e.ip)); else - memcpy(&e.ip, ip6hdr->src_addr, sizeof(e.ip)); + memcpy(&e.ip, &ip6hdr->src_addr, sizeof(e.ip)); return set->type->adtfn[IPSET_OP_TEST](set, &e, 0); } diff --git a/src/ipset/ipset_hash_ipport.c b/src/ipset/ipset_hash_ipport.c index ebf39a5b8..0cc9081bf 100644 --- a/src/ipset/ipset_hash_ipport.c +++ b/src/ipset/ipset_hash_ipport.c @@ -243,11 +243,11 @@ hash_ipport_test6(struct ipset *set, struct rte_mbuf *mbuf, bool dst_match) e.proto = proto; if (dst_match) { - memcpy(&e.ip, ip6hdr->dst_addr, sizeof(e.ip)); + memcpy(&e.ip, &ip6hdr->dst_addr, sizeof(e.ip)); if (l4hdr) e.port = l4hdr->dst_port; } else { - memcpy(&e.ip, ip6hdr->src_addr, sizeof(e.ip)); + memcpy(&e.ip, &ip6hdr->src_addr, sizeof(e.ip)); if (l4hdr) 
e.port = l4hdr->src_port; } diff --git a/src/ipset/ipset_hash_ipportip.c b/src/ipset/ipset_hash_ipportip.c index 64823550e..1613a7dec 100644 --- a/src/ipset/ipset_hash_ipportip.c +++ b/src/ipset/ipset_hash_ipportip.c @@ -258,8 +258,8 @@ hash_ipportip_test6(struct ipset *set, struct rte_mbuf *mbuf, bool dst_match) memset(&e, 0, sizeof(e)); e.proto = proto; - memcpy(&e.ip1, ip6hdr->src_addr, sizeof(e.ip1)); - memcpy(&e.ip2, ip6hdr->dst_addr, sizeof(e.ip2)); + memcpy(&e.ip1, &ip6hdr->src_addr, sizeof(e.ip1)); + memcpy(&e.ip2, &ip6hdr->dst_addr, sizeof(e.ip2)); if (l4hdr) { if (dst_match) diff --git a/src/ipset/ipset_hash_ipportnet.c b/src/ipset/ipset_hash_ipportnet.c index 78a42008d..f39cc2fbd 100644 --- a/src/ipset/ipset_hash_ipportnet.c +++ b/src/ipset/ipset_hash_ipportnet.c @@ -256,8 +256,8 @@ hash_ipportnet_test6(struct ipset *set, struct rte_mbuf *mbuf, bool dst_match) // Unlikely other set types, which match source address first and then dest address, // ip,port,net always matches source address with its "net" part, dest address with its // "ip" part respectively, and its "port" part match is determined by param dst_match. 
- memcpy(&e.ip2, ip6hdr->dst_addr, sizeof(e.ip2)); // dst_ip - memcpy(&e.ip, ip6hdr->src_addr, sizeof(e.ip)); // src_net + memcpy(&e.ip2, &ip6hdr->dst_addr, sizeof(e.ip2)); // dst_ip + memcpy(&e.ip, &ip6hdr->src_addr, sizeof(e.ip)); // src_net if (l4hdr) { if (dst_match) e.port = l4hdr->dst_port; // dst_ip,dst_port,src_net diff --git a/src/ipset/ipset_hash_net.c b/src/ipset/ipset_hash_net.c index fb9084c32..189e6bcac 100644 --- a/src/ipset/ipset_hash_net.c +++ b/src/ipset/ipset_hash_net.c @@ -195,9 +195,9 @@ hash_net_test6(struct ipset *set, struct rte_mbuf *mbuf, bool dst_match) memset(&e, 0, sizeof(e)); if (dst_match) - memcpy(&e.ip, ip6hdr->dst_addr, sizeof(e.ip)); + memcpy(&e.ip, &ip6hdr->dst_addr, sizeof(e.ip)); else - memcpy(&e.ip, ip6hdr->src_addr, sizeof(e.ip)); + memcpy(&e.ip, &ip6hdr->src_addr, sizeof(e.ip)); return set->type->adtfn[IPSET_OP_TEST](set, &e, 0); } diff --git a/src/ipset/ipset_hash_netport.c b/src/ipset/ipset_hash_netport.c index fb17917d4..1ef4742c9 100644 --- a/src/ipset/ipset_hash_netport.c +++ b/src/ipset/ipset_hash_netport.c @@ -240,11 +240,11 @@ hash_netport_test6(struct ipset *set, struct rte_mbuf *mbuf, bool dst_match) memset(&e, 0, sizeof(e)); e.proto = proto; if (dst_match) { - memcpy(&e.ip, ip6hdr->dst_addr, sizeof(e.ip)); + memcpy(&e.ip, &ip6hdr->dst_addr, sizeof(e.ip)); if (l4hdr) e.port = l4hdr->dst_port; } else { - memcpy(&e.ip, ip6hdr->src_addr, sizeof(e.ip)); + memcpy(&e.ip, &ip6hdr->src_addr, sizeof(e.ip)); if (l4hdr) e.port = l4hdr->src_port; } diff --git a/src/ipset/ipset_hash_netportiface.c b/src/ipset/ipset_hash_netportiface.c index ef00c5d32..381bf0eaf 100644 --- a/src/ipset/ipset_hash_netportiface.c +++ b/src/ipset/ipset_hash_netportiface.c @@ -255,11 +255,11 @@ hash_netportiface_test6(struct ipset *set, struct rte_mbuf *mbuf, bool dst_match e.iface = mbuf->port; if (dst_match) { - memcpy(&e.ip, ip6hdr->dst_addr, sizeof(e.ip)); + memcpy(&e.ip, &ip6hdr->dst_addr, sizeof(e.ip)); if (l4hdr) e.port = l4hdr->dst_port; } 
else { - memcpy(&e.ip, ip6hdr->src_addr, sizeof(e.ip)); + memcpy(&e.ip, &ip6hdr->src_addr, sizeof(e.ip)); if (l4hdr) e.port = l4hdr->src_port; } diff --git a/src/ipset/ipset_hash_netportnet.c b/src/ipset/ipset_hash_netportnet.c index 16b14ec55..f6cf169e5 100644 --- a/src/ipset/ipset_hash_netportnet.c +++ b/src/ipset/ipset_hash_netportnet.c @@ -258,8 +258,8 @@ hash_netportnet_test6(struct ipset *set, struct rte_mbuf *mbuf, bool dst_match) memset(&e, 0, sizeof(e)); e.proto = proto; - memcpy(&e.ip1, ip6hdr->src_addr, sizeof(e.ip1)); - memcpy(&e.ip2, ip6hdr->dst_addr, sizeof(e.ip2)); + memcpy(&e.ip1, &ip6hdr->src_addr, sizeof(e.ip1)); + memcpy(&e.ip2, &ip6hdr->dst_addr, sizeof(e.ip2)); if (l4hdr) { if (dst_match) e.port = l4hdr->dst_port; diff --git a/src/ipset/ipset_hash_netportnetport.c b/src/ipset/ipset_hash_netportnetport.c index 2cf0090a5..1fd810fd1 100644 --- a/src/ipset/ipset_hash_netportnetport.c +++ b/src/ipset/ipset_hash_netportnetport.c @@ -272,8 +272,8 @@ hash_netportnetport_test6(struct ipset *set, struct rte_mbuf *mbuf, bool dst_mat memset(&e, 0, sizeof(e)); e.proto = proto; - memcpy(&e.ip1, ip6hdr->src_addr, sizeof(e.ip1)); - memcpy(&e.ip2, ip6hdr->dst_addr, sizeof(e.ip2)); + memcpy(&e.ip1, &ip6hdr->src_addr, sizeof(e.ip1)); + memcpy(&e.ip2, &ip6hdr->dst_addr, sizeof(e.ip2)); if (l4hdr) { e.port1 = l4hdr->src_port; e.port2 = l4hdr->dst_port; diff --git a/src/ipv4.c b/src/ipv4.c index 21884c7a5..a7fb62b10 100644 --- a/src/ipv4.c +++ b/src/ipv4.c @@ -532,7 +532,7 @@ int ipv4_local_out(struct rte_mbuf *mbuf) iph->total_length = htons(mbuf->pkt_len); - if (likely(mbuf->ol_flags & PKT_TX_IP_CKSUM)) { + if (likely(mbuf->ol_flags & RTE_MBUF_F_TX_IP_CKSUM)) { iph->hdr_checksum = 0; } else { ip4_send_csum(iph); diff --git a/src/ipv6/route6_lpm.c b/src/ipv6/route6_lpm.c index a686e5e7c..7936c6bdc 100644 --- a/src/ipv6/route6_lpm.c +++ b/src/ipv6/route6_lpm.c @@ -237,7 +237,9 @@ static struct route6 *rt6_lpm_lookup(const struct flow6 *fl6) struct lpm6_route 
*lpm6rt; struct route6 *rt6 = NULL; - if (rte_lpm6_lookup(this_lpm6_struct, (uint8_t*)&fl6->fl6_daddr, &idx) != 0) { + if (rte_lpm6_lookup(this_lpm6_struct, + (const struct rte_ipv6_addr *)&fl6->fl6_daddr, + &idx) != 0) { if (this_rt6_default) rte_atomic32_inc(&this_rt6_default->refcnt); return this_rt6_default; @@ -363,7 +365,8 @@ static int rt6_lpm_add_lcore(const struct dp_vs_route6_conf *rt6_cfg) rt6_fill_with_cfg(&entry->entry, rt6_cfg); rte_atomic32_set(&entry->entry.refcnt, 1); - if (rte_lpm6_is_rule_present(this_lpm6_struct, (uint8_t*)&rt6_cfg->dst.addr, + if (rte_lpm6_is_rule_present(this_lpm6_struct, + (const struct rte_ipv6_addr *)&rt6_cfg->dst.addr, (uint8_t)rt6_cfg->dst.plen, &lpm_nexthop)) { assert(lpm_nexthop < g_lpm6_conf_max_rules); head = this_rt6_array->entries[lpm_nexthop]; @@ -373,7 +376,8 @@ static int rt6_lpm_add_lcore(const struct dp_vs_route6_conf *rt6_cfg) if (unlikely(ret != EDPVS_OK)) goto rt6_free; this_rt6_array->cursor = lpm_nexthop; - ret = rte_lpm6_add(this_lpm6_struct, (uint8_t*)&entry->entry.rt6_dst.addr, + ret = rte_lpm6_add(this_lpm6_struct, + (const struct rte_ipv6_addr *)&entry->entry.rt6_dst.addr, (uint8_t)entry->entry.rt6_dst.plen, lpm_nexthop); if (unlikely(ret < 0)) { ret = EDPVS_DPDKAPIFAIL; @@ -441,7 +445,8 @@ static int rt6_lpm_del_lcore(const struct dp_vs_route6_conf *rt6_cfg) } else if (lpm6rt->next) { this_rt6_array->entries[lpm_nexthop] = lpm6rt->next; } else { - ret = rte_lpm6_delete(this_lpm6_struct, (uint8_t *)&entry->rt6_dst.addr, + ret = rte_lpm6_delete(this_lpm6_struct, + (const struct rte_ipv6_addr *)&entry->rt6_dst.addr, (uint8_t)entry->rt6_dst.plen); if (unlikely(ret < 0)) { /* rte_lpm6_delete return OK even if no satisfied route exists, @@ -507,7 +512,8 @@ static int rt6_lpm_flush_lcore(const struct dp_vs_route6_conf *rt6_cfg) } else if (lpm6rt->next) { this_rt6_array->entries[lpm_nexthop] = lpm6rt->next; } else { - if (rte_lpm6_delete(this_lpm6_struct, (uint8_t *)&entry->rt6_dst.addr, + if 
(rte_lpm6_delete(this_lpm6_struct, + (const struct rte_ipv6_addr *)&entry->rt6_dst.addr, (uint8_t)entry->rt6_dst.plen) < 0) { dump_rt6_prefix(&entry->rt6_dst, buf, sizeof(buf)); RTE_LOG(WARNING, RT6, "[%d]%s: rt6_lpm_flush_lcore del %s dev %s failed!\n", diff --git a/src/ipvs/ip_vs_proto_tcp.c b/src/ipvs/ip_vs_proto_tcp.c index b10aedd49..6e754cfcd 100644 --- a/src/ipvs/ip_vs_proto_tcp.c +++ b/src/ipvs/ip_vs_proto_tcp.c @@ -177,7 +177,7 @@ int tcp_send_csum(int af, int iphdrlen, struct tcphdr *th, if (likely(select_dev && (select_dev->flag & NETIF_PORT_FLAG_TX_TCP_CSUM_OFFLOAD))) { mbuf->l3_len = iphdrlen; mbuf->l4_len = (th->doff << 2); - mbuf->ol_flags |= (PKT_TX_TCP_CKSUM | PKT_TX_IPV6); + mbuf->ol_flags |= (RTE_MBUF_F_TX_TCP_CKSUM | RTE_MBUF_F_TX_IPV6); th->check = ip6_phdr_cksum(ip6h, mbuf->ol_flags, iphdrlen, IPPROTO_TCP); } else { if (mbuf_may_pull(mbuf, mbuf->pkt_len) != 0) @@ -196,7 +196,7 @@ int tcp_send_csum(int af, int iphdrlen, struct tcphdr *th, if (likely(select_dev && (select_dev->flag & NETIF_PORT_FLAG_TX_TCP_CSUM_OFFLOAD))) { mbuf->l3_len = iphdrlen; mbuf->l4_len = (th->doff << 2); - mbuf->ol_flags |= (PKT_TX_TCP_CKSUM | PKT_TX_IP_CKSUM | PKT_TX_IPV4); + mbuf->ol_flags |= (RTE_MBUF_F_TX_TCP_CKSUM | RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_IPV4); th->check = rte_ipv4_phdr_cksum(iph, mbuf->ol_flags); } else { if (mbuf_may_pull(mbuf, mbuf->pkt_len) != 0) diff --git a/src/ipvs/ip_vs_proto_udp.c b/src/ipvs/ip_vs_proto_udp.c index b0fa040f2..20360d3eb 100644 --- a/src/ipvs/ip_vs_proto_udp.c +++ b/src/ipvs/ip_vs_proto_udp.c @@ -103,7 +103,7 @@ int udp_send_csum(int af, int iphdrlen, struct rte_udp_hdr *uh, if (likely(select_dev && (select_dev->flag & NETIF_PORT_FLAG_TX_UDP_CSUM_OFFLOAD))) { mbuf->l3_len = iphdrlen; mbuf->l4_len = sizeof(struct rte_udp_hdr); - mbuf->ol_flags |= (PKT_TX_UDP_CKSUM | PKT_TX_IPV6); + mbuf->ol_flags |= (RTE_MBUF_F_TX_UDP_CKSUM | RTE_MBUF_F_TX_IPV6); uh->dgram_cksum = ip6_phdr_cksum(ip6h, mbuf->ol_flags, iphdrlen, IPPROTO_UDP); } 
else { @@ -138,7 +138,7 @@ int udp_send_csum(int af, int iphdrlen, struct rte_udp_hdr *uh, if (likely(select_dev && (select_dev->flag & NETIF_PORT_FLAG_TX_UDP_CSUM_OFFLOAD))) { mbuf->l3_len = iphdrlen; mbuf->l4_len = sizeof(struct rte_udp_hdr); - mbuf->ol_flags |= (PKT_TX_UDP_CKSUM | PKT_TX_IP_CKSUM | PKT_TX_IPV4); + mbuf->ol_flags |= (RTE_MBUF_F_TX_UDP_CKSUM | RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_IPV4); uh->dgram_cksum = rte_ipv4_phdr_cksum(iph, mbuf->ol_flags); } else { if (mbuf_may_pull(mbuf, mbuf->pkt_len) != 0) diff --git a/src/ipvs/ip_vs_synproxy.c b/src/ipvs/ip_vs_synproxy.c index 305b84f40..980151d6d 100644 --- a/src/ipvs/ip_vs_synproxy.c +++ b/src/ipvs/ip_vs_synproxy.c @@ -684,7 +684,7 @@ static void syn_proxy_reuse_mbuf(int af, struct rte_mbuf *mbuf, ip6h->ip6_dst = tmpaddr; ip6h->ip6_hlim = dp_vs_synproxy_ctrl_synack_ttl; - if (likely(mbuf->ol_flags & PKT_TX_TCP_CKSUM)) { + if (likely(mbuf->ol_flags & RTE_MBUF_F_TX_TCP_CKSUM)) { mbuf->l3_len = (void *)th - (void *)ip6h; mbuf->l4_len = (th->doff << 2); th->check = ip6_phdr_cksum(ip6h, mbuf->ol_flags, mbuf->l3_len, IPPROTO_TCP); @@ -704,7 +704,7 @@ static void syn_proxy_reuse_mbuf(int af, struct rte_mbuf *mbuf, iph->tos = 0; /* compute checksum */ - if (likely(mbuf->ol_flags & PKT_TX_TCP_CKSUM)) { + if (likely(mbuf->ol_flags & RTE_MBUF_F_TX_TCP_CKSUM)) { mbuf->l3_len = iphlen; mbuf->l4_len = (th->doff << 2); th->check = rte_ipv4_phdr_cksum((struct rte_ipv4_hdr*)iph, mbuf->ol_flags); @@ -714,7 +714,7 @@ static void syn_proxy_reuse_mbuf(int af, struct rte_mbuf *mbuf, tcp4_send_csum((struct rte_ipv4_hdr*)iph, th); } - if (likely(mbuf->ol_flags & PKT_TX_IP_CKSUM)) + if (likely(mbuf->ol_flags & RTE_MBUF_F_TX_IP_CKSUM)) iph->check = 0; else ip4_send_csum((struct rte_ipv4_hdr*)iph); @@ -790,9 +790,10 @@ int dp_vs_synproxy_syn_rcv(int af, struct rte_mbuf *mbuf, } if (likely(dev && (dev->flag & NETIF_PORT_FLAG_TX_TCP_CSUM_OFFLOAD))) { if (af == AF_INET) - mbuf->ol_flags |= (PKT_TX_TCP_CKSUM | PKT_TX_IP_CKSUM | 
PKT_TX_IPV4); + mbuf->ol_flags |= (RTE_MBUF_F_TX_TCP_CKSUM | + RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_IPV4); else - mbuf->ol_flags |= (PKT_TX_TCP_CKSUM | PKT_TX_IPV6); + mbuf->ol_flags |= (RTE_MBUF_F_TX_TCP_CKSUM | RTE_MBUF_F_TX_IPV6); } /* reuse mbuf */ @@ -806,9 +807,9 @@ int dp_vs_synproxy_syn_rcv(int af, struct rte_mbuf *mbuf, RTE_LOG(ERR, IPVS, "%s: no memory\n", __func__); goto syn_rcv_out; } - memcpy(ðaddr, ð->s_addr, sizeof(struct rte_ether_addr)); - memcpy(ð->s_addr, ð->d_addr, sizeof(struct rte_ether_addr)); - memcpy(ð->d_addr, ðaddr, sizeof(struct rte_ether_addr)); + memcpy(ðaddr, ð->src_addr, sizeof(struct rte_ether_addr)); + memcpy(ð->src_addr, ð->dst_addr, sizeof(struct rte_ether_addr)); + memcpy(ð->dst_addr, ðaddr, sizeof(struct rte_ether_addr)); if (unlikely(EDPVS_OK != (ret = netif_xmit(mbuf, dev)))) { RTE_LOG(ERR, IPVS, "%s: netif_xmit failed -- %s\n", @@ -1036,9 +1037,9 @@ static int syn_proxy_build_tcp_rst(int af, struct rte_mbuf *mbuf, } if (likely(dev && (dev->flag & NETIF_PORT_FLAG_TX_TCP_CSUM_OFFLOAD))) { if (af == AF_INET6) - mbuf->ol_flags |= (PKT_TX_TCP_CKSUM | PKT_TX_IPV6); + mbuf->ol_flags |= (RTE_MBUF_F_TX_TCP_CKSUM | RTE_MBUF_F_TX_IPV6); else - mbuf->ol_flags |= (PKT_TX_TCP_CKSUM | PKT_TX_IP_CKSUM | PKT_TX_IPV4); + mbuf->ol_flags |= (RTE_MBUF_F_TX_TCP_CKSUM | RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_IPV4); } /* exchange ports */ @@ -1080,7 +1081,7 @@ static int syn_proxy_build_tcp_rst(int af, struct rte_mbuf *mbuf, ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) - payload_len); /* compute checksum */ - if (likely(mbuf->ol_flags & PKT_TX_TCP_CKSUM)) { + if (likely(mbuf->ol_flags & RTE_MBUF_F_TX_TCP_CKSUM)) { mbuf->l3_len = l3_len; mbuf->l4_len = l4_len; th->check = ip6_phdr_cksum(ip6h, mbuf->ol_flags, mbuf->l3_len, IPPROTO_TCP); @@ -1101,7 +1102,7 @@ static int syn_proxy_build_tcp_rst(int af, struct rte_mbuf *mbuf, ip4h->tos = 0; /* compute checksum */ - if (likely(mbuf->ol_flags & PKT_TX_TCP_CKSUM)) { + if (likely(mbuf->ol_flags & 
RTE_MBUF_F_TX_TCP_CKSUM)) { mbuf->l3_len = l3_len; mbuf->l4_len = l4_len; th->check = rte_ipv4_phdr_cksum((struct rte_ipv4_hdr*)ip4h, mbuf->ol_flags); @@ -1111,7 +1112,7 @@ static int syn_proxy_build_tcp_rst(int af, struct rte_mbuf *mbuf, tcp4_send_csum((struct rte_ipv4_hdr*)ip4h, th); } - if (likely(mbuf->ol_flags & PKT_TX_IP_CKSUM)) + if (likely(mbuf->ol_flags & RTE_MBUF_F_TX_IP_CKSUM)) ip4h->check = 0; else ip4_send_csum((struct rte_ipv4_hdr*)ip4h); @@ -1164,9 +1165,9 @@ static int syn_proxy_send_tcp_rst(int af, struct rte_mbuf *mbuf) RTE_LOG(ERR, IPVS, "%s: no memory\n", __func__); return EDPVS_NOMEM; } - memcpy(ðaddr, ð->s_addr, sizeof(struct rte_ether_addr)); - memcpy(ð->s_addr, ð->d_addr, sizeof(struct rte_ether_addr)); - memcpy(ð->d_addr, ðaddr, sizeof(struct rte_ether_addr)); + memcpy(ðaddr, ð->src_addr, sizeof(struct rte_ether_addr)); + memcpy(ð->src_addr, ð->dst_addr, sizeof(struct rte_ether_addr)); + memcpy(ð->dst_addr, ðaddr, sizeof(struct rte_ether_addr)); dev = netif_port_get(mbuf->port); if (unlikely(!dev)) { diff --git a/src/ipvs/ip_vs_xmit.c b/src/ipvs/ip_vs_xmit.c index a39b0df10..d4aa42650 100644 --- a/src/ipvs/ip_vs_xmit.c +++ b/src/ipvs/ip_vs_xmit.c @@ -73,7 +73,7 @@ static int __dp_vs_fast_xmit_fnat4(struct dp_vs_proto *proto, ip4h = ip4_hdr(mbuf); } - if (likely(mbuf->ol_flags & PKT_TX_IP_CKSUM)) { + if (likely(mbuf->ol_flags & RTE_MBUF_F_TX_IP_CKSUM)) { ip4h->hdr_checksum = 0; } else { ip4_send_csum(ip4h); @@ -81,8 +81,8 @@ static int __dp_vs_fast_xmit_fnat4(struct dp_vs_proto *proto, eth = (struct rte_ether_hdr *)rte_pktmbuf_prepend(mbuf, (uint16_t)sizeof(struct rte_ether_hdr)); - rte_ether_addr_copy(&conn->in_dmac, ð->d_addr); - rte_ether_addr_copy(&conn->in_smac, ð->s_addr); + rte_ether_addr_copy(&conn->in_dmac, ð->dst_addr); + rte_ether_addr_copy(&conn->in_smac, ð->src_addr); eth->ether_type = rte_cpu_to_be_16(packet_type); mbuf->packet_type = packet_type; @@ -134,8 +134,8 @@ static int __dp_vs_fast_xmit_fnat6(struct dp_vs_proto *proto, 
eth = (struct rte_ether_hdr *)rte_pktmbuf_prepend(mbuf, (uint16_t)sizeof(struct rte_ether_hdr)); - rte_ether_addr_copy(&conn->in_dmac, ð->d_addr); - rte_ether_addr_copy(&conn->in_smac, ð->s_addr); + rte_ether_addr_copy(&conn->in_dmac, ð->dst_addr); + rte_ether_addr_copy(&conn->in_smac, ð->src_addr); eth->ether_type = rte_cpu_to_be_16(packet_type); mbuf->packet_type = packet_type; @@ -195,7 +195,7 @@ static int __dp_vs_fast_outxmit_fnat4(struct dp_vs_proto *proto, return err; } - if (likely(mbuf->ol_flags & PKT_TX_IP_CKSUM)) { + if (likely(mbuf->ol_flags & RTE_MBUF_F_TX_IP_CKSUM)) { ip4h->hdr_checksum = 0; } else { ip4_send_csum(ip4h); @@ -203,8 +203,8 @@ static int __dp_vs_fast_outxmit_fnat4(struct dp_vs_proto *proto, eth = (struct rte_ether_hdr *)rte_pktmbuf_prepend(mbuf, (uint16_t)sizeof(struct rte_ether_hdr)); - rte_ether_addr_copy(&conn->out_dmac, ð->d_addr); - rte_ether_addr_copy(&conn->out_smac, ð->s_addr); + rte_ether_addr_copy(&conn->out_dmac, ð->dst_addr); + rte_ether_addr_copy(&conn->out_smac, ð->src_addr); eth->ether_type = rte_cpu_to_be_16(packet_type); mbuf->packet_type = packet_type; @@ -256,8 +256,8 @@ static int __dp_vs_fast_outxmit_fnat6(struct dp_vs_proto *proto, eth = (struct rte_ether_hdr *)rte_pktmbuf_prepend(mbuf, (uint16_t)sizeof(struct rte_ether_hdr)); - rte_ether_addr_copy(&conn->out_dmac, ð->d_addr); - rte_ether_addr_copy(&conn->out_smac, ð->s_addr); + rte_ether_addr_copy(&conn->out_dmac, ð->dst_addr); + rte_ether_addr_copy(&conn->out_smac, ð->src_addr); eth->ether_type = rte_cpu_to_be_16(packet_type); mbuf->packet_type = packet_type; @@ -302,8 +302,8 @@ static void dp_vs_save_xmit_info(struct rte_mbuf *mbuf, eth = (struct rte_ether_hdr *)rte_pktmbuf_prepend(mbuf, mbuf->l2_len); - rte_ether_addr_copy(ð->s_addr, &conn->out_dmac); - rte_ether_addr_copy(ð->d_addr, &conn->out_smac); + rte_ether_addr_copy(ð->src_addr, &conn->out_dmac); + rte_ether_addr_copy(ð->dst_addr, &conn->out_smac); rte_pktmbuf_adj(mbuf, sizeof(struct rte_ether_hdr)); } @@ 
-331,8 +331,8 @@ static void dp_vs_save_outxmit_info(struct rte_mbuf *mbuf, eth = (struct rte_ether_hdr *)rte_pktmbuf_prepend(mbuf, mbuf->l2_len); - rte_ether_addr_copy(ð->s_addr, &conn->in_dmac); - rte_ether_addr_copy(ð->d_addr, &conn->in_smac); + rte_ether_addr_copy(ð->src_addr, &conn->in_dmac); + rte_ether_addr_copy(ð->dst_addr, &conn->in_smac); rte_pktmbuf_adj(mbuf, sizeof(struct rte_ether_hdr)); } @@ -480,7 +480,7 @@ static int __dp_vs_xmit_fnat4(struct dp_vs_proto *proto, iph = ip4_hdr(mbuf); } - if (likely(mbuf->ol_flags & PKT_TX_IP_CKSUM)) { + if (likely(mbuf->ol_flags & RTE_MBUF_F_TX_IP_CKSUM)) { iph->hdr_checksum = 0; } else { ip4_send_csum(iph); @@ -679,7 +679,7 @@ static int __dp_vs_xmit_fnat64(struct dp_vs_proto *proto, ip4h = ip4_hdr(mbuf); } - if (likely(mbuf->ol_flags & PKT_TX_IP_CKSUM)) { + if (likely(mbuf->ol_flags & RTE_MBUF_F_TX_IP_CKSUM)) { ip4h->hdr_checksum = 0; } else { ip4_send_csum(ip4h); @@ -803,7 +803,7 @@ static int __dp_vs_out_xmit_fnat4(struct dp_vs_proto *proto, goto errout; } - if (likely(mbuf->ol_flags & PKT_TX_IP_CKSUM)) { + if (likely(mbuf->ol_flags & RTE_MBUF_F_TX_IP_CKSUM)) { iph->hdr_checksum = 0; } else { ip4_send_csum(iph); @@ -1406,7 +1406,7 @@ static int __dp_vs_xmit_snat4(struct dp_vs_proto *proto, } /* L3 re-checksum */ - if (likely(mbuf->ol_flags & PKT_TX_IP_CKSUM)) + if (likely(mbuf->ol_flags & RTE_MBUF_F_TX_IP_CKSUM)) iph->hdr_checksum = 0; else ip4_send_csum(iph); @@ -1566,7 +1566,7 @@ static int __dp_vs_out_xmit_snat4(struct dp_vs_proto *proto, } /* L3 re-checksum */ - if (likely(mbuf->ol_flags & PKT_TX_IP_CKSUM)) + if (likely(mbuf->ol_flags & RTE_MBUF_F_TX_IP_CKSUM)) iph->hdr_checksum = 0; else ip4_send_csum(iph); @@ -1605,7 +1605,7 @@ static int dp_vs_fast_xmit_nat(struct dp_vs_proto *proto, return err; } - if (likely(mbuf->ol_flags & PKT_TX_IP_CKSUM)) { + if (likely(mbuf->ol_flags & RTE_MBUF_F_TX_IP_CKSUM)) { iph->hdr_checksum = 0; } else { ip4_send_csum(iph); @@ -1613,8 +1613,8 @@ static int 
dp_vs_fast_xmit_nat(struct dp_vs_proto *proto, eth = (struct rte_ether_hdr *)rte_pktmbuf_prepend(mbuf, (uint16_t)sizeof(struct rte_ether_hdr)); - rte_ether_addr_copy(&conn->in_dmac, ð->d_addr); - rte_ether_addr_copy(&conn->in_smac, ð->s_addr); + rte_ether_addr_copy(&conn->in_dmac, ð->dst_addr); + rte_ether_addr_copy(&conn->in_smac, ð->src_addr); eth->ether_type = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4); mbuf->packet_type = RTE_ETHER_TYPE_IPV4; @@ -1650,7 +1650,7 @@ static int dp_vs_fast_outxmit_nat(struct dp_vs_proto *proto, return err; } - if (likely(mbuf->ol_flags & PKT_TX_IP_CKSUM)) { + if (likely(mbuf->ol_flags & RTE_MBUF_F_TX_IP_CKSUM)) { iph->hdr_checksum = 0; } else { ip4_send_csum(iph); @@ -1658,8 +1658,8 @@ static int dp_vs_fast_outxmit_nat(struct dp_vs_proto *proto, eth = (struct rte_ether_hdr *)rte_pktmbuf_prepend(mbuf, (uint16_t)sizeof(struct rte_ether_hdr)); - rte_ether_addr_copy(&conn->out_dmac, ð->d_addr); - rte_ether_addr_copy(&conn->out_smac, ð->s_addr); + rte_ether_addr_copy(&conn->out_dmac, ð->dst_addr); + rte_ether_addr_copy(&conn->out_smac, ð->src_addr); eth->ether_type = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4); mbuf->packet_type = RTE_ETHER_TYPE_IPV4; @@ -1817,7 +1817,7 @@ static int __dp_vs_xmit_nat4(struct dp_vs_proto *proto, goto errout; } - if (likely(mbuf->ol_flags & PKT_TX_IP_CKSUM)) { + if (likely(mbuf->ol_flags & RTE_MBUF_F_TX_IP_CKSUM)) { iph->hdr_checksum = 0; } else { ip4_send_csum(iph); @@ -1989,7 +1989,7 @@ static int __dp_vs_out_xmit_nat4(struct dp_vs_proto *proto, goto errout; } - if (likely(mbuf->ol_flags & PKT_TX_IP_CKSUM)) { + if (likely(mbuf->ol_flags & RTE_MBUF_F_TX_IP_CKSUM)) { iph->hdr_checksum = 0; } else { ip4_send_csum(iph); @@ -2156,7 +2156,7 @@ static int __dp_vs_xmit_tunnel4(struct dp_vs_proto *proto, new_iph->packet_id = ip4_select_id(new_iph); if (rt->port && rt->port->flag & NETIF_PORT_FLAG_TX_IP_CSUM_OFFLOAD) { - mbuf->ol_flags |= PKT_TX_IP_CKSUM; + mbuf->ol_flags |= RTE_MBUF_F_TX_IP_CKSUM; new_iph->hdr_checksum = 
0; } else { ip4_send_csum(new_iph); @@ -2313,7 +2313,7 @@ static int __dp_vs_xmit_tunnel_6o4(struct dp_vs_proto *proto, new_iph->packet_id = ip4_select_id(new_iph); if (rt->port && rt->port->flag & NETIF_PORT_FLAG_TX_IP_CSUM_OFFLOAD) { - mbuf->ol_flags |= PKT_TX_IP_CKSUM; + mbuf->ol_flags |= RTE_MBUF_F_TX_IP_CKSUM; new_iph->hdr_checksum = 0; } else { ip4_send_csum(new_iph); diff --git a/src/kni.c b/src/kni.c index cf9d4ccb1..27c0920b9 100644 --- a/src/kni.c +++ b/src/kni.c @@ -32,18 +32,15 @@ #include #include #include "conf/common.h" -#include "dpdk.h" #include "netif.h" #include "conf/netif_addr.h" #include "ctrl.h" #include "kni.h" #include "vlan.h" -#include "linux_if.h" #include "conf/kni.h" #include "conf/sockopts.h" #define Kni /* KNI is defined */ -#define RTE_LOGTYPE_Kni RTE_LOGTYPE_USER1 #define KNI_RX_RING_ELEMS 2048 bool g_kni_enabled = true; @@ -72,10 +69,6 @@ static struct virtio_kni* virtio_kni_alloc(struct netif_port *dev, const char *i struct virtio_kni *kni = NULL; char portargs[1024]; char portname[RTE_ETH_NAME_MAX_LEN]; - struct { - struct ethtool_gfeatures hdr; - struct ethtool_get_features_block blocks[1]; - } gfeatures; kni = rte_zmalloc("virtio_kni", sizeof(*kni), RTE_CACHE_LINE_SIZE); if (unlikely(!kni)) @@ -114,18 +107,6 @@ static struct virtio_kni* virtio_kni_alloc(struct netif_port *dev, const char *i goto errout; } - // TODO: Support tx-csum offload on virtio-user kni device. - if (linux_get_if_features(kni->ifname, 1, (struct ethtool_gfeatures *)&gfeatures) < 0) - RTE_LOG(WARNING, Kni, "linux_get_if_features(%s) failed\n", kni->ifname); - else if (gfeatures.blocks[0].requested & 0x1A - /* NETIF_F_IP_CSUM_BIT|NETIF_F_HW_CSUM_BIT|NETIF_F_IPV6_CSUM_BIT */) - RTE_LOG(INFO, Kni, "%s: tx-csum offload supported but to be disabled on %s!\n", - __func__, kni->ifname); - - // Disable tx-csum offload, and delegate the task to device driver. 
- if (linux_set_tx_csum_offload(kni->ifname, 0) < 0) - RTE_LOG(WARNING, Kni, "failed to disable tx-csum offload on %s\n", kni->ifname); - RTE_ETH_FOREACH_DEV(pid) { rte_eth_dev_get_name_by_port(pid, portname); if (!strncmp(portname, kni->dpdk_portname, sizeof(kni->dpdk_portname))) { @@ -163,22 +144,21 @@ static void virtio_kni_free(struct virtio_kni **pkni) static struct rte_eth_conf virtio_kni_eth_conf = { .rxmode = { - .mq_mode = ETH_MQ_RX_NONE, - .max_rx_pkt_len = ETHER_MAX_LEN, - .split_hdr_size = 0, - .offloads = DEV_RX_OFFLOAD_CHECKSUM | DEV_RX_OFFLOAD_TCP_LRO, + .mq_mode = RTE_ETH_MQ_RX_NONE, + .mtu = RTE_ETHER_MTU, + //.offloads = RTE_ETH_RX_OFFLOAD_CHECKSUM | RTE_ETH_RX_OFFLOAD_TCP_LRO, }, .rx_adv_conf = { .rss_conf = { - .rss_hf = ETH_RSS_IP | ETH_RSS_TCP | ETH_RSS_UDP, + .rss_hf = RTE_ETH_RSS_IP | RTE_ETH_RSS_TCP | RTE_ETH_RSS_UDP, }, }, .txmode = { - .mq_mode = ETH_MQ_TX_NONE, - .offloads = DEV_TX_OFFLOAD_MBUF_FAST_FREE - | DEV_TX_OFFLOAD_TCP_TSO | DEV_TX_OFFLOAD_UDP_TSO - | DEV_TX_OFFLOAD_IPV4_CKSUM | DEV_TX_OFFLOAD_TCP_CKSUM - | DEV_TX_OFFLOAD_UDP_CKSUM | DEV_TX_OFFLOAD_SCTP_CKSUM, + .mq_mode = RTE_ETH_MQ_TX_NONE, + .offloads = RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE + | RTE_ETH_TX_OFFLOAD_TCP_TSO | RTE_ETH_TX_OFFLOAD_UDP_TSO + | RTE_ETH_TX_OFFLOAD_IPV4_CKSUM | RTE_ETH_TX_OFFLOAD_TCP_CKSUM + | RTE_ETH_TX_OFFLOAD_UDP_CKSUM | RTE_ETH_TX_OFFLOAD_SCTP_CKSUM, }, }; @@ -233,6 +213,8 @@ static int virtio_kni_start(struct virtio_kni *kni) return EDPVS_DPDKAPIFAIL; } + //disable_kni_tx_csum_offload(kni->ifname); + rte_eth_macaddr_get(kni->dpdk_pid, &macaddr); if (!eth_addr_equal(&macaddr, &kni->master->kni.addr)) { RTE_LOG(INFO, Kni, "%s: update %s mac addr: %s->%s\n", __func__, kni->ifname, diff --git a/src/lldp.c b/src/lldp.c index 324a440e5..d884d9bc3 100644 --- a/src/lldp.c +++ b/src/lldp.c @@ -1453,8 +1453,8 @@ static int lldp_xmit(struct netif_port *dev, bool in_timer) ehdr = (struct rte_ether_hdr *)rte_pktmbuf_prepend(mbuf, sizeof(*ehdr)); if (unlikely(!ptr)) 
return EDPVS_NOROOM; - rte_memcpy(&ehdr->d_addr, &LLDP_ETHER_ADDR_DST, sizeof(ehdr->d_addr)); - rte_memcpy(&ehdr->s_addr, &dev->addr, sizeof(ehdr->s_addr)); + rte_memcpy(&ehdr->dst_addr, &LLDP_ETHER_ADDR_DST, sizeof(ehdr->dst_addr)); + rte_memcpy(&ehdr->src_addr, &dev->addr, sizeof(ehdr->src_addr)); ehdr->ether_type = rte_cpu_to_be_16(RTE_ETHER_TYPE_LLDP); if (dev->type == PORT_TYPE_BOND_SLAVE) { diff --git a/src/log.c b/src/log.c index 6dc6f83cf..f6d3fde53 100644 --- a/src/log.c +++ b/src/log.c @@ -329,7 +329,7 @@ int log_slave_init(void) return EDPVS_DISABLED; RTE_LCORE_FOREACH_WORKER(lcore_id) { - if (rte_eal_get_lcore_state(lcore_id) == FINISHED) { + if (rte_eal_get_lcore_state(lcore_id) == WAIT) { rte_eal_wait_lcore(lcore_id); dpvs_log_thread_lcore_set(lcore_id); break; diff --git a/src/main.c b/src/main.c index 5cf291a24..600b9b44f 100644 --- a/src/main.c +++ b/src/main.c @@ -57,8 +57,8 @@ static void inline dpdk_version_check(void) { -#if RTE_VERSION < RTE_VERSION_NUM(20, 11, 1, 0) - rte_panic("The current DPVS needs dpdk-stable-20.11.1 or higher. " +#if RTE_VERSION < RTE_VERSION_NUM(24, 11, 0, 0) + rte_panic("The current DPVS requires dpdk-stable-24.11 or higher. 
" "Try old releases if you are using earlier dpdk versions."); #endif } @@ -343,7 +343,7 @@ int main(int argc, char *argv[]) for (pid = 0; pid < nports; pid++) { dev = netif_port_get(pid); if (!dev) { - RTE_LOG(WARNING, DPVS, "port %d not found\n", pid); + RTE_LOG(INFO, DPVS, "netif port of portid %d not found, likely kni portid, skip ...\n", pid); continue; } diff --git a/src/mbuf.c b/src/mbuf.c index 8c913eb36..137cc16da 100644 --- a/src/mbuf.c +++ b/src/mbuf.c @@ -28,10 +28,9 @@ #include "ipv4.h" #include "sys_time.h" -#define EMBUF -#define RTE_LOGTYPE_EMBUF RTE_LOGTYPE_USER1 +#define RTE_LOGTYPE_MBUF RTE_LOGTYPE_USER1 -#define MBUF_DYNFIELDS_MAX 8 +#define MBUF_DYNFIELDS_MAX 8 static int mbuf_dynfields_offset[MBUF_DYNFIELDS_MAX]; void *mbuf_userdata(struct rte_mbuf *mbuf, mbuf_usedata_field_t field) @@ -68,7 +67,7 @@ int mbuf_may_pull(struct rte_mbuf *mbuf, unsigned int len) /* different from skb, there's no way to expand mbuf's tail room, * because mbuf size is determined when init mbuf pool */ if (rte_pktmbuf_tailroom(mbuf) < delta) { - RTE_LOG(ERR, EMBUF, "%s: no tail room.", __func__); + RTE_LOG(ERR, MBUF, "%s: no tail room.", __func__); return -1; } @@ -121,7 +120,7 @@ void mbuf_copy_metadata(struct rte_mbuf *mi, struct rte_mbuf *m) mi->next = NULL; mi->pkt_len = mi->data_len; mi->nb_segs = 1; - mi->ol_flags = m->ol_flags & (~IND_ATTACHED_MBUF); + mi->ol_flags = m->ol_flags & (~RTE_MBUF_F_INDIRECT); mi->packet_type = m->packet_type; mbuf_userdata_reset(mi); @@ -166,7 +165,7 @@ struct rte_mbuf *mbuf_copy(struct rte_mbuf *md, struct rte_mempool *mp) } #ifdef CONFIG_DPVS_MBUF_DEBUG -inline void dp_vs_mbuf_dump(const char *msg, int af, const struct rte_mbuf *mbuf) +void dp_vs_mbuf_dump(const char *msg, int af, const struct rte_mbuf *mbuf) { char stime[SYS_TIME_STR_LEN]; char sbuf[64], dbuf[64]; diff --git a/src/mempool.c b/src/mempool.c index ca852b4bc..c8ccc4ea2 100644 --- a/src/mempool.c +++ b/src/mempool.c @@ -305,7 +305,7 @@ bool dpvs_mp_elem_ok(void 
*obj) } /* apply the patch to get `rte_memmory_ok`: - * dpdk-stable-17.11.6/enable-dpdk-eal-memory-debug.patch */ + * patch//xxxxx-enable-dpdk-eal-memory-debug.patch */ if (cookie->flag == MEM_OBJ_FROM_HEAP) assert(rte_memmory_ok((void *)cookie)); diff --git a/src/neigh.c b/src/neigh.c index c26e9e2ca..8cd384fcb 100644 --- a/src/neigh.c +++ b/src/neigh.c @@ -424,12 +424,12 @@ static void neigh_fill_mac(struct neighbour_entry *neighbour, if (!neighbour && target) { ipv6_mac_mult(target, &mult_eth); - rte_ether_addr_copy(&mult_eth, ð->d_addr); + rte_ether_addr_copy(&mult_eth, ð->dst_addr); } else { - rte_ether_addr_copy(&neighbour->eth_addr, ð->d_addr); + rte_ether_addr_copy(&neighbour->eth_addr, ð->dst_addr); } - rte_ether_addr_copy(&port->addr, ð->s_addr); + rte_ether_addr_copy(&port->addr, ð->src_addr); pkt_type = (uint16_t)m->packet_type; eth->ether_type = rte_cpu_to_be_16(pkt_type); } @@ -510,12 +510,12 @@ int neigh_resolve_input(struct rte_mbuf *m, struct netif_port *port) (uint16_t)sizeof(struct rte_ether_hdr)); if (rte_be_to_cpu_16(arp->arp_opcode) == RTE_ARP_OP_REQUEST) { - rte_ether_addr_copy(ð->s_addr, ð->d_addr); - rte_memcpy(ð->s_addr, &port->addr, 6); + rte_ether_addr_copy(ð->src_addr, ð->dst_addr); + rte_memcpy(ð->src_addr, &port->addr, 6); arp->arp_opcode = rte_cpu_to_be_16(RTE_ARP_OP_REPLY); rte_ether_addr_copy(&arp->arp_data.arp_sha, &arp->arp_data.arp_tha); - rte_ether_addr_copy(ð->s_addr, &arp->arp_data.arp_sha); + rte_ether_addr_copy(ð->src_addr, &arp->arp_data.arp_sha); ipaddr = arp->arp_data.arp_sip; arp->arp_data.arp_sip = arp->arp_data.arp_tip; @@ -568,8 +568,8 @@ static int neigh_send_arp(struct netif_port *port, uint32_t src_ip, uint32_t dst eth = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); arp = (struct rte_arp_hdr *)ð[1]; - memset(ð->d_addr, 0xFF, 6); - rte_ether_addr_copy(&port->addr, ð->s_addr); + memset(ð->dst_addr, 0xFF, 6); + rte_ether_addr_copy(&port->addr, ð->src_addr); eth->ether_type = htons(RTE_ETHER_TYPE_ARP); memset(arp, 0, 
sizeof(struct rte_arp_hdr)); diff --git a/src/netif.c b/src/netif.c index 7f1d7b6d6..e3596c574 100644 --- a/src/netif.c +++ b/src/netif.c @@ -24,8 +24,11 @@ #include #include #include -#include "dpdk.h" +#include +#include + #include "conf/common.h" +#include "dpdk.h" #include "netif.h" #include "netif_addr.h" #include "conf/netif_addr.h" @@ -41,11 +44,7 @@ #include "scheduler.h" #include "netif_flow.h" #include "linux_if.h" - -#include -#include -#include -#include +#include "ipvs/redirect.h" #ifdef CONFIG_ICMP_REDIRECT_CORE #include "icmp.h" #endif @@ -73,7 +72,7 @@ int netif_pktpool_mbuf_cache = NETIF_PKTPOOL_MBUF_CACHE_DEF; #define ARP_RING_SIZE 2048 -#define RETA_CONF_SIZE (ETH_RSS_RETA_SIZE_512 / RTE_RETA_GROUP_SIZE) +#define RETA_CONF_SIZE (RTE_ETH_RSS_RETA_SIZE_512 / RTE_ETH_RETA_GROUP_SIZE) /* physical nic id = phy_pid_base + index */ static portid_t phy_pid_base = 0; @@ -89,6 +88,9 @@ static uint16_t g_nports; /*for arp process*/ static struct rte_ring *arp_ring[DPVS_MAX_LCORE]; +/* use fuzzy match instead of perfect match, refer to dpdk:rte_flow_item_fuzzy */ +static int flow_fuzzy_match = 0; + #define NETIF_BOND_MODE_DEF BONDING_MODE_ROUND_ROBIN #define NETIF_BOND_NUMA_NODE_DEF 0 @@ -104,6 +106,7 @@ struct port_conf_stream { int tx_queue_nb; int tx_desc_nb; + bool tx_mbuf_fast_free; bool promisc_mode; bool allmulticast; @@ -162,6 +165,11 @@ static struct list_head port_ntab[NETIF_PORT_TABLE_BUCKETS]; /* hashed by name * /* function declarations */ static void kni_lcore_loop(void *dummy); +bool netif_flow_fuzzy_match(void) +{ + return !!flow_fuzzy_match; +}; + bool is_physical_port(portid_t pid) { return pid >= phy_pid_base && pid < phy_pid_end; @@ -278,9 +286,6 @@ static void pktpool_cache_handler(vector_t tokens) FREE_PTR(str); } -#ifdef CONFIG_DPVS_FDIR -static enum rte_fdir_mode g_fdir_mode = RTE_FDIR_MODE_PERFECT; - static void fdir_mode_handler(vector_t tokens) { char *mode, *str = set_value(tokens); @@ -289,19 +294,18 @@ static void 
fdir_mode_handler(vector_t tokens) mode = strlwr(str); if (!strncmp(mode, "perfect", sizeof("perfect"))) - g_fdir_mode = RTE_FDIR_MODE_PERFECT; + flow_fuzzy_match = 0; else if (!strncmp(mode, "signature", sizeof("signature"))) - g_fdir_mode = RTE_FDIR_MODE_SIGNATURE; + flow_fuzzy_match = 1; else { RTE_LOG(WARNING, NETIF, "invalid fdir_mode %s, using default %s\n", mode, "perfect"); - g_fdir_mode = RTE_FDIR_MODE_PERFECT; + flow_fuzzy_match = 0; } - RTE_LOG(INFO, NETIF, "g_fdir_mode = %s\n", mode); + RTE_LOG(INFO, NETIF, "fdir_mode = %s\n", mode); FREE_PTR(str); } -#endif static void device_handler(vector_t tokens) { @@ -323,6 +327,7 @@ static void device_handler(vector_t tokens) port_cfg->tx_queue_nb = -1; port_cfg->rx_desc_nb = NETIF_NB_RX_DESC_DEF; port_cfg->tx_desc_nb = NETIF_NB_TX_DESC_DEF; + port_cfg->tx_mbuf_fast_free = true; port_cfg->mtu = NETIF_DEFAULT_ETH_MTU; port_cfg->promisc_mode = false; @@ -443,6 +448,31 @@ static void tx_desc_nb_handler(vector_t tokens) FREE_PTR(str); } +static void tx_mbuf_fast_free_handler(vector_t tokens) +{ + int val = -1; + char *str = set_value(tokens); + struct port_conf_stream *current_device = list_entry(port_list.next, + struct port_conf_stream, port_list_node); + + assert(str); + if (!strcasecmp(str, "off")) + val = 0; + else if (!strcasecmp(str, "on")) + val = 1; + + if (val < 0) { + RTE_LOG(WARNING, NETIF, "invalid %s:tx_mbuf_fast_free, using default ON\n", + current_device->name); + current_device->tx_mbuf_fast_free = true; + } else { + current_device->tx_mbuf_fast_free = !!val; + RTE_LOG(INFO, NETIF, "%s:tx_mbuf_fast_free = %s\n", current_device->name, str); + } + + FREE_PTR(str); +} + static void promisc_mode_handler(vector_t tokens) { struct port_conf_stream *current_device = list_entry(port_list.next, @@ -905,9 +935,7 @@ void netif_keyword_value_init(void) /* KW_TYPE_INIT keyword */ netif_pktpool_nb_mbuf = NETIF_PKTPOOL_NB_MBUF_DEF; netif_pktpool_mbuf_cache = NETIF_PKTPOOL_MBUF_CACHE_DEF; -#ifdef CONFIG_DPVS_FDIR - 
g_fdir_mode = RTE_FDIR_MODE_PERFECT; -#endif + flow_fuzzy_match = 0; } /* KW_TYPE_NORMAL keyword */ } @@ -917,9 +945,7 @@ void install_netif_keywords(void) install_keyword_root("netif_defs", netif_defs_handler); install_keyword("pktpool_size", pktpool_size_handler, KW_TYPE_INIT); install_keyword("pktpool_cache", pktpool_cache_handler, KW_TYPE_INIT); -#ifdef CONFIG_DPVS_FDIR install_keyword("fdir_mode", fdir_mode_handler, KW_TYPE_INIT); -#endif install_keyword("device", device_handler, KW_TYPE_INIT); install_sublevel(); install_keyword("rx", NULL, KW_TYPE_INIT); @@ -932,6 +958,7 @@ void install_netif_keywords(void) install_sublevel(); install_keyword("queue_number", tx_queue_number_handler, KW_TYPE_INIT); install_keyword("descriptor_number", tx_desc_nb_handler, KW_TYPE_INIT); + install_keyword("mbuf_fast_free", tx_mbuf_fast_free_handler, KW_TYPE_INIT); install_sublevel_end(); install_keyword("promisc_mode", promisc_mode_handler, KW_TYPE_INIT); install_keyword("allmulticast", allmulticast_handler, KW_TYPE_INIT); @@ -1011,8 +1038,8 @@ static inline int parse_ether_hdr(struct rte_mbuf *mbuf, uint16_t port, uint16_t struct rte_ether_hdr *eth_hdr; char saddr[18], daddr[18]; eth_hdr = rte_pktmbuf_mtod(mbuf, struct rte_ether_hdr *); - rte_ether_format_addr(saddr, sizeof(saddr), ð_hdr->s_addr); - rte_ether_format_addr(daddr, sizeof(daddr), ð_hdr->d_addr); + rte_ether_format_addr(saddr, sizeof(saddr), ð_hdr->src_addr); + rte_ether_format_addr(daddr, sizeof(daddr), ð_hdr->dst_addr); RTE_LOG(INFO, NETIF, "[%s] lcore=%u port=%u queue=%u ethtype=%0x saddr=%s daddr=%s\n", __func__, rte_lcore_id(), port, queue, rte_be_to_cpu_16(eth_hdr->ether_type), saddr, daddr); @@ -1063,9 +1090,9 @@ __rte_unused static void pkt_send_back(struct rte_mbuf *mbuf, struct netif_port struct rte_ether_hdr *ehdr; struct rte_ether_addr eaddr; ehdr = rte_pktmbuf_mtod(mbuf, struct rte_ether_hdr*); - rte_ether_addr_copy(&ehdr->s_addr, &eaddr); - rte_ether_addr_copy(&ehdr->d_addr, &ehdr->s_addr); - 
rte_ether_addr_copy(&eaddr, &ehdr->d_addr); + rte_ether_addr_copy(&ehdr->src_addr, &eaddr); + rte_ether_addr_copy(&ehdr->dst_addr, &ehdr->src_addr); + rte_ether_addr_copy(&eaddr, &ehdr->dst_addr); netif_xmit(mbuf, port); } #endif @@ -2127,8 +2154,7 @@ static inline void netif_tx_burst(lcoreid_t cid, portid_t pid, queueid_t qindex) dev = netif_port_get(pid); if (dev && (dev->flag & NETIF_PORT_FLAG_FORWARD2KNI)) { for (; i < txq->len; i++) { - if (NULL == (mbuf_copied = mbuf_copy(txq->mbufs[i], - pktmbuf_pool[dev->socket]))) + if (NULL == (mbuf_copied = mbuf_copy(txq->mbufs[i], pktmbuf_pool[dev->socket]))) RTE_LOG(WARNING, NETIF, "%s: fail to copy outbound mbuf into kni\n", __func__); else kni_ingress(mbuf_copied, dev); @@ -2225,11 +2251,11 @@ static inline int validate_xmit_mbuf(struct rte_mbuf *mbuf, int err = EDPVS_OK; /* 802.1q VLAN */ - if (mbuf->ol_flags & PKT_TX_VLAN_PKT) { + if (mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) { if (!(dev->flag & NETIF_PORT_FLAG_TX_VLAN_INSERT_OFFLOAD)) { err = vlan_insert_tag(mbuf, htons(ETH_P_8021Q), mbuf_vlan_tag_get_id(mbuf)); - mbuf->ol_flags &= (~PKT_TX_VLAN_PKT); + mbuf->ol_flags &= (~RTE_MBUF_F_TX_VLAN); mbuf->vlan_tci = 0; } } @@ -2258,7 +2284,7 @@ int netif_hard_xmit(struct rte_mbuf *mbuf, struct netif_port *dev) /* send pkt on current lcore */ cid = rte_lcore_id(); - if (likely(mbuf->ol_flags & PKT_TX_IP_CKSUM)) + if (likely(mbuf->ol_flags & RTE_MBUF_F_TX_IP_CKSUM)) mbuf->l2_len = sizeof(struct rte_ether_hdr); if (rte_get_main_lcore() == cid) { // master thread @@ -2314,6 +2340,18 @@ int netif_hard_xmit(struct rte_mbuf *mbuf, struct netif_port *dev) txq->len = 0; } +#ifdef CONFIG_DPVS_NETIF_DEBUG + if ((dev->flag & NETIF_PORT_FLAG_TX_MBUF_FAST_FREE) && txq->pktpool) { + if (txq->pktpool != mbuf->pool) { + RTE_LOG(ERR, NETIF, "%s:txq%d pktmbuf pool changed: %s->%s, please disable tx_mbuf_fast_free\n", + dev->name, txq->id, txq->pktpool->name, mbuf->pool->name); + txq->pktpool = mbuf->pool; + } + } else { + txq->pktpool = 
mbuf->pool; + } +#endif + lcore_stats[cid].obytes += mbuf->pkt_len; txq->mbufs[txq->len] = mbuf; txq->len++; @@ -2353,11 +2391,11 @@ int netif_xmit(struct rte_mbuf *mbuf, struct netif_port *dev) static inline eth_type_t eth_type_parse(const struct rte_ether_hdr *eth_hdr, const struct netif_port *dev) { - if (eth_addr_equal(&dev->addr, ð_hdr->d_addr)) + if (eth_addr_equal(&dev->addr, ð_hdr->dst_addr)) return ETH_PKT_HOST; - if (rte_is_multicast_ether_addr(ð_hdr->d_addr)) { - if (rte_is_broadcast_ether_addr(ð_hdr->d_addr)) + if (rte_is_multicast_ether_addr(ð_hdr->dst_addr)) { + if (rte_is_broadcast_ether_addr(ð_hdr->dst_addr)) return ETH_PKT_BROADCAST; else return ETH_PKT_MULTICAST; @@ -2436,7 +2474,7 @@ int netif_rcv_mbuf(struct netif_port *dev, lcoreid_t cid, struct rte_mbuf *mbuf, * to act as VLAN filter. */ if (eth_hdr->ether_type == htons(ETH_P_8021Q) || - mbuf->ol_flags & PKT_RX_VLAN_STRIPPED) { + mbuf->ol_flags & RTE_MBUF_F_RX_VLAN_STRIPPED) { if (vlan_rcv(mbuf, netif_port_get(mbuf->port)) != EDPVS_OK) goto drop; dev = netif_port_get(mbuf->port); @@ -2468,7 +2506,7 @@ int netif_rcv_mbuf(struct netif_port *dev, lcoreid_t cid, struct rte_mbuf *mbuf, || (i == rte_get_main_lcore())) continue; /* rte_pktmbuf_clone will not clone pkt.data, just copy pointer! 
*/ - mbuf_clone = rte_pktmbuf_clone(mbuf, pktmbuf_pool[rte_socket_id()]); + mbuf_clone = rte_pktmbuf_clone(mbuf, pktmbuf_pool[dev->socket]); if (unlikely(!mbuf_clone)) { RTE_LOG(WARNING, NETIF, "%s arp reply mbuf clone failed on lcore %d\n", __func__, i); @@ -2943,7 +2981,7 @@ static inline void kni_ingress_flow_xmit_vlan_trunk(struct netif_port *dev, mbuf = qconf->mbufs[i]; eh = rte_pktmbuf_mtod(mbuf, struct rte_ether_hdr *); if (eh->ether_type == htons(ETH_P_8021Q) || - mbuf->ol_flags & PKT_RX_VLAN_STRIPPED) { + mbuf->ol_flags & RTE_MBUF_F_RX_VLAN_STRIPPED) { vlan_rcv(mbuf, dev); } else if (dev->type == PORT_TYPE_BOND_MASTER) { mbuf->port = dev->id; @@ -3074,19 +3112,19 @@ static void netif_dump_rss_reta(struct netif_port *port) { int i, len, pos; uint32_t reta_id, reta_pos; - char buf[ETH_RSS_RETA_SIZE_512 * 8]; + char buf[RTE_ETH_RSS_RETA_SIZE_512 * 8]; struct rte_eth_rss_reta_entry64 reta_info[RETA_CONF_SIZE]; if (port->type != PORT_TYPE_GENERAL && port->type != PORT_TYPE_BOND_SLAVE) return; if (unlikely(port->dev_info.reta_size == 0)) - if (unlikely(rte_eth_dev_info_get(port->id, &port->dev_info))) + if (unlikely(rte_eth_dev_info_get(port->id, &port->dev_info) != 0)) return; memset(reta_info, 0, sizeof(reta_info)); for (i = 0; i < port->dev_info.reta_size; i++) - reta_info[i / RTE_RETA_GROUP_SIZE].mask = UINT64_MAX; + reta_info[i / RTE_ETH_RETA_GROUP_SIZE].mask = UINT64_MAX; if (unlikely(rte_eth_dev_rss_reta_query(port->id, reta_info, port->dev_info.reta_size))) @@ -3095,8 +3133,8 @@ static void netif_dump_rss_reta(struct netif_port *port) buf[0] = '\0'; len = pos = 0; for (i = 0; i < port->dev_info.reta_size; i++) { - reta_id = i / RTE_RETA_GROUP_SIZE; - reta_pos = i % RTE_RETA_GROUP_SIZE; + reta_id = i / RTE_ETH_RETA_GROUP_SIZE; + reta_pos = i % RTE_ETH_RETA_GROUP_SIZE; if (i % 8 == 0) { len = snprintf(&buf[pos], sizeof(buf) - pos, "\n%4d: ", i); if (len >= sizeof(buf) - pos) { @@ -3143,8 +3181,8 @@ static int __netif_update_rss_reta(struct netif_port 
*port) memset(reta_conf, 0, sizeof(reta_conf)); for (i = 0; i < port->dev_info.reta_size; i++) { - reta_id = i / RTE_RETA_GROUP_SIZE; - reta_pos = i % RTE_RETA_GROUP_SIZE; + reta_id = i / RTE_ETH_RETA_GROUP_SIZE; + reta_pos = i % RTE_ETH_RETA_GROUP_SIZE; reta_conf[reta_id].mask = UINT64_MAX; reta_conf[reta_id].reta[reta_pos] = (uint16_t)(rssq[i % nrssq]); } @@ -3476,15 +3514,23 @@ static inline void setup_dev_of_flags(struct netif_port *port) port->flag |= NETIF_PORT_FLAG_ENABLED; /* tx offload conf and flags */ - if (port->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_IPV4_CKSUM) + if (port->dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_IPV4_CKSUM) port->flag |= NETIF_PORT_FLAG_TX_IP_CSUM_OFFLOAD; - if (port->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_TCP_CKSUM) + if (port->dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_TCP_CKSUM) port->flag |= NETIF_PORT_FLAG_TX_TCP_CSUM_OFFLOAD; - if (port->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_UDP_CKSUM) + if (port->dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_UDP_CKSUM) port->flag |= NETIF_PORT_FLAG_TX_UDP_CSUM_OFFLOAD; + // Device supports optimization for fast release of mbufs. + // The feature is configurable via dpvs.conf. + // When set application must guarantee that per-queue all mbufs comes from + // the same mempool and has refcnt = 1. + // https://doc.dpdk.org/api/rte__ethdev_8h.html#a43f198c6b59d965130d56fd8f40ceac1 + if (!(port->dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE)) + port->flag &= ~NETIF_PORT_FLAG_TX_MBUF_FAST_FREE; + /* FIXME: may be a bug in dev_info get for virtio device, * set the txq_of_flags manually for this type device */ if (strncmp(port->dev_info.driver_name, "net_virtio", strlen("net_virtio")) == 0) { @@ -3496,12 +3542,12 @@ static inline void setup_dev_of_flags(struct netif_port *port) /* * we may have multiple vlan dev on one rte_ethdev, * and mbuf->vlan_tci is RX only! 
- * while there's only one PVID (DEV_TX_OFFLOAD_VLAN_INSERT), + * while there's only one PVID (RTE_ETH_TX_OFFLOAD_VLAN_INSERT), * to make things easier, do not support TX VLAN instert offload. * or we have to check if VID is PVID (than to tx offload it). */ #if 0 - if (dev_info->tx_offload_capa & DEV_TX_OFFLOAD_VLAN_INSERT) { + if (dev_info->tx_offload_capa & RTE_ETH_TX_OFFLOAD_VLAN_INSERT) { port->flag |= NETIF_PORT_FLAG_TX_VLAN_INSERT_OFFLOAD; port->dev_conf.txmode.hw_vlan_insert_pvid = 1; rte_eth_dev_set_vlan_pvid(); @@ -3509,11 +3555,9 @@ static inline void setup_dev_of_flags(struct netif_port *port) #endif /* rx offload conf and flags */ - if (port->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_VLAN_STRIP) { + if (port->dev_info.rx_offload_capa & RTE_ETH_RX_OFFLOAD_VLAN_STRIP) port->flag |= NETIF_PORT_FLAG_RX_VLAN_STRIP_OFFLOAD; - port->dev_conf.rxmode.offloads |= DEV_RX_OFFLOAD_VLAN_STRIP; - } - if (port->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_IPV4_CKSUM) + if (port->dev_info.rx_offload_capa & RTE_ETH_RX_OFFLOAD_IPV4_CKSUM) port->flag |= NETIF_PORT_FLAG_RX_IP_CSUM_OFFLOAD; /* enable lldp on physical port */ @@ -3588,8 +3632,7 @@ int netif_get_link(struct netif_port *dev, struct rte_eth_link *link) if (dev->netif_ops->op_get_link) return dev->netif_ops->op_get_link(dev, link); - rte_eth_link_get_nowait((uint8_t)dev->id, link); - return EDPVS_OK; + return rte_eth_link_get_nowait((uint8_t)dev->id, link); } int netif_get_promisc(struct netif_port *dev, bool *promisc) @@ -3693,21 +3736,21 @@ static int rss_resolve_proc(char *rss) int rss_value = 0; if (!strcmp(rss, "all")) - rss_value = ETH_RSS_IP | ETH_RSS_TCP | ETH_RSS_UDP; + rss_value = RTE_ETH_RSS_IP | RTE_ETH_RSS_TCP | RTE_ETH_RSS_UDP; else if (!strcmp(rss, "ip")) - rss_value = ETH_RSS_IP; + rss_value = RTE_ETH_RSS_IP; else if (!strcmp(rss, "tcp")) - rss_value = ETH_RSS_TCP; + rss_value = RTE_ETH_RSS_TCP; else if (!strcmp(rss, "udp")) - rss_value = ETH_RSS_UDP; + rss_value = RTE_ETH_RSS_UDP; else if 
(!strcmp(rss, "sctp")) - rss_value = ETH_RSS_SCTP; + rss_value = RTE_ETH_RSS_SCTP; else if (!strcmp(rss, "ether")) - rss_value = ETH_RSS_L2_PAYLOAD; + rss_value = RTE_ETH_RSS_L2_PAYLOAD; else if (!strcmp(rss, "port")) - rss_value = ETH_RSS_PORT; + rss_value = RTE_ETH_RSS_PORT; else if (!strcmp(rss, "tunnel")) - rss_value = ETH_RSS_TUNNEL; + rss_value = RTE_ETH_RSS_TUNNEL; return rss_value; } @@ -3718,7 +3761,10 @@ static void adapt_device_conf(portid_t port_id, uint64_t *rss_hf, { struct rte_eth_dev_info dev_info; - rte_eth_dev_info_get(port_id, &dev_info); + if (unlikely(rte_eth_dev_info_get(port_id, &dev_info) != 0)) { + RTE_LOG(WARNING, NETIF, "%s: fail to get dev_info of port %d\n", __func__, port_id); + return; + } if ((dev_info.flow_type_rss_offloads | *rss_hf) != dev_info.flow_type_rss_offloads) { @@ -3802,6 +3848,8 @@ static void fill_port_config(struct netif_port *port, char *promisc_on, char *al } port->rxq_desc_nb = cfg_stream->rx_desc_nb; port->txq_desc_nb = cfg_stream->tx_desc_nb; + if (cfg_stream->tx_mbuf_fast_free) + port->flag |= NETIF_PORT_FLAG_TX_MBUF_FAST_FREE; } else { /* using default configurations */ port->rxq_desc_nb = NETIF_NB_RX_DESC_DEF; @@ -3830,6 +3878,8 @@ static void fill_port_config(struct netif_port *port, char *promisc_on, char *al port->rxq_desc_nb = cfg_stream->rx_desc_nb; port->txq_desc_nb = cfg_stream->tx_desc_nb; port->mtu = cfg_stream->mtu; + if (cfg_stream->tx_mbuf_fast_free) + port->flag |= NETIF_PORT_FLAG_TX_MBUF_FAST_FREE; } else { port->rxq_desc_nb = NETIF_NB_RX_DESC_DEF; port->txq_desc_nb = NETIF_NB_TX_DESC_DEF; @@ -3860,7 +3910,7 @@ static int add_bond_slaves(struct netif_port *port) for (ii = 0; ii < port->bond->master.slave_nb; ii++) { slave = port->bond->master.slaves[ii]; - if (rte_eth_bond_slave_add(port->id, slave->id) < 0) { + if (rte_eth_bond_member_add(port->id, slave->id) < 0) { RTE_LOG(ERR, NETIF, "%s: fail to add slave %s to %s\n", __func__, slave->name, port->name); return EDPVS_DPDKAPIFAIL; @@ -3888,29 
+3938,12 @@ static int add_bond_slaves(struct netif_port *port) port->socket = rte_eth_dev_socket_id(port->id); port->mbuf_pool = pktmbuf_pool[port->socket]; port_mtu_set(port); - rte_eth_dev_info_get(port->id, &port->dev_info); + if (rte_eth_dev_info_get(port->id, &port->dev_info)) + RTE_LOG(WARNING, NETIF, "%s: fail to update dev_info of %s\n", __func__, port->name); return EDPVS_OK; } -#ifdef CONFIG_DPVS_FDIR -static int config_fdir_conf(struct rte_fdir_conf *fdir_conf) -{ - int shift; - - /* how many mask bits needed? */ - for (shift = 0; (0x1<= 16) - return EDPVS_INVAL; - - fdir_conf->mask.dst_port_mask = htons(~((~0x0) << shift)); - fdir_conf->mode = g_fdir_mode; - - return EDPVS_OK; -} -#endif - /* * Note: Invoke the function after port is allocated and lcores are configured. */ @@ -3942,21 +3975,20 @@ int netif_port_start(struct netif_port *port) port->dev_info.max_tx_queues, port->nrxq, port->ntxq); } - // device configure - if ((ret = rte_eth_dev_set_mtu(port->id,port->mtu)) != EDPVS_OK) - return ret; -#ifdef CONFIG_DPVS_FDIR - ret = config_fdir_conf(&port->dev_conf.fdir_conf); - if (ret != EDPVS_OK) - return ret; -#endif + if (port->flag & NETIF_PORT_FLAG_RX_IP_CSUM_OFFLOAD) + port->dev_conf.rxmode.offloads |= RTE_ETH_RX_OFFLOAD_IPV4_CKSUM; + if (port->flag & NETIF_PORT_FLAG_RX_VLAN_STRIP_OFFLOAD) + port->dev_conf.rxmode.offloads |= RTE_ETH_RX_OFFLOAD_VLAN_STRIP; + if (port->flag & NETIF_PORT_FLAG_TX_IP_CSUM_OFFLOAD) - port->dev_conf.txmode.offloads |= DEV_TX_OFFLOAD_IPV4_CKSUM; + port->dev_conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_IPV4_CKSUM; if (port->flag & NETIF_PORT_FLAG_TX_UDP_CSUM_OFFLOAD) - port->dev_conf.txmode.offloads |= DEV_TX_OFFLOAD_UDP_CKSUM; + port->dev_conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_UDP_CKSUM; if (port->flag & NETIF_PORT_FLAG_TX_TCP_CSUM_OFFLOAD) - port->dev_conf.txmode.offloads |= DEV_TX_OFFLOAD_TCP_CKSUM; - port->dev_conf.txmode.offloads |= DEV_TX_OFFLOAD_MBUF_FAST_FREE; + port->dev_conf.txmode.offloads |= 
RTE_ETH_TX_OFFLOAD_TCP_CKSUM; + if (port->flag & NETIF_PORT_FLAG_TX_MBUF_FAST_FREE) + port->dev_conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE; + adapt_device_conf(port->id, &port->dev_conf.rx_adv_conf.rss_conf.rss_hf, &port->dev_conf.rxmode.offloads, &port->dev_conf.txmode.offloads); @@ -4001,6 +4033,10 @@ int netif_port_start(struct netif_port *port) } } + // device configure + if ((ret = rte_eth_dev_set_mtu(port->id,port->mtu)) != EDPVS_OK) + return ret; + // add slaves and update stored info for bonding device if (port->type == PORT_TYPE_BOND_MASTER) { ret = add_bond_slaves(port); @@ -4024,11 +4060,11 @@ int netif_port_start(struct netif_port *port) // wait the device link up RTE_LOG(INFO, NETIF, "Waiting for %s link up, be patient ...\n", port->name); for (ii = 0; ii < wait_link_up_msecs; ii++) { - rte_eth_link_get_nowait(port->id, &link); - if (link.link_status) { + ret = rte_eth_link_get_nowait(port->id, &link); + if (!ret && link.link_status) { RTE_LOG(INFO, NETIF, ">> %s: link up - speed %u Mbps - %s\n", port->name, (unsigned)link.link_speed, - (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? + (link.link_duplex == RTE_ETH_LINK_FULL_DUPLEX) ? "full-duplex" : "half-duplex"); break; } @@ -4069,6 +4105,17 @@ int netif_port_start(struct netif_port *port) RTE_LOG(WARNING, NETIF, "%s: %s update rss reta failed (cause: %s)\n", __func__, port->name, dpvs_strerror(ret)); +#if 0 + /* disable kni tx-csum offload feature + * + * Why we redo this while it's done in virtio_kni_start? We found in some systems, + * say linux 5.10.134, the tx-csum feature of virtio kni device gets re-enabled + * some moments later after virito_kni_start. 
+ * */ + if (kni_dev_exist(port)) + disable_kni_tx_csum_offload(port->kni.name); +#endif + return EDPVS_OK; } @@ -4217,42 +4264,19 @@ static int relate_bonding_device(void) static struct rte_eth_conf default_port_conf = { .rxmode = { - .mq_mode = ETH_MQ_RX_RSS, - .max_rx_pkt_len = ETHER_MAX_LEN, - .split_hdr_size = 0, - .offloads = DEV_RX_OFFLOAD_IPV4_CKSUM, + .mq_mode = RTE_ETH_MQ_RX_RSS, + .mtu = RTE_ETHER_MTU, + .offloads = RTE_ETH_RX_OFFLOAD_IPV4_CKSUM, }, .rx_adv_conf = { .rss_conf = { .rss_key = NULL, - .rss_hf = /*ETH_RSS_IP*/ ETH_RSS_TCP, + .rss_hf = /*RTE_ETH_RSS_IP*/ RTE_ETH_RSS_TCP, }, }, .txmode = { - .mq_mode = ETH_MQ_TX_NONE, + .mq_mode = RTE_ETH_MQ_TX_NONE, }, -#ifdef CONFIG_DPVS_FDIR - .fdir_conf = { - .mode = RTE_FDIR_MODE_PERFECT, /* maybe changed by config file */ - .pballoc = RTE_FDIR_PBALLOC_64K, - .status = RTE_FDIR_REPORT_STATUS, - .mask = { - .ipv4_mask = { - .dst_ip = 0xFFFFFFFF, - }, - .ipv6_mask = { - .dst_ip = { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF }, - }, - /* to be changed according to slave lcore number in use */ - .dst_port_mask = 0x0700, - }, - .drop_queue = 127, - .flex_conf = { - .nb_payloads = 0, - .nb_flexmasks = 0, - }, - }, -#endif }; int netif_print_port_conf(const struct rte_eth_conf *port_conf, char *buf, int *len) @@ -4264,22 +4288,22 @@ int netif_print_port_conf(const struct rte_eth_conf *port_conf, char *buf, int * port_conf = &default_port_conf; memset(buf, 0, *len); - if (port_conf->rxmode.mq_mode == ETH_MQ_RX_RSS) { + if (port_conf->rxmode.mq_mode == RTE_ETH_MQ_RX_RSS) { memset(tbuf2, 0, sizeof(tbuf2)); if (port_conf->rx_adv_conf.rss_conf.rss_hf) { - if (port_conf->rx_adv_conf.rss_conf.rss_hf & ETH_RSS_IP) + if (port_conf->rx_adv_conf.rss_conf.rss_hf & RTE_ETH_RSS_IP) snprintf(tbuf2 + strlen(tbuf2), sizeof(tbuf2) - strlen(tbuf2), "ETH_RSS_IP "); - if (port_conf->rx_adv_conf.rss_conf.rss_hf & ETH_RSS_TCP) + if (port_conf->rx_adv_conf.rss_conf.rss_hf & RTE_ETH_RSS_TCP) snprintf(tbuf2 + strlen(tbuf2), 
sizeof(tbuf2) - strlen(tbuf2), "ETH_RSS_TCP "); - if (port_conf->rx_adv_conf.rss_conf.rss_hf & ETH_RSS_UDP) + if (port_conf->rx_adv_conf.rss_conf.rss_hf & RTE_ETH_RSS_UDP) snprintf(tbuf2 + strlen(tbuf2), sizeof(tbuf2) - strlen(tbuf2), "ETH_RSS_UDP "); - if (port_conf->rx_adv_conf.rss_conf.rss_hf & ETH_RSS_SCTP) + if (port_conf->rx_adv_conf.rss_conf.rss_hf & RTE_ETH_RSS_SCTP) snprintf(tbuf2 + strlen(tbuf2), sizeof(tbuf2) - strlen(tbuf2), "ETH_RSS_SCTP "); - if (port_conf->rx_adv_conf.rss_conf.rss_hf & ETH_RSS_L2_PAYLOAD) + if (port_conf->rx_adv_conf.rss_conf.rss_hf & RTE_ETH_RSS_L2_PAYLOAD) snprintf(tbuf2 + strlen(tbuf2), sizeof(tbuf2) - strlen(tbuf2), "ETH_RSS_L2_PAYLOAD "); - if (port_conf->rx_adv_conf.rss_conf.rss_hf & ETH_RSS_PORT) + if (port_conf->rx_adv_conf.rss_conf.rss_hf & RTE_ETH_RSS_PORT) snprintf(tbuf2 + strlen(tbuf2), sizeof(tbuf2) - strlen(tbuf2), "ETH_RSS_PORT "); - if (port_conf->rx_adv_conf.rss_conf.rss_hf & ETH_RSS_TUNNEL) + if (port_conf->rx_adv_conf.rss_conf.rss_hf & RTE_ETH_RSS_TUNNEL) snprintf(tbuf2 + strlen(tbuf2), sizeof(tbuf2) - strlen(tbuf2), "ETH_RSS_TUNNEL "); } else { snprintf(tbuf2, sizeof(tbuf2), "Inhibited"); @@ -4326,7 +4350,8 @@ static void dpdk_port_setup(struct netif_port *dev) rte_eth_macaddr_get(dev->id, &dev->addr); rte_eth_dev_get_mtu(dev->id, &dev->mtu); - rte_eth_dev_info_get(dev->id, &dev->dev_info); + if (rte_eth_dev_info_get(dev->id, &dev->dev_info)) + memset(&dev->dev_info, 0, sizeof(dev->dev_info)); setup_dev_of_flags(dev); } @@ -4340,7 +4365,8 @@ static void bond_port_setup(struct netif_port *dev) rte_eth_macaddr_get(dev->id, &dev->addr); rte_eth_dev_get_mtu(dev->id, &dev->mtu); - rte_eth_dev_info_get(dev->id, &dev->dev_info); + if (rte_eth_dev_info_get(dev->id, &dev->dev_info)) + memset(&dev->dev_info, 0, sizeof(dev->dev_info)); setup_dev_of_flags(dev); } @@ -4816,28 +4842,28 @@ static int get_port_basic(struct netif_port *port, void **out, size_t *out_len) get->link_speed = link.link_speed; switch (link.link_status) 
{ - case ETH_LINK_UP: + case RTE_ETH_LINK_UP: snprintf(get->link_status, sizeof(get->link_status), "%s", "UP"); break; - case ETH_LINK_DOWN: + case RTE_ETH_LINK_DOWN: snprintf(get->link_status, sizeof(get->link_status), "%s", "DOWN"); break; } switch (link.link_duplex) { - case ETH_LINK_HALF_DUPLEX: + case RTE_ETH_LINK_HALF_DUPLEX: snprintf(get->link_duplex, sizeof(get->link_duplex), "%s", "half-duplex"); break; - case ETH_LINK_FULL_DUPLEX: + case RTE_ETH_LINK_FULL_DUPLEX: snprintf(get->link_duplex, sizeof(get->link_duplex), "%s", "full-duplex"); break; } switch (link.link_autoneg) { - case ETH_LINK_FIXED: + case RTE_ETH_LINK_FIXED: snprintf(get->link_autoneg, sizeof(get->link_autoneg), "%s", "fixed-nego"); break; - case ETH_LINK_AUTONEG: + case RTE_ETH_LINK_AUTONEG: snprintf(get->link_autoneg, sizeof(get->link_autoneg), "%s", "auto-nego"); break; } @@ -4872,6 +4898,8 @@ static int get_port_basic(struct netif_port *port, void **out, size_t *out_len) get->ol_tx_udp_csum = 1; if (port->flag & NETIF_PORT_FLAG_LLDP) get->lldp = 1; + if (port->flag & NETIF_PORT_FLAG_TX_MBUF_FAST_FREE) + get->ol_tx_fast_free = 1; *out = get; *out_len = sizeof(netif_nic_basic_get_t); @@ -4882,22 +4910,29 @@ static int get_port_basic(struct netif_port *port, void **out, size_t *out_len) static inline void copy_dev_info(struct netif_nic_dev_get *get, const struct rte_eth_dev_info *dev_info) { - const struct rte_pci_device *pci_dev = NULL; #if RTE_VERSION < RTE_VERSION_NUM(18, 11, 0, 0) - pci_dev = dev_info->pci_dev; -#else - if (dev_info->device) { - const struct rte_bus *bus = NULL; - bus = rte_bus_find_by_device(dev_info->device); - if (bus && !strcmp(bus->name, "pci")) { - pci_dev = RTE_DEV_TO_PCI(dev_info->device); - } + const struct rte_pci_device *pci_dev = dev_info->pci_dev; + if (pci_dev) + snprintf(get->pci_addr, sizeof(get->pci_addr), "%04x:%02x:%02x.%0x", + pci_dev->addr.domain, pci_dev->addr.bus, + pci_dev->addr.devid, pci_dev->addr.function); +#elif RTE_VERSION < 
RTE_VERSION_NUM(24, 11, 0, 0) + const struct rte_pci_device *pci_dev; + const struct rte_bus *bus = rte_bus_find_by_device(dev_info->device); + if (bus && !strcmp(bus->name, "pci")) { + pci_dev = RTE_DEV_TO_PCI(dev_info->device); + snprintf(get->pci_addr, sizeof(get->pci_addr), "%04x:%02x:%02x.%0x", + pci_dev->addr.domain, pci_dev->addr.bus, + pci_dev->addr.devid, pci_dev->addr.function); } +#else + // FIXME: + // `rte_bus`, `rte_pci_device` became internal structs in dpdk-24.11 that dereference + // are not allowed. Besides, no exported DPDK APIs are found to retrieve the PCI bus + // for a device. Just place the rte_bus name here, hoping fix the issue someday. + const struct rte_bus *bus = rte_bus_find_by_device(dev_info->device); + snprintf(get->pci_addr, sizeof(get->pci_addr), "%s", rte_bus_name(bus) ?: "unknown"); #endif - if (pci_dev) - snprintf(get->pci_addr, sizeof(get->pci_addr), "%04x:%02x:%02x:%0x", - pci_dev->addr.domain, pci_dev->addr.bus, - pci_dev->addr.devid, pci_dev->addr.function); if (dev_info->driver_name) strncpy(get->driver_name, dev_info->driver_name, sizeof(get->driver_name)); get->if_index = dev_info->if_index; @@ -4943,8 +4978,8 @@ static int get_port_ext_info(struct netif_port *port, void **out, size_t *out_le /* dev info */ if (is_physical_port( port->id) || is_bond_port(port->id)) { - rte_eth_dev_info_get(port->id, &dev_info); - copy_dev_info(&get->dev_info, &dev_info); + if (likely(rte_eth_dev_info_get(port->id, &dev_info) == 0)) + copy_dev_info(&get->dev_info, &dev_info); } /* cfg_queues */ @@ -5092,9 +5127,9 @@ static int get_bond_status(struct netif_port *port, void **out, size_t *out_len) get->mode = rte_eth_bond_mode_get(port->id); primary = rte_eth_bond_primary_get(port->id); - get->slave_nb = rte_eth_bond_slaves_get(port->id, + get->slave_nb = rte_eth_bond_members_get(port->id, slaves, NETIF_MAX_BOND_SLAVES); - get->active_nb = rte_eth_bond_active_slaves_get(port->id, + get->active_nb = rte_eth_bond_active_members_get(port->id, 
actives, NETIF_MAX_BOND_SLAVES); for (i = 0; i < get->slave_nb; i++) { is_active = false; @@ -5292,27 +5327,38 @@ static int set_port(struct netif_port *port, const netif_nic_set_t *port_cfg) int err; struct rte_eth_link link; err = rte_eth_dev_set_link_up(port->id); - rte_eth_link_get(port->id, &link); - if (link.link_status == ETH_LINK_DOWN) { - RTE_LOG(WARNING, NETIF, "set %s link up [ FAIL ] -- %d\n", + if (err < 0) + RTE_LOG(WARNING, NETIF, "set %s link up failed, device error code %d\n", port_cfg->pname, err); + if (!rte_eth_link_get(port->id, &link)) { + if (link.link_status == RTE_ETH_LINK_DOWN) { + RTE_LOG(WARNING, NETIF, "set %s link up [ FAIL ]\n", port_cfg->pname); + } else { + RTE_LOG(INFO, NETIF, "set %s link up [ OK ]" + " --- speed %dMbps %s-duplex %s-neg\n", + port_cfg->pname, link.link_speed, + link.link_duplex ? "full" : "half", + link.link_autoneg ? "auto" : "fixed"); + } } else { - RTE_LOG(INFO, NETIF, "set %s link up [ OK ]" - " --- speed %dMbps %s-duplex %s-neg\n", - port_cfg->pname, link.link_speed, - link.link_duplex ? "full" : "half", - link.link_autoneg ? 
"auto" : "fixed"); + RTE_LOG(WARNING, NETIF, "set %s link up [ UNKNOWN ]\n", port_cfg->pname); } } else if (port_cfg->link_status_down) { int err; struct rte_eth_link link; err = rte_eth_dev_set_link_down(port->id); - rte_eth_link_get(port->id, &link); - if (link.link_status == ETH_LINK_UP) { - RTE_LOG(WARNING, NETIF, "set %s link down [ FAIL ] -- %d\n", + if (err < 0) + RTE_LOG(WARNING, NETIF, "set %s link down failed, device error code %d\n", port_cfg->pname, err); + if (!rte_eth_link_get(port->id, &link)) { + if (link.link_status == RTE_ETH_LINK_UP) { + RTE_LOG(WARNING, NETIF, "set %s link down [ FAIL ] -- %d\n", + port_cfg->pname, err); + } else { + RTE_LOG(INFO, NETIF, "set %s link down [ OK ]\n", port_cfg->pname); + } } else { - RTE_LOG(INFO, NETIF, "set %s link down [ OK ]\n", port_cfg->pname); + RTE_LOG(WARNING, NETIF, "set %s link down [ UNKNOWN ]\n", port_cfg->pname); } } @@ -5386,13 +5432,13 @@ static int set_bond(struct netif_port *port, const netif_bond_set_t *bond_cfg) if (!slave) return EDPVS_NOTEXIST; if (bond_cfg->act == ACT_ADD) { - if (!rte_eth_bond_slave_add(port->id, slave->id)) { + if (!rte_eth_bond_member_add(port->id, slave->id)) { RTE_LOG(INFO, NETIF, "slave %s is added to %s\n", slave->name, port->name); port->bond->master.slaves[port->bond->master.slave_nb++] = slave; } } else if (bond_cfg->act == ACT_DEL) { - if (!rte_eth_bond_slave_remove(port->id, slave->id)) { + if (!rte_eth_bond_member_remove(port->id, slave->id)) { RTE_LOG(INFO, NETIF, "slave %s is removed from %s\n", slave->name, port->name); for (i = 0, j = 0; i < port->bond->master.slave_nb; i++) { diff --git a/src/netif_flow.c b/src/netif_flow.c index 0d00da98c..5dc94b73b 100644 --- a/src/netif_flow.c +++ b/src/netif_flow.c @@ -25,8 +25,8 @@ /* uncomment the macro if rte_flow pmd driver is not thread-safe. 
*/ // #define CONFIG_DEV_FLOW_LOCK -/* sapool pattern stack: ETH | IP | TCP/UDP | END */ -#define SAPOOL_PATTERN_NUM 4 +/* sapool pattern stack: ETH | IP | TCP/UDP | [ FUZZY ] | END */ +#define SAPOOL_PATTERN_NUM 5 /* sapool action stack: QUEUE | END */ #define SAPOOL_ACTION_NUM 2 /* kni flow pattern stack: ETH | IP | END */ @@ -282,6 +282,7 @@ int netif_sapool_flow_add(struct netif_port *dev, lcoreid_t cid, struct rte_flow_item_ipv6 ip6_spec, ip6_mask; struct rte_flow_item_tcp tcp_spec, tcp_mask; struct rte_flow_item_udp udp_spec, udp_mask; + struct rte_flow_item_fuzzy fuzzy_spec = { .thresh = 0xFFFFFFFF }; queueid_t queue_id; struct rte_flow_action_queue queue; @@ -332,7 +333,14 @@ int netif_sapool_flow_add(struct netif_port *dev, lcoreid_t cid, pattern[2].type = RTE_FLOW_ITEM_TYPE_TCP; pattern[2].spec = &tcp_spec; pattern[2].mask = &tcp_mask; - pattern[3].type = RTE_FLOW_ITEM_TYPE_END; + if (netif_flow_fuzzy_match()) { + pattern[3].type = RTE_FLOW_ITEM_TYPE_FUZZY; + pattern[3].spec = &fuzzy_spec; + pattern[3].mask = &fuzzy_spec; + pattern[4].type = RTE_FLOW_ITEM_TYPE_END; + } else { + pattern[3].type = RTE_FLOW_ITEM_TYPE_END; + } /* set tcp flow */ resp.size = flows->size; diff --git a/src/vlan.c b/src/vlan.c index 5939e0a32..f449ee927 100644 --- a/src/vlan.c +++ b/src/vlan.c @@ -113,7 +113,7 @@ static int vlan_xmit(struct rte_mbuf *mbuf, struct netif_port *dev) */ if (ethhdr->ether_type != htons(ETH_P_8021Q)) { mbuf->vlan_tci = ntohs(vlan->vlan_id); - mbuf->ol_flags |= PKT_TX_VLAN_PKT; + mbuf->ol_flags |= RTE_MBUF_F_TX_VLAN; } /* hand over it to real device */ @@ -407,7 +407,7 @@ static inline int vlan_untag_mbuf(struct rte_mbuf *mbuf) struct vlan_ethhdr *vehdr = NULL; /* VLAN RX offloaded (vlan stripped by HW) ? 
*/ - if (mbuf->ol_flags & PKT_RX_VLAN_STRIPPED) + if (mbuf->ol_flags & RTE_MBUF_F_RX_VLAN_STRIPPED) return EDPVS_OK; if (unlikely(mbuf_may_pull(mbuf, sizeof(struct rte_ether_hdr) + \ @@ -417,7 +417,7 @@ static inline int vlan_untag_mbuf(struct rte_mbuf *mbuf) /* the data_off of mbuf is still at ethernet header. */ vehdr = rte_pktmbuf_mtod(mbuf, struct vlan_ethhdr *); - mbuf->ol_flags |= PKT_RX_VLAN_STRIPPED; /* "borrow" it */ + mbuf->ol_flags |= RTE_MBUF_F_RX_VLAN_STRIPPED; /* "borrow" it */ mbuf->vlan_tci = ntohs(vehdr->h_vlan_TCI); /* strip the vlan header */ @@ -455,11 +455,11 @@ int vlan_rcv(struct rte_mbuf *mbuf, struct netif_port *real_dev) * "Our lower layer thinks this is not local, let's make sure. * This allows the VLAN to have a different MAC than the * underlying device, and still route correctly." */ - if (eth_addr_equal(&ehdr->d_addr, &dev->addr)) + if (eth_addr_equal(&ehdr->dst_addr, &dev->addr)) mbuf->packet_type = ETH_PKT_HOST; } - mbuf->ol_flags &= (~PKT_RX_VLAN_STRIPPED); + mbuf->ol_flags &= (~RTE_MBUF_F_RX_VLAN_STRIPPED); mbuf->vlan_tci = 0; /* statistics */ diff --git a/tools/dpip/link.c b/tools/dpip/link.c index acaab26b5..16e264a57 100644 --- a/tools/dpip/link.c +++ b/tools/dpip/link.c @@ -274,6 +274,8 @@ static int dump_nic_basic(char *name, int namelen) printf("OF_TX_TCP_CSUM "); if (get.ol_tx_udp_csum) printf("OF_TX_UDP_CSUM "); + if (get.ol_tx_fast_free) + printf("OF_TX_FAST_FREE "); printf("\n"); return EDPVS_OK; diff --git a/tools/dpvs-agent/cmd/dpvs-agent-server/Makefile b/tools/dpvs-agent/cmd/dpvs-agent-server/Makefile index e5cd00eef..61b1dc149 100644 --- a/tools/dpvs-agent/cmd/dpvs-agent-server/Makefile +++ b/tools/dpvs-agent/cmd/dpvs-agent-server/Makefile @@ -11,7 +11,7 @@ RM = rm all: $(TARGET) -$(TARGET): +$(TARGET): go-proxy -$(GO) mod tidy $(GO_BUILD) -o $@ @@ -19,6 +19,9 @@ clean: $(GO_CLEAN) -$(RM) $(TARGET) +go-proxy: + $(GO) env -w GOPROXY=https://goproxy.cn,direct + install: $(TARGET) $(INSTALL) -m 0755 -d $(INSDIR) 
$(INSTALL) -m 0744 $(TARGET) $(INSDIR) diff --git a/tools/healthcheck/Makefile b/tools/healthcheck/Makefile index fe4d9400a..7f00cfa01 100644 --- a/tools/healthcheck/Makefile +++ b/tools/healthcheck/Makefile @@ -9,10 +9,13 @@ GO_CLEAN = $(GO) clean all: $(TARGET) -$(TARGET): +$(TARGET): go-proxy -$(GO) mod tidy $(GO_BUILD) -o $@ +go-proxy: + $(GO) env -w GOPROXY=https://goproxy.cn,direct + clean: $(GO_CLEAN)