【微知】RDMA用户态库相关的多个库及其功能分析(一)

背景

本文介绍了与RDMA(远程直接内存访问)相关的多个库及其功能。首先,rdma-core-devel库提供了开发所需的头文件和库。其次,libibverbs库是RDMA的核心库,提供了与verbs相关的接口,包括计数器操作(创建、销毁和读取)。librdmacm库则提供了RDMA通信管理相关的头文件。libibumad库属于InfiniBand的用户空间管理数据报工具,而非RDMA域。此外,本文还介绍了几个工具库,如librdmacm-utils和libibverbs-utils,它们提供了如ucmatose、ibv_devinfo等实用工具,用于查询设备信息和限制参数(如最大QP、CQ、MR等)。这些库和工具为RDMA应用的开发和调试提供了重要支持。

x 各种lib库

x.1 rdma-core-devel udev等触发机制

头文件:
在这里插入图片描述
库:
在这里插入图片描述
早期ibdev2netdev也在rdma-core中:

x.2 libibverbs IB的verbs的库,最核心的库提供verbs

x.2.1 提供的头文件(cmake中)

在这里插入图片描述

x.2.2 ibverbs中counter相关的action

/*
 * Method identifiers for the counters uverbs object.
 * No explicit values are given, so the enumerators are numbered 0, 1, 2
 * in declaration order.
 */
enum uverbs_methods_actions_counters_ops {
	/* Create a counters object. */
	UVERBS_METHOD_COUNTERS_CREATE,
	/* Destroy a counters object. */
	UVERBS_METHOD_COUNTERS_DESTROY,
	/* Read the values of a counters object. */
	UVERBS_METHOD_COUNTERS_READ,
};

3个action对应3个操作函数
在这里插入图片描述

x.3 librdmacm RDMACM的库CM建链相关

提供的头文件(cmake中)
在这里插入图片描述

x.4 libibumad

这个库属于IB的umad(Userspace Management Datagram,用户空间管理数据报),不属于RDMA域

x.5 util类的库

x.5.1 librdmacm-utils 工具库 CM建链相关的

提供的工具,比如比较常用的ucmatose
在这里插入图片描述

x.5.2 libibverbs-utils工具

x.5.2.1 基础信息

提供ibv_devinfo和ibv_devices
在这里插入图片描述
比如:

x.5.2.2 ibv_devinfo的简单信息

该命令可以查看一些限制信息,比如:
max_qp
max_cq
max_mr
max_pd
max_mtu

[root@localhost ~]# ibv_devinfo  -v
hca_id: mlx5_0
        transport:                      InfiniBand (0)
        fw_ver:                         16.31.1014
        node_guid:                      08c0:eb03:00f4:e6a6
        sys_image_guid:                 08c0:eb03:00f4:e6a6
        vendor_id:                      0x02c9
        vendor_part_id:                 4119
        hw_ver:                         0x0
        board_id:                       MT_0000000080
        phys_port_cnt:                  1
        max_mr_size:                    0xffffffffffffffff
        page_size_cap:                  0xfffffffffffff000
        max_qp:                         262144
        max_qp_wr:                      32768
        device_cap_flags:               0x25321c36
                                        BAD_PKEY_CNTR
                                        BAD_QKEY_CNTR
                                        AUTO_PATH_MIG
                                        CHANGE_PHY_PORT
                                        PORT_ACTIVE_EVENT
                                        SYS_IMAGE_GUID
                                        RC_RNR_NAK_GEN
                                        MEM_WINDOW
                                        XRC
                                        MEM_MGT_EXTENSIONS
                                        MEM_WINDOW_TYPE_2B
                                        RAW_IP_CSUM
                                        MANAGED_FLOW_STEERING
        max_sge:                        30
        max_sge_rd:                     30
        max_cq:                         16777216
        max_cqe:                        4194303
        max_mr:                         16777216
        max_pd:                         8388608
        max_qp_rd_atom:                 16
        max_ee_rd_atom:                 0
        max_res_rd_atom:                4194304
        max_qp_init_rd_atom:            16
        max_ee_init_rd_atom:            0
        atomic_cap:                     ATOMIC_HCA (1)
        max_ee:                         0
        max_rdd:                        0
        max_mw:                         16777216
        max_raw_ipv6_qp:                0
        max_raw_ethy_qp:                0
        max_mcast_grp:                  2097152
        max_mcast_qp_attach:            240
        max_total_mcast_qp_attach:      503316480
        max_ah:                         2147483647
        max_fmr:                        0
        max_srq:                        8388608
        max_srq_wr:                     32767
        max_srq_sge:                    31
        max_pkeys:                      128
        local_ca_ack_delay:             16
        general_odp_caps:
                                        ODP_SUPPORT
                                        ODP_SUPPORT_IMPLICIT
        rc_odp_caps:
                                        SUPPORT_SEND
                                        SUPPORT_RECV
                                        SUPPORT_WRITE
                                        SUPPORT_READ
                                        SUPPORT_SRQ
        uc_odp_caps:
                                        NO SUPPORT
        ud_odp_caps:
                                        SUPPORT_SEND
        xrc_odp_caps:
                                        SUPPORT_SEND
                                        SUPPORT_WRITE
                                        SUPPORT_READ
                                        SUPPORT_SRQ
        completion timestamp_mask:                      0x7fffffffffffffff
        hca_core_clock:                 156250kHZ
        raw packet caps:
                                        C-VLAN stripping offload
                                        Scatter FCS offload
                                        IP csum offload
                                        Delay drop
        device_cap_flags_ex:            0x1425321C36
                                        RAW_SCATTER_FCS
                                        PCI_WRITE_END_PADDING
        tso_caps:
                max_tso:                        262144
                supported_qp:
                                        SUPPORT_RAW_PACKET
        rss_caps:
                max_rwq_indirection_tables:                     1048576
                max_rwq_indirection_table_size:                 2048
                rx_hash_function:                               0x1
                rx_hash_fields_mask:                            0x800000FF
                supported_qp:
                                        SUPPORT_RAW_PACKET
        max_wq_type_rq:                 8388608
        packet_pacing_caps:
                qp_rate_limit_min:      1kbps
                qp_rate_limit_max:      25000000kbps
                supported_qp:
                                        SUPPORT_RAW_PACKET
        tag matching not supported

        cq moderation caps:
                max_cq_count:   65535
                max_cq_period:  4095 us

        maximum available device memory:        131072Bytes

        num_comp_vectors:               4
                port:   1
                        state:                  PORT_ACTIVE (4)
                        max_mtu:                4096 (5)
                        active_mtu:             1024 (3)
                        sm_lid:                 0
                        port_lid:               0
                        port_lmc:               0x00
                        link_layer:             Ethernet
                        max_msg_sz:             0x40000000
                        port_cap_flags:         0x04010000
                        port_cap_flags2:        0x0000
                        max_vl_num:             invalid value (0)
                        bad_pkey_cntr:          0x0
                        qkey_viol_cntr:         0x0
                        sm_sl:                  0
                        pkey_tbl_len:           1
                        gid_tbl_len:            255
                        subnet_timeout:         0
                        init_type_reply:        0
                        active_width:           1X (1)
                        active_speed:           25.0 Gbps (32)
                        phys_state:             LINK_UP (5)
                        GID[  0]:               fe80:0000:0000:0000:0ac0:ebff:fef4:e6a6, RoCE v1
                        GID[  1]:               fe80::ac0:ebff:fef4:e6a6, RoCE v2

hca_id: mlx5_1
        transport:                      InfiniBand (0)
        fw_ver:                         16.31.1014
        node_guid:                      08c0:eb03:00f4:e6a7
        sys_image_guid:                 08c0:eb03:00f4:e6a6
        vendor_id:                      0x02c9
        vendor_part_id:                 4119
        hw_ver:                         0x0
        board_id:                       MT_0000000080
        phys_port_cnt:                  1
        max_mr_size:                    0xffffffffffffffff
        page_size_cap:                  0xfffffffffffff000
        max_qp:                         262144
        max_qp_wr:                      32768
        device_cap_flags:               0x25321c36
                                        BAD_PKEY_CNTR
                                        BAD_QKEY_CNTR
                                        AUTO_PATH_MIG
                                        CHANGE_PHY_PORT
                                        PORT_ACTIVE_EVENT
                                        SYS_IMAGE_GUID
                                        RC_RNR_NAK_GEN
                                        MEM_WINDOW
                                        XRC
                                        MEM_MGT_EXTENSIONS
                                        MEM_WINDOW_TYPE_2B
                                        RAW_IP_CSUM
                                        MANAGED_FLOW_STEERING
        max_sge:                        30
        max_sge_rd:                     30
        max_cq:                         16777216
        max_cqe:                        4194303
        max_mr:                         16777216
        max_pd:                         8388608
        max_qp_rd_atom:                 16
        max_ee_rd_atom:                 0
        max_res_rd_atom:                4194304
        max_qp_init_rd_atom:            16
        max_ee_init_rd_atom:            0
        atomic_cap:                     ATOMIC_HCA (1)
        max_ee:                         0
        max_rdd:                        0
        max_mw:                         16777216
        max_raw_ipv6_qp:                0
        max_raw_ethy_qp:                0
        max_mcast_grp:                  2097152
        max_mcast_qp_attach:            240
        max_total_mcast_qp_attach:      503316480
        max_ah:                         2147483647
        max_fmr:                        0
        max_srq:                        8388608
        max_srq_wr:                     32767
        max_srq_sge:                    31
        max_pkeys:                      128
        local_ca_ack_delay:             16
        general_odp_caps:
                                        ODP_SUPPORT
                                        ODP_SUPPORT_IMPLICIT
        rc_odp_caps:
                                        SUPPORT_SEND
                                        SUPPORT_RECV
                                        SUPPORT_WRITE
                                        SUPPORT_READ
                                        SUPPORT_SRQ
        uc_odp_caps:
                                        NO SUPPORT
        ud_odp_caps:
                                        SUPPORT_SEND
        xrc_odp_caps:
                                        SUPPORT_SEND
                                        SUPPORT_WRITE
                                        SUPPORT_READ
                                        SUPPORT_SRQ
        completion timestamp_mask:                      0x7fffffffffffffff
        hca_core_clock:                 156250kHZ
        raw packet caps:
                                        C-VLAN stripping offload
                                        Scatter FCS offload
                                        IP csum offload
                                        Delay drop
        device_cap_flags_ex:            0x1425321C36
                                        RAW_SCATTER_FCS
                                        PCI_WRITE_END_PADDING
        tso_caps:
                max_tso:                        262144
                supported_qp:
                                        SUPPORT_RAW_PACKET
        rss_caps:
                max_rwq_indirection_tables:                     1048576
                max_rwq_indirection_table_size:                 2048
                rx_hash_function:                               0x1
                rx_hash_fields_mask:                            0x800000FF
                supported_qp:
                                        SUPPORT_RAW_PACKET
        max_wq_type_rq:                 8388608
        packet_pacing_caps:
                qp_rate_limit_min:      1kbps
                qp_rate_limit_max:      25000000kbps
                supported_qp:
                                        SUPPORT_RAW_PACKET
        tag matching not supported

        cq moderation caps:
                max_cq_count:   65535
                max_cq_period:  4095 us

        maximum available device memory:        131072Bytes

        num_comp_vectors:               4
                port:   1
                        state:                  PORT_ACTIVE (4)
                        max_mtu:                4096 (5)
                        active_mtu:             1024 (3)
                        sm_lid:                 0
                        port_lid:               0
                        port_lmc:               0x00
                        link_layer:             Ethernet
                        max_msg_sz:             0x40000000
                        port_cap_flags:         0x04010000
                        port_cap_flags2:        0x0000
                        max_vl_num:             invalid value (0)
                        bad_pkey_cntr:          0x0
                        qkey_viol_cntr:         0x0
                        sm_sl:                  0
                        pkey_tbl_len:           1
                        gid_tbl_len:            255
                        subnet_timeout:         0
                        init_type_reply:        0
                        active_width:           1X (1)
                        active_speed:           25.0 Gbps (32)
                        phys_state:             LINK_UP (5)
                        GID[  0]:               fe80:0000:0000:0000:0ac0:ebff:fef4:e6a7, RoCE v1
                        GID[  1]:               fe80::ac0:ebff:fef4:e6a7, RoCE v2
x.5.2.3 ibv_devices的简单信息
[root@localhost ~]# ibv_devices  -h
    device                 node GUID
    ------              ----------------
    mlx5_0              08c0eb0300f4e6a6
    mlx5_1              08c0eb0300f4e6a7

x.5.3 infiniband-diags-compat的库 IB的调试工具库 不常用

提供的工具:
在这里插入图片描述

x.5.4 infiniband-diags IB的调试工具库,主要ibstat查看状态

可以看到
在这里插入图片描述

x.5.4.1 ibstatus 查看速率、端口、物理状态等
[root@localhost ~]# ibstatus 
Infiniband device 'mlx5_0' port 1 status:
        default gid:     fe80:0000:0000:0000:0ac0:ebff:fef4:e6a6
        base lid:        0x0
        sm lid:          0x0
        state:           4: ACTIVE
        phys state:      5: LinkUp
        rate:            25 Gb/sec (1X EDR)
        link_layer:      Ethernet

Infiniband device 'mlx5_1' port 1 status:
        default gid:     fe80:0000:0000:0000:0ac0:ebff:fef4:e6a7
        base lid:        0x0
        sm lid:          0x0
        state:           4: ACTIVE
        phys state:      5: LinkUp
        rate:            25 Gb/sec (1X EDR)
        link_layer:      Ethernet
x.5.4.2 ibstat 查看型号、固件、速率、状态等
[root@localhost ~]# ibstat -v
CA 'mlx5_0'
        CA type: MT4119
        Number of ports: 1
        Firmware version: 16.31.1014
        Hardware version: 0
        Node GUID: 0x08c0eb0300f4e6a6
        System image GUID: 0x08c0eb0300f4e6a6
        Port 1:
                State: Active
                Physical state: LinkUp
                Rate: 25
                Base lid: 0
                LMC: 0
                SM lid: 0
                Capability mask: 0x00010000
                Port GUID: 0x0ac0ebfffef4e6a6
                Link layer: Ethernet
CA 'mlx5_1'
        CA type: MT4119
        Number of ports: 1
        Firmware version: 16.31.1014
        Hardware version: 0
        Node GUID: 0x08c0eb0300f4e6a7
        System image GUID: 0x08c0eb0300f4e6a6
        Port 1:
                State: Active
                Physical state: LinkUp
                Rate: 25
                Base lid: 0
                LMC: 0
                SM lid: 0
                Capability mask: 0x00010000
                Port GUID: 0x0ac0ebfffef4e6a7
                Link layer: Ethernet

y 其他

y.1 ioctl下发的cmd buffer结构如何?机制如何?

y.1.1 先看这个cmd buffer在何时怎么用的?

用户态:
比如要读取一个counter,最终会调用ioctl
在这里插入图片描述
设置header后就下发:
在这里插入图片描述

y.1.2 再看这个cmd被初始化的地方:

在这里插入图片描述
初始化cmdbuf中的objid和method:objid是counters对象(UVERBS_OBJECT_COUNTERS),method是counter read(UVERBS_METHOD_COUNTERS_READ),最后通过ioctl下发
在这里插入图片描述

y.1.3 除了前面的counter的obj,还有哪些其他的obj:

ioctl
cmdbuf
obj1
obj2
method1
method2

举例:

ib_user_ioctl_cmds.h
ib_user_ioctl_cmds.h
ioctl
cmdbuf
UVERBS_OBJECT_PD
UVERBS_OBJECT_MR
UVERBS_OBJECT_QP
UVERBS_OBJECT_COUNTERS
UVERBS_METHOD_COUNTERS_CREATE
UVERBS_METHOD_COUNTERS_DESTROY

在这里插入图片描述

y.1.4 libibverbs下面的其他cmd的obj的代码存在形式

libibverbs下面的代码大多以cmd_xxx.c的方式存在,这些文件的实现机制与前面的counter类似:定义接口,初始化cmdbuf,设置其中的objid和methodid,然后通过ioctl下发。
在这里插入图片描述
比如qp的:一开始就定义cmdbuf
在这里插入图片描述

可以看到重要结构

ibv_command_buffer

/*
 * Userspace command buffer used to build one uverbs ioctl request.
 * It holds bookkeeping fields and ends with the ioctl header (hdr), which
 * carries the object id, the method id and the attribute array.
 */
struct ibv_command_buffer {
	/*
	 * Link to another command buffer.
	 * NOTE(review): presumably used to chain a driver buffer to the
	 * common one - confirm against the cmd_ioctl helpers.
	 */
	struct ibv_command_buffer *next;
	/*
	 * NOTE(review): presumably the next free attribute slot and the end
	 * of the attribute storage in hdr.attrs - confirm.
	 */
	struct ib_uverbs_attr *next_attr;
	struct ib_uverbs_attr *last_attr;
	/*
	 * Used by the legacy write interface to keep track of where the UHW
	 * buffer is located and the 'headroom' space that the common code
	 * uses to construct the command header and common command struct
	 * directly before the drivers' UHW.
	 */
	uint8_t uhw_in_idx;
	uint8_t uhw_out_idx;
	uint8_t uhw_in_headroom_dwords;
	uint8_t uhw_out_headroom_dwords;

	/*
	 * One-bit error flag.
	 * NOTE(review): presumably set when filling the buffer failed -
	 * confirm where it is written.
	 */
	uint8_t buffer_error:1;
	/*
	 * These flags control what execute_ioctl_fallback does if the kernel
	 * does not support ioctl
	 */
	uint8_t fallback_require_ex:1;
	uint8_t fallback_ioctl_only:1;
	/* Header handed to the kernel; ends in a flexible attrs[] array, so it must stay the last member. */
	struct ib_uverbs_ioctl_hdr hdr;
};

/*
 * Header of a uverbs ioctl request. It names the target object and method
 * and is immediately followed by a variable-length array of attributes.
 */
struct ib_uverbs_ioctl_hdr {
	/* NOTE(review): presumably the total size in bytes of header plus attributes - confirm against the kernel uapi. */
	__u16 length;
	/* Target object, e.g. UVERBS_OBJECT_COUNTERS. */
	__u16 object_id;
	/* Method on that object, e.g. UVERBS_METHOD_COUNTERS_READ. */
	__u16 method_id;
	/* NOTE(review): presumably the number of entries in attrs[] - confirm. */
	__u16 num_attrs;
	__aligned_u64 reserved1;
	__u32 driver_id;
	__u32 reserved2;
	/* Flexible array member: the attributes follow the header in memory. */
	struct ib_uverbs_attr  attrs[];
};


/*
 * One attribute of a uverbs ioctl request: an id, a length, flags, a small
 * type-specific field (attr_data) and a 64-bit payload that can be read
 * either as unsigned (data) or signed (data_s64).
 */
struct ib_uverbs_attr {
	__u16 attr_id;		/* command specific type attribute */
	__u16 len;		/* only for pointers and IDRs array */
	__u16 flags;		/* combination of UVERBS_ATTR_F_XXXX */
	/* 16 bits of per-attribute data: an enum element id, or reserved. */
	union {
		struct {
			__u8 elem_id;
			__u8 reserved;
		} enum_data;
		__u16 reserved;
	} attr_data;
	/* Anonymous union: both members alias the same 64-bit payload. */
	union {
		/*
		 * ptr to command, inline data, idr/fd or
		 * ptr to __u32 array of IDRs
		 */
		__aligned_u64 data;
		/* Used by FD_IN and FD_OUT */
		__s64 data_s64;
	};
};
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值