Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions include/net/netns/smc.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ struct netns_smc {
#ifdef CONFIG_SYSCTL
struct ctl_table_header *smc_hdr;
#endif
#if IS_ENABLED(CONFIG_SMC_HS_CTRL_BPF)
struct smc_hs_ctrl __rcu *hs_ctrl;
#endif /* CONFIG_SMC_HS_CTRL_BPF */
unsigned int sysctl_autocorking_size;
unsigned int sysctl_smcr_buf_type;
int sysctl_smcr_testlink_time;
Expand Down
53 changes: 53 additions & 0 deletions include/net/smc.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
#include <linux/wait.h>
#include <linux/dibs.h>

struct tcp_sock;
struct inet_request_sock;
struct sock;

#define SMC_MAX_PNETID_LEN 16 /* Max. length of PNET id */
Expand Down Expand Up @@ -50,4 +52,55 @@ struct smcd_dev {
u8 going_away : 1;
};

#define SMC_HS_CTRL_NAME_MAX 16

/* Flag bits accepted in smc_hs_ctrl.flags. */
enum {
/* ops can be inherited from init_net */
SMC_HS_CTRL_FLAG_INHERITABLE = 0x1,

/* mask of every flag bit defined above */
SMC_HS_CTRL_ALL_FLAGS = SMC_HS_CTRL_FLAG_INHERITABLE,
};

/* Set of handshake hooks that decide whether the SMC TCP option is used
 * for SYN and SYN-ACK packets.
 */
struct smc_hs_ctrl {
/* private */

struct list_head list;
struct module *owner;

/* public */

/* unique name */
char name[SMC_HS_CTRL_NAME_MAX];
/* SMC_HS_CTRL_FLAG_* bits */
int flags;

/* Invoked before computing the SMC option for SYN packets.
 * Controls whether the SMC option is set: return 0 to disable SMC,
 * or any other value to enable it.
 */
int (*syn_option)(struct tcp_sock *tp);

/* Invoked before setting up the SMC option for SYN-ACK packets.
 * Controls whether to respond with the SMC option: return 0 to
 * disable SMC, or any other value to enable it.
 */
int (*synack_option)(const struct tcp_sock *tp,
struct inet_request_sock *ireq);
};

#if IS_ENABLED(CONFIG_SMC_HS_CTRL_BPF)
/* smc_call_hsbpf - invoke a handshake hook of the netns' smc_hs_ctrl, if any
 * @init_val: result when no ctrl is attached or the requested hook is NULL
 * @tp: tcp socket; selects the netns and is passed as first hook argument
 * @func: name of the struct smc_hs_ctrl callback member to invoke
 * @...: extra arguments forwarded to the callback
 *
 * The ctrl pointer is fetched and the hook is called between
 * rcu_read_lock() and rcu_read_unlock(). @tp is evaluated exactly once, and
 * all macro-local variables carry a __ prefix so that they cannot capture
 * identifiers appearing in the caller's argument expressions.
 */
#define smc_call_hsbpf(init_val, tp, func, ...) ({			\
	typeof(init_val) __ret = (init_val);				\
	typeof(tp) __tp = (tp);						\
	struct smc_hs_ctrl *__ctrl;					\
	rcu_read_lock();						\
	__ctrl = rcu_dereference(sock_net((struct sock *)__tp)->smc.hs_ctrl); \
	if (__ctrl && __ctrl->func)					\
		__ret = __ctrl->func(__tp, ##__VA_ARGS__);		\
	rcu_read_unlock();						\
	__ret;								\
})
#else
/* Hook support compiled out: evaluate @tp once and yield @init_val. */
#define smc_call_hsbpf(init_val, tp, ...) ({ (void)(tp); (init_val); })
#endif /* CONFIG_SMC_HS_CTRL_BPF */

#endif /* _SMC_H */
2 changes: 2 additions & 0 deletions kernel/bpf/bpf_struct_ops.c
Original file line number Diff line number Diff line change
Expand Up @@ -1162,6 +1162,7 @@ bool bpf_struct_ops_get(const void *kdata)
map = __bpf_map_inc_not_zero(&st_map->map, false);
return !IS_ERR(map);
}
EXPORT_SYMBOL_GPL(bpf_struct_ops_get);

void bpf_struct_ops_put(const void *kdata)
{
Expand All @@ -1173,6 +1174,7 @@ void bpf_struct_ops_put(const void *kdata)

bpf_map_put(&st_map->map);
}
EXPORT_SYMBOL_GPL(bpf_struct_ops_put);

u32 bpf_struct_ops_id(const void *kdata)
{
Expand Down
1 change: 1 addition & 0 deletions kernel/bpf/syscall.c
Original file line number Diff line number Diff line change
Expand Up @@ -1234,6 +1234,7 @@ int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size)

return src - orig_src;
}
EXPORT_SYMBOL_GPL(bpf_obj_name_cpy);

int map_check_no_btf(const struct bpf_map *map,
const struct btf *btf,
Expand Down
31 changes: 17 additions & 14 deletions net/ipv4/tcp_output.c
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
#include <net/tcp.h>
#include <net/tcp_ecn.h>
#include <net/mptcp.h>
#include <net/smc.h>
#include <net/proto_memory.h>
#include <net/psp.h>

Expand Down Expand Up @@ -802,34 +803,36 @@ static void tcp_options_write(struct tcphdr *th, struct tcp_sock *tp,
mptcp_options_write(th, ptr, tp, opts);
}

static void smc_set_option(const struct tcp_sock *tp,
static void smc_set_option(struct tcp_sock *tp,
struct tcp_out_options *opts,
unsigned int *remaining)
{
#if IS_ENABLED(CONFIG_SMC)
if (static_branch_unlikely(&tcp_have_smc)) {
if (tp->syn_smc) {
if (*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
opts->options |= OPTION_SMC;
*remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED;
}
if (static_branch_unlikely(&tcp_have_smc) && tp->syn_smc) {
tp->syn_smc = !!smc_call_hsbpf(1, tp, syn_option);
/* re-check syn_smc */
if (tp->syn_smc &&
*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
opts->options |= OPTION_SMC;
*remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED;
}
}
#endif
}

static void smc_set_option_cond(const struct tcp_sock *tp,
const struct inet_request_sock *ireq,
struct inet_request_sock *ireq,
struct tcp_out_options *opts,
unsigned int *remaining)
{
#if IS_ENABLED(CONFIG_SMC)
if (static_branch_unlikely(&tcp_have_smc)) {
if (tp->syn_smc && ireq->smc_ok) {
if (*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
opts->options |= OPTION_SMC;
*remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED;
}
if (static_branch_unlikely(&tcp_have_smc) && tp->syn_smc && ireq->smc_ok) {
ireq->smc_ok = !!smc_call_hsbpf(1, tp, synack_option, ireq);
/* re-check smc_ok */
if (ireq->smc_ok &&
*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
opts->options |= OPTION_SMC;
*remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED;
}
}
#endif
Expand Down
10 changes: 10 additions & 0 deletions net/smc/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,13 @@ config SMC_DIAG
smcss.

if unsure, say Y.

config SMC_HS_CTRL_BPF
bool "Generic eBPF hook for SMC handshake flow"
depends on SMC && BPF_SYSCALL
default y
help
SMC_HS_CTRL_BPF enables support for registering a generic eBPF hook for the
SMC handshake flow, which offers much greater flexibility in modifying the
behavior of the SMC protocol stack compared to a purely kernel-based
approach. Select this option if you want to filter the handshake process
via eBPF programs.
1 change: 1 addition & 0 deletions net/smc/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o smc_llc.o
smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o smc_ism.o smc_netlink.o smc_stats.o
smc-y += smc_tracepoint.o smc_inet.o
smc-$(CONFIG_SYSCTL) += smc_sysctl.o
smc-$(CONFIG_SMC_HS_CTRL_BPF) += smc_hs_bpf.o
9 changes: 9 additions & 0 deletions net/smc/af_smc.c
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
#include "smc_tracepoint.h"
#include "smc_sysctl.h"
#include "smc_inet.h"
#include "smc_hs_bpf.h"

static DEFINE_MUTEX(smc_server_lgr_pending); /* serialize link group
* creation on server
Expand Down Expand Up @@ -3600,8 +3601,16 @@ static int __init smc_init(void)
pr_err("%s: smc_inet_init fails with %d\n", __func__, rc);
goto out_ulp;
}
rc = bpf_smc_hs_ctrl_init();
if (rc) {
pr_err("%s: bpf_smc_hs_ctrl_init fails with %d\n", __func__,
rc);
goto out_inet;
}
static_branch_enable(&tcp_have_smc);
return 0;
out_inet:
smc_inet_exit();
out_ulp:
tcp_unregister_ulp(&smc_ulp_ops);
out_ib:
Expand Down
140 changes: 140 additions & 0 deletions net/smc/smc_hs_bpf.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Shared Memory Communications over RDMA (SMC-R) and RoCE
*
* Generic hook for SMC handshake flow.
*
* Copyright IBM Corp. 2016
* Copyright (c) 2025, Alibaba Inc.
*
* Author: D. Wythe <alibuda@linux.alibaba.com>
*/

#include <linux/bpf_verifier.h>
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/rculist.h>

#include "smc_hs_bpf.h"

static DEFINE_SPINLOCK(smc_hs_ctrl_list_lock);
static LIST_HEAD(smc_hs_ctrl_list);

/* Add @ctrl to the tail of smc_hs_ctrl_list.
 *
 * The name lookup and the insertion both happen under
 * smc_hs_ctrl_list_lock.
 *
 * Return: 0 on success, -EEXIST if a ctrl with the same name is already
 * on the list.
 */
static int smc_hs_ctrl_reg(struct smc_hs_ctrl *ctrl)
{
	int rc = -EEXIST;

	spin_lock(&smc_hs_ctrl_list_lock);
	/* only insert when the name is not taken yet */
	if (!smc_hs_ctrl_find_by_name(ctrl->name)) {
		list_add_tail_rcu(&ctrl->list, &smc_hs_ctrl_list);
		rc = 0;
	}
	spin_unlock(&smc_hs_ctrl_list_lock);

	return rc;
}

/* Unlink @ctrl from smc_hs_ctrl_list under smc_hs_ctrl_list_lock, then wait
 * for an RCU grace period before returning.
 */
static void smc_hs_ctrl_unreg(struct smc_hs_ctrl *ctrl)
{
spin_lock(&smc_hs_ctrl_list_lock);
list_del_rcu(&ctrl->list);
spin_unlock(&smc_hs_ctrl_list_lock);

/* Ensure that all readers have completed */
synchronize_rcu();
}

struct smc_hs_ctrl *smc_hs_ctrl_find_by_name(const char *name)
{
struct smc_hs_ctrl *ctrl;

list_for_each_entry_rcu(ctrl, &smc_hs_ctrl_list, list) {
if (strcmp(ctrl->name, name) == 0)
return ctrl;
}
return NULL;
}

/* Stub for smc_hs_ctrl::syn_option; unconditionally returns 1. */
static int __smc_bpf_stub_set_tcp_option(struct tcp_sock *tp)
{
	return 1;
}

/* Stub for smc_hs_ctrl::synack_option; unconditionally returns 1. */
static int
__smc_bpf_stub_set_tcp_option_cond(const struct tcp_sock *tp,
				   struct inet_request_sock *ireq)
{
	return 1;
}

/* Stub ops instance wired up as .cfi_stubs of bpf_smc_hs_ctrl_ops; each
 * callback points at the matching __smc_bpf_stub_* function.
 */
static struct smc_hs_ctrl __smc_bpf_hs_ctrl = {
.syn_option = __smc_bpf_stub_set_tcp_option,
.synack_option = __smc_bpf_stub_set_tcp_option_cond,
};

/* .init callback of bpf_smc_hs_ctrl_ops: nothing to set up, returns 0. */
static int smc_bpf_hs_ctrl_init(struct btf *btf)
{
	return 0;
}

/* .reg callback of bpf_smc_hs_ctrl_ops.
 *
 * A non-NULL @link is rejected with -EOPNOTSUPP; otherwise @kdata is
 * handed to smc_hs_ctrl_reg() and its result is returned.
 */
static int smc_bpf_hs_ctrl_reg(void *kdata, struct bpf_link *link)
{
	return link ? -EOPNOTSUPP : smc_hs_ctrl_reg(kdata);
}

/* .unreg callback of bpf_smc_hs_ctrl_ops: forwards @kdata to
 * smc_hs_ctrl_unreg(); @link is not used.
 */
static void smc_bpf_hs_ctrl_unreg(void *kdata, struct bpf_link *link)
{
smc_hs_ctrl_unreg(kdata);
}

/* .init_member callback of bpf_smc_hs_ctrl_ops.
 *
 * Copies the non-callback members of struct smc_hs_ctrl from the
 * user-supplied image @udata into the kernel image @kdata, validating them
 * on the way. The member is identified by its byte offset in the struct.
 *
 * Return: 1 when @member is 'name' or 'flags' and was copied here,
 * -EINVAL when the name copy yields a non-positive length or unknown flag
 * bits are set, 0 for every other member.
 * NOTE(review): 1 presumably tells the struct_ops core that the member
 * needs no further handling - confirm against bpf_struct_ops.c.
 */
static int smc_bpf_hs_ctrl_init_member(const struct btf_type *t,
				       const struct btf_member *member,
				       void *kdata, const void *udata)
{
	const struct smc_hs_ctrl *src = udata;
	struct smc_hs_ctrl *dst = kdata;
	u32 byte_off = __btf_member_bit_offset(t, member) / 8;

	if (byte_off == offsetof(struct smc_hs_ctrl, name)) {
		if (bpf_obj_name_cpy(dst->name, src->name,
				     sizeof(src->name)) <= 0)
			return -EINVAL;
		return 1;
	}

	if (byte_off == offsetof(struct smc_hs_ctrl, flags)) {
		/* refuse any bit outside the known flag set */
		if (src->flags & ~SMC_HS_CTRL_ALL_FLAGS)
			return -EINVAL;
		dst->flags = src->flags;
		return 1;
	}

	return 0;
}

/* .get_func_proto callback of smc_bpf_verifier_ops: defers entirely to
 * bpf_base_func_proto() for @func_id and @prog.
 */
static const struct bpf_func_proto *
bpf_smc_hs_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
return bpf_base_func_proto(func_id, prog);
}

/* Verifier callbacks referenced by bpf_smc_hs_ctrl_ops.verifier_ops. */
static const struct bpf_verifier_ops smc_bpf_verifier_ops = {
.get_func_proto = bpf_smc_hs_func_proto,
.is_valid_access = bpf_tracing_btf_ctx_access,
};

/* struct_ops descriptor for "smc_hs_ctrl"; handed to
 * register_bpf_struct_ops() by bpf_smc_hs_ctrl_init().
 */
static struct bpf_struct_ops bpf_smc_hs_ctrl_ops = {
.name = "smc_hs_ctrl",
.init = smc_bpf_hs_ctrl_init,
.reg = smc_bpf_hs_ctrl_reg,
.unreg = smc_bpf_hs_ctrl_unreg,
.cfi_stubs = &__smc_bpf_hs_ctrl,
.verifier_ops = &smc_bpf_verifier_ops,
.init_member = smc_bpf_hs_ctrl_init_member,
.owner = THIS_MODULE,
};

/* Register bpf_smc_hs_ctrl_ops for struct smc_hs_ctrl.
 *
 * Return: whatever register_bpf_struct_ops() returns.
 */
int bpf_smc_hs_ctrl_init(void)
{
return register_bpf_struct_ops(&bpf_smc_hs_ctrl_ops, smc_hs_ctrl);
}
31 changes: 31 additions & 0 deletions net/smc/smc_hs_bpf.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Shared Memory Communications over RDMA (SMC-R) and RoCE
*
* Generic hook for SMC handshake flow.
*
* Copyright IBM Corp. 2016
* Copyright (c) 2025, Alibaba Inc.
*
* Author: D. Wythe <alibuda@linux.alibaba.com>
*/

#ifndef __SMC_HS_CTRL
#define __SMC_HS_CTRL

#include <net/smc.h>

/* Find hs_ctrl by the target name, which is required to be a C string.
 * Return NULL if no such ctrl was found; otherwise, return a valid ctrl.
 *
 * Note: Caller MUST ensure it is invoked under rcu_read_lock.
 */
struct smc_hs_ctrl *smc_hs_ctrl_find_by_name(const char *name);

#if IS_ENABLED(CONFIG_SMC_HS_CTRL_BPF)
/* Set up the BPF handshake-control hook support; a non-zero return is
 * treated as an initialization failure by smc_init().
 */
int bpf_smc_hs_ctrl_init(void);
#else
/* Hook support compiled out: nothing to set up, always returns 0. */
static inline int bpf_smc_hs_ctrl_init(void) { return 0; }
#endif /* CONFIG_SMC_HS_CTRL_BPF */

#endif /* __SMC_HS_CTRL */
Loading
Loading