Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions .github/buildomat/jobs/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -82,3 +82,15 @@ pfexec add_drv xde
banner "test"
pfexec chmod +x /input/xde/work/test/loopback
pfexec /input/xde/work/test/loopback --nocapture

# The multicast test binaries all rely on the same hardcoded device names
# (xde_test_sim0/1, xde_test_vnic0/1), which conflict when tests run in
# parallel, so every binary is run with --test-threads=1.
for mcast_test in multicast_rx multicast_multi_sub multicast_validation; do
	mcast_bin="/input/xde/work/test/$mcast_test"
	pfexec chmod +x "$mcast_bin"
	pfexec "$mcast_bin" --nocapture --test-threads=1
done
21 changes: 21 additions & 0 deletions .github/buildomat/jobs/xde.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@
#: "=/work/release/xde_link.so",
#: "=/work/release/xde_link.so.sha256",
#: "=/work/test/loopback",
#: "=/work/test/multicast_rx",
#: "=/work/test/multicast_multi_sub",
#: "=/work/test/multicast_validation",
#: "=/work/xde.conf",
#: ]
#:
Expand Down Expand Up @@ -116,5 +119,23 @@ loopback_test=$(
cargo build -q --test loopback --message-format=json |\
jq -r "select(.profile.test == true) | .filenames[]"
)
# Build each multicast integration test and capture the path of its test
# executable. For every test target the build is invoked twice: once plainly,
# and once quietly with --message-format=json so that jq can select the
# messages whose profile has `test == true` and print their `filenames`.
# NOTE(review): the first, non-JSON build presumably exists to keep readable
# build output in the job log -- confirm before removing it.
cargo build --test multicast_rx
multicast_rx_test=$(
cargo build -q --test multicast_rx --message-format=json |\
jq -r "select(.profile.test == true) | .filenames[]"
)
cargo build --test multicast_multi_sub
multicast_multi_sub_test=$(
cargo build -q --test multicast_multi_sub --message-format=json |\
jq -r "select(.profile.test == true) | .filenames[]"
)
cargo build --test multicast_validation
multicast_validation_test=$(
cargo build -q --test multicast_validation --message-format=json |\
jq -r "select(.profile.test == true) | .filenames[]"
)
# Copy every captured test executable into /work/test under a fixed name.
mkdir -p /work/test
cp $loopback_test /work/test/loopback
cp $multicast_rx_test /work/test/multicast_rx
cp $multicast_multi_sub_test /work/test/multicast_multi_sub
cp $multicast_validation_test /work/test/multicast_validation
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions README.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ More detail on our benchmarks can be found in xref:bench/README.adoc[bench/READM
* https://rfd.shared.oxide.computer/rfd/0009[RFD 9: Networking Considerations]
* https://rfd.shared.oxide.computer/rfd/0021[RFD 21: User Networking API]
* https://rfd.shared.oxide.computer/rfd/0063[RFD 63: Network Architecture]
* https://rfd.shared.oxide.computer/rfd/488[RFD 488: Multicast]
* https://www.microsoft.com/en-us/research/wp-content/uploads/2017/03/vfp-nsdi-2017-final.pdf[Microsoft's VFP]

== Directory Index
Expand Down
156 changes: 156 additions & 0 deletions bin/opteadm/src/bin/opteadm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ use opte::api::Ipv4Addr;
use opte::api::Ipv6Addr;
use opte::api::MAJOR_VERSION;
use opte::api::MacAddr;
use opte::api::MulticastUnderlay;
use opte::api::Vni;
use opte::print::print_layer;
use opte::print::print_list_layers;
Expand All @@ -27,8 +28,10 @@ use oxide_vpc::api::AddFwRuleReq;
use oxide_vpc::api::AddRouterEntryReq;
use oxide_vpc::api::Address;
use oxide_vpc::api::BOUNDARY_SERVICES_VNI;
use oxide_vpc::api::ClearMcastForwardingReq;
use oxide_vpc::api::ClearVirt2BoundaryReq;
use oxide_vpc::api::ClearVirt2PhysReq;
use oxide_vpc::api::DEFAULT_MULTICAST_VNI;
use oxide_vpc::api::DelRouterEntryReq;
use oxide_vpc::api::DelRouterEntryResp;
use oxide_vpc::api::DhcpCfg;
Expand All @@ -39,22 +42,30 @@ use oxide_vpc::api::FirewallRule;
use oxide_vpc::api::IpCfg;
use oxide_vpc::api::Ipv4Cfg;
use oxide_vpc::api::Ipv6Cfg;
use oxide_vpc::api::McastSubscribeReq;
use oxide_vpc::api::McastUnsubscribeAllReq;
use oxide_vpc::api::McastUnsubscribeReq;
use oxide_vpc::api::NextHopV6;
use oxide_vpc::api::PhysNet;
use oxide_vpc::api::PortInfo;
use oxide_vpc::api::Ports;
use oxide_vpc::api::ProtoFilter;
use oxide_vpc::api::RemFwRuleReq;
use oxide_vpc::api::RemoveCidrResp;
use oxide_vpc::api::Replication;
use oxide_vpc::api::RouterClass;
use oxide_vpc::api::RouterTarget;
use oxide_vpc::api::SNat4Cfg;
use oxide_vpc::api::SNat6Cfg;
use oxide_vpc::api::SetExternalIpsReq;
use oxide_vpc::api::SetFwRulesReq;
use oxide_vpc::api::SetMcastForwardingReq;
use oxide_vpc::api::SetVirt2BoundaryReq;
use oxide_vpc::api::SetVirt2PhysReq;
use oxide_vpc::api::TunnelEndpoint;
use oxide_vpc::api::VpcCfg;
use oxide_vpc::print::print_mcast_fwd;
use oxide_vpc::print::print_mcast_subs;
use oxide_vpc::print::print_v2b;
use oxide_vpc::print::print_v2p;
use std::io;
Expand Down Expand Up @@ -225,6 +236,93 @@ enum Command {
/// Clear a virtual-to-boundary mapping
ClearV2B { prefix: IpCidr, tunnel_endpoint: Vec<Ipv6Addr> },

/// Set a multicast forwarding entry
///
/// Adds or updates a next hop for the specified underlay multicast address.
/// Multiple next hops can be configured for the same underlay address by
/// running this command multiple times (like `swadm route add`). If the
/// same next hop is specified again, its replication mode is updated.
///
/// OPTE routes to `next_hop` (the unicast switch address) to determine which
/// underlay port to use, then sends the packet to the underlay multicast
/// address with a multicast MAC. The switch matches the outer dst IP
/// (multicast) and Geneve replication tag.
SetMcastFwd {
/// The underlay multicast IPv6 address (admin-local scope ff04::/16).
/// This is the outer IPv6 destination in transmitted packets.
underlay: MulticastUnderlay,
/// The unicast IPv6 address of the switch for routing (e.g., fd00::1).
/// OPTE uses this to determine which underlay port to use via the
/// illumos routing table. Multiple next hops can be added by
/// running this command multiple times with the same underlay address.
next_hop: Ipv6Addr,
/// Tx-only replication instruction (tells the switch which port groups to use):
/// - External: front panel ports (decapped, egress to external networks)
/// - Underlay: sled-to-sled ports (underlay multicast replication)
/// - Both: both external and underlay (bifurcated)
///
/// Local same-sled delivery always happens via subscriptions regardless
/// of this setting.
replication: Replication,
},

/// Clear a multicast forwarding entry
ClearMcastFwd {
/// The underlay multicast IPv6 address (admin-local scope ff04::/16)
underlay: MulticastUnderlay,
},

/// Dump the multicast forwarding table
DumpMcastFwd,

/// Dump multicast subscriptions (group -> ports on this sled)
DumpMcastSubs,

/// Subscribe a port to a multicast group
///
/// Allows a port to receive multicast traffic for the specified group.
/// The group address is an overlay multicast address which is translated
/// to an underlay IPv6 multicast address via the M2P (Multicast-to-Physical)
/// mapping table.
///
/// Subscriptions are local to this sled and control Rx (receive). For Tx
/// (transmit), configure multicast forwarding via set-mcast-fwd.
McastSubscribe {
/// The OPTE port name (e.g., opte0)
#[arg(short)]
port: String,
/// The overlay multicast group address (IPv4 or IPv6)
group: IpAddr,
},

/// Unsubscribe a port from a multicast group
///
/// Removes a port's subscription to a multicast group, preventing it from
/// receiving traffic for that group. This is the inverse of mcast-subscribe.
///
/// If the M2P mapping for the group has already been removed, this operation
/// succeeds as a no-op.
McastUnsubscribe {
/// The OPTE port name (e.g., opte0)
#[arg(short)]
port: String,
/// The overlay multicast group address (IPv4 or IPv6)
group: IpAddr,
},

/// Unsubscribe all ports from a multicast group
///
/// Removes all port subscriptions for a given multicast group on this sled
/// in a single operation. This comes in handy for decommissioning a
/// multicast group entirely on this sled.
///
/// If the M2P mapping for the group has already been removed, this
/// operation succeeds as a no-op.
McastUnsubscribeAll {
/// The overlay multicast group address (IPv4 or IPv6)
group: IpAddr,
},

/// Add a new router entry, either IPv4 or IPv6.
AddRouterEntry {
#[command(flatten)]
Expand Down Expand Up @@ -764,6 +862,64 @@ fn main() -> anyhow::Result<()> {
hdl.clear_v2b(&req)?;
}

Command::SetMcastFwd { underlay, next_hop, replication } => {
// OPTE routes to the next hop's unicast address to determine which
// underlay port to use via the illumos routing table and DDM.
//
// The packet is then sent to the multicast address with a multicast
// MAC.
//
// The switch matches on the outer dst IP (multicast) and Geneve
// `Replication` tag to determine which port groups to replicate to:
// - External: front panel ports (which get decapped on egress)
// - Underlay: underlay ports (sleds)
// - Both: both (bifurcated)
//
// The Replication type is Tx-only; Rx ignores it and delivers
// locally based on subscriptions.
//
// Like `swadm route add`, this command can be run multiple times
// with the same underlay address to add multiple next hops. If the
// same next hop is specified again, its replication mode is updated.

// Always use fleet-wide DEFAULT_MULTICAST_VNI
let next_hop_vni = Vni::new(DEFAULT_MULTICAST_VNI).unwrap();
let next_hop_addr = NextHopV6::new(next_hop, next_hop_vni);
let req = SetMcastForwardingReq {
underlay,
next_hops: vec![(next_hop_addr, replication)],
};
hdl.set_mcast_fwd(&req)?;
}

Command::ClearMcastFwd { underlay } => {
let req = ClearMcastForwardingReq { underlay };
hdl.clear_mcast_fwd(&req)?;
}

Command::DumpMcastFwd => {
print_mcast_fwd(&hdl.dump_mcast_fwd()?)?;
}

Command::DumpMcastSubs => {
print_mcast_subs(&hdl.dump_mcast_subs()?)?;
}

Command::McastSubscribe { port, group } => {
let req = McastSubscribeReq { port_name: port, group };
hdl.mcast_subscribe(&req)?;
}

Command::McastUnsubscribe { port, group } => {
let req = McastUnsubscribeReq { port_name: port, group };
hdl.mcast_unsubscribe(&req)?;
}

Command::McastUnsubscribeAll { group } => {
let req = McastUnsubscribeAllReq { group };
hdl.mcast_unsubscribe_all(&req)?;
}

Command::AddRouterEntry {
route: RouterRule { port, dest, target, class },
} => {
Expand Down
2 changes: 2 additions & 0 deletions crates/illumos-sys-hdrs/src/kernel.rs
Original file line number Diff line number Diff line change
Expand Up @@ -500,6 +500,8 @@ unsafe extern "C" {
pub fn freemsg(mp: *mut mblk_t);
pub fn freemsgchain(mp: *mut mblk_t);

pub fn msgpullup(mp: *mut mblk_t, n_bytes: isize) -> *mut mblk_t;

pub fn gethrtime() -> hrtime_t;

pub fn getmajor(dev: dev_t) -> major_t;
Expand Down
70 changes: 45 additions & 25 deletions crates/opte-api/src/cmd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,31 +25,40 @@ pub const XDE_IOC_OPTE_CMD: i32 = XDE_IOC as i32 | 0x01;
#[derive(Clone, Copy, Debug)]
#[repr(C)]
pub enum OpteCmd {
ListPorts = 1, // list all ports
AddFwRule = 20, // add firewall rule
RemFwRule = 21, // remove firewall rule
SetFwRules = 22, // set/replace all firewall rules at once
DumpTcpFlows = 30, // dump TCP flows
DumpLayer = 31, // dump the specified Layer
DumpUft = 32, // dump the Unified Flow Table
ListLayers = 33, // list the layers on a given port
ClearUft = 40, // clear the UFT
ClearLft = 41, // clear the given Layer's Flow Table
SetVirt2Phys = 50, // set a v2p mapping
DumpVirt2Phys = 51, // dump the v2p mappings
SetVirt2Boundary = 52, // set a v2b mapping
ClearVirt2Boundary = 53, // clear a v2b mapping
DumpVirt2Boundary = 54, // dump the v2b mappings
ClearVirt2Phys = 55, // clear a v2p mapping
AddRouterEntry = 60, // add a router entry for IP dest
DelRouterEntry = 61, // remove a router entry for IP dest
CreateXde = 70, // create a new xde device
DeleteXde = 71, // delete an xde device
SetXdeUnderlay = 72, // set xde underlay devices
ClearXdeUnderlay = 73, // clear xde underlay devices
SetExternalIps = 80, // set xde external IPs for a port
AllowCidr = 90, // allow ip block through gateway tx/rx
RemoveCidr = 91, // deny ip block through gateway tx/rx
ListPorts = 1, // list all ports
AddFwRule = 20, // add firewall rule
RemFwRule = 21, // remove firewall rule
SetFwRules = 22, // set/replace all firewall rules at once
DumpTcpFlows = 30, // dump TCP flows
DumpLayer = 31, // dump the specified Layer
DumpUft = 32, // dump the Unified Flow Table
ListLayers = 33, // list the layers on a given port
ClearUft = 40, // clear the UFT
ClearLft = 41, // clear the given Layer's Flow Table
SetVirt2Phys = 50, // set a v2p mapping
DumpVirt2Phys = 51, // dump the v2p mappings
SetVirt2Boundary = 52, // set a v2b mapping
ClearVirt2Boundary = 53, // clear a v2b mapping
DumpVirt2Boundary = 54, // dump the v2b mappings
ClearVirt2Phys = 55, // clear a v2p mapping
AddRouterEntry = 60, // add a router entry for IP dest
DelRouterEntry = 61, // remove a router entry for IP dest
CreateXde = 70, // create a new xde device
DeleteXde = 71, // delete an xde device
SetXdeUnderlay = 72, // set xde underlay devices
ClearXdeUnderlay = 73, // clear xde underlay devices
SetExternalIps = 80, // set xde external IPs for a port
AllowCidr = 90, // allow ip block through gateway tx/rx
RemoveCidr = 91, // deny ip block through gateway tx/rx
SetMcastForwarding = 100, // set multicast forwarding entries
ClearMcastForwarding = 101, // clear multicast forwarding entries
DumpMcastForwarding = 102, // dump multicast forwarding table
McastSubscribe = 103, // subscribe a port to a multicast group
McastUnsubscribe = 104, // unsubscribe a port from a multicast group
SetMcast2Phys = 105, // set M2P mapping (group -> underlay mcast)
ClearMcast2Phys = 106, // clear M2P mapping
DumpMcastSubscriptions = 107, // dump multicast subscription table
McastUnsubscribeAll = 108, // unsubscribe all ports from a multicast group
}

impl TryFrom<c_int> for OpteCmd {
Expand Down Expand Up @@ -82,6 +91,15 @@ impl TryFrom<c_int> for OpteCmd {
80 => Ok(Self::SetExternalIps),
90 => Ok(Self::AllowCidr),
91 => Ok(Self::RemoveCidr),
100 => Ok(Self::SetMcastForwarding),
101 => Ok(Self::ClearMcastForwarding),
102 => Ok(Self::DumpMcastForwarding),
103 => Ok(Self::McastSubscribe),
104 => Ok(Self::McastUnsubscribe),
105 => Ok(Self::SetMcast2Phys),
106 => Ok(Self::ClearMcast2Phys),
107 => Ok(Self::DumpMcastSubscriptions),
108 => Ok(Self::McastUnsubscribeAll),
_ => Err(()),
}
}
Expand Down Expand Up @@ -177,6 +195,7 @@ pub enum OpteError {
dest: IpCidr,
target: String,
},
InvalidUnderlayMulticast(String),
LayerNotFound(String),
MacExists {
port: String,
Expand Down Expand Up @@ -230,6 +249,7 @@ impl OpteError {
Self::DeserCmdReq(_) => ENOMSG,
Self::FlowExists(_) => EEXIST,
Self::InvalidRouterEntry { .. } => EINVAL,
Self::InvalidUnderlayMulticast(_) => EINVAL,
Self::LayerNotFound(_) => ENOENT,
Self::MacExists { .. } => EEXIST,
Self::MaxCapacity(_) => ENFILE,
Expand Down
Loading