diff --git a/Documentation/networking/bridge-subport.txt b/Documentation/networking/bridge-subport.txt
new file mode 100644
index 0000000000000..02b0c0deeced9
--- /dev/null
+++ b/Documentation/networking/bridge-subport.txt
@@ -0,0 +1,154 @@
+Bridge subport & superport support
+==================================
+
+
+Subports and superports are two related but distinct port functionalities of
+the Linux bridge code. They both affect how packets are replicated on a
+bridge, implementing split-horizon functionality.
+
+
+Superports
+----------
+
+A superport is a group of bridge ports that share the same nonzero integer
+"superport" value. The value is freely configurable by the user, and 0
+disables the function (by comparing unequal to itself, thus meaning "no
+superport").
+
+When two or more bridge ports are configured with the same value, packets
+that arrive on any of these ports are never forwarded out onto any other of
+these ports. This essentially treats them as attached to one common
+broadcast domain. It has no effect on MAC learning, STP, or anything else.
+
+This functionality is mostly useful to build a mesh on top of tunnels, e.g.
+a triangle like this:
+
+    bridge0  tunl01 ----- tunl10  bridge1
+             tunl02        tunl12
+                \\\         ///
+               tunl20     tunl21
+                   bridge2
+
+
+While this can be made to work with STP by blocking one of the tunnels, that
+is not desirable because it would make traffic take an extra hop. By putting
+the ports into a superport group on each of the bridges, traffic always
+flows directly to the learned destination, without creating loops by being
+forwarded back onto the mesh.
+
+The superport code does this on top of distinct devices, which could even be
+of distinct types (physical ports, tap devices, GRETAP, VXLAN, etc.)
+
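+For example, taking the triangle above, the two tunnel ports of bridge0 can
+be grouped by writing the same nonzero value into the per-port sysfs
+attribute added by this series (IFLA_BRPORT_SUPERPORT is the netlink
+equivalent; iproute2 has no keyword for it, so treat this as an
+illustrative sketch only):
+
+  # echo 1 > /sys/class/net/tunl01/brport/superport
+  # echo 1 > /sys/class/net/tunl02/brport/superport
+
+Doing the same on bridge1 (tunl10/tunl12) and bridge2 (tunl20/tunl21) stops
+any tunnel-to-tunnel forwarding on each bridge, while forwarding to and from
+the local ports is unaffected.
+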
+
+Subports
+--------
+
+Subports provide very similar functionality, but are built into a particular
+network device driver (or tunnel implementation). They are integer values
+again, but this time they provide more specific data for the driver when
+sending packets.
+
+The bridge layer makes no assumptions about the meaning of the values (other
+than removing duplicates). They cannot be configured by the user; instead,
+they are provided to the bridge layer on each received packet as appropriate.
+The bridge keeps this information alongside its MAC learning data and provides
+it back to the driver when sending/flooding packets.
+
+This is considerably more complicated than superports, but provides a crucial
+functionality that superports cannot: the ability to control multicast
+transmission.
+
+The idea here is that the bridge's TX path for multicast packets can pass a
+list of subport identifiers down to the device, representing which Ethernet
+stations are intended to receive the particular packet. This is particularly
+useful in two scenarios:
+
+1.) 802.11 multicast optimization
+
+    When the 802.11 TX layer knows which stations a multicast packet is
+    actually intended for, it can do the following things:
+
+    - unicast it unconditionally if there is only a single receiver
+    - clone and unicast it, with enough information supplied to an algorithm
+      that can calculate whether it is advantageous to do so
+    - multicast and pick a higher TX rate depending on the information it has
+      on the intended receivers
+
+    All of these are available in "enterprise" 802.11 solutions, yet have
+    eluded Linux wifi APs for a while; only recently has unicast conversion
+    made it in (though in a much less sophisticated way).
+
+
+2.) Ethernet over Multicast-capable media tunneling
+
+    Any encapsulation of Ethernet with more than two endpoints that runs on
+    top of an underlay network that supports multicast may be able to benefit
+    from having the extra information.
+
+    Most prominently, the under-development IETF BIER approach (it's a shim
+    header with bits controlling replication; packets are duplicated en route
+    so that each outgoing duplicate carries a non-overlapping subset of the
+    bits) is a direct fit for this -- the subport information can directly map
+    to bits in the BIER shim.
+
+    Other options include user-configured IP multicast group mappings and MPLS
+    multicast (which is not widely used, but still).
+
+
+The common factor between these scenarios is that it is the driver that
+controls replication of multicast packets. This is impossible to do with
+superports, since that implies multiple distinct netdevices and the bridge
+layer replicating packets. By then it is too late: 10 netdevices have already
+been handed 10 packets to transmit.
+
+
+BRANCH IMPLEMENTATION STATUS (TO BE REMOVED BEFORE PUSHING OUT)
+---------------------------------------------------------------
+(including VPLS bits)
+
+
+So, I wrote this over Christmas at a hacker event where I had some time to
+play with a VPLS network ;). Here's the status on things.
+
+General:
+
+- I've just rebased these commits; there were a few small changes to the
+  network code in the meantime (I don't think anything broke).
+
+  I DID NOT TEST THE REBASED VERSION. IT COMPILES BUT MAY SET YOUR COMPUTER
+  ON FIRE.
+
+MPLS layer:
+
+- the "MPT_HANDLER" thing is probably overkill; it likely makes more sense to
+  tie in the VPLS code more directly.
+
+VPLS:
+
+- I haven't implemented the control word.
+
+- I made a design mistake with the wire ID. It's simply not needed. A
+  pseudowire can be identified by its incoming label. There is also some
+  really ugly code keeping an array of wires...
+
+- The genetlink interface is not needed either. Really, the MPLS routing
+  table can carry all of the information. You can create a "vpls0" device
+  without any pseudowires, and then add routes with the necessary info:
+
+  "ip -f mpls route add 100 vpls vpls0 as 200 via inet 1.2.3.4 dev eth0"
+
+  This would need the addition of a new "vpls-device" netlink attribute, and
+  would "split" the information in the route between RX and TX - the RX path
+  uses the incoming label (100) and vpls-device (vpls0) information, giving
+  the packet to the proper vpls device. The TX path can then reuse all the
+  destination information on the route -- which would make things quite a bit
+  simpler because we can use existing handling.
+
+- I only hacked in IPv4 dst info; this would be fixed by the above.
+
+bridge:
+
+- the subport code is not finished; it doesn't actually do the multicast
+  pieces. It should hold an array of subport indexes + an array of lifetimes
+  (subport IDs need to age out like ports), and the array can be passed down
+  on TX with RCU semantics (a rough driver-side sketch follows below).
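+
+
+Driver-side sketch (illustrative only)
+--------------------------------------
+
+To make the driver contract from the "Subports" section above concrete, here
+is a minimal, hypothetical sketch. Only the skb->subport, skb->subport_cnt
+and skb->subport_lst fields are part of this series; "sta_id" and the drv_*
+helpers are made-up placeholders, and the TX half assumes the (not yet
+written) bridge code that fills in the subport list for multicast:
+
+	/* RX: tell the bridge which peer this frame came from.
+	 * The bridge treats 0 as "no subport", so peer IDs should be
+	 * nonzero.
+	 */
+	static void drv_rx(struct net_device *dev, struct sk_buff *skb,
+			   u32 sta_id)
+	{
+		skb->subport_cnt = 1;
+		skb->subport = sta_id;
+
+		skb->protocol = eth_type_trans(skb, dev);
+		netif_rx(skb);
+	}
+
+	/* TX: with no subport info (cnt == 0), transmit as before; with a
+	 * single known peer (cnt == 1), unicast towards it; with a list
+	 * (cnt > 1), replicate only towards the listed peers.
+	 */
+	static netdev_tx_t drv_xmit(struct sk_buff *skb, struct net_device *dev)
+	{
+		unsigned int i;
+
+		if (skb->subport_cnt <= 1)
+			return drv_xmit_one(skb, dev,
+					    skb->subport_cnt ? skb->subport : 0);
+
+		for (i = 0; i < skb->subport_cnt; i++) {
+			struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC);
+
+			if (clone)
+				drv_xmit_one(clone, dev, skb->subport_lst[i]);
+		}
+		consume_skb(skb);
+		return NETDEV_TX_OK;
+	}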
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index d460a4cbda1c8..8ca275d860edf 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -732,6 +732,12 @@ struct sk_buff { __u32 secmark; #endif + unsigned subport_cnt; + union { + __u32 subport; + __u32 *subport_lst; + }; + union { __u32 mark; __u32 reserved_tailroom; diff --git a/include/net/mpls.h b/include/net/mpls.h index 1dbc669b770e8..57f055629f121 100644 --- a/include/net/mpls.h +++ b/include/net/mpls.h @@ -33,4 +33,15 @@ static inline struct mpls_shim_hdr *mpls_hdr(const struct sk_buff *skb) { return (struct mpls_shim_hdr *)skb_network_header(skb); } + +struct mpls_shim_hdr; +typedef int (*mpls_handler)(void *arg, struct sk_buff *skb, + struct net_device *dev, struct packet_type *pt, + struct mpls_shim_hdr *hdr, + struct net_device *orig_dev); + +extern int mpls_handler_add(struct net *net, unsigned index, + mpls_handler handler, void *handler_arg); +extern int mpls_handler_del(struct net *net, unsigned index); + #endif diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 8ed679fe603fc..bf9b260b69ae9 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -325,6 +325,7 @@ enum { IFLA_BRPORT_MCAST_TO_UCAST, IFLA_BRPORT_VLAN_TUNNEL, IFLA_BRPORT_BCAST_FLOOD, + IFLA_BRPORT_SUPERPORT, /* superport (split-horizon) ID */ __IFLA_BRPORT_MAX }; #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1) diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h index f3d16dbe09d64..8f174c6e0f2d0 100644 --- a/include/uapi/linux/neighbour.h +++ b/include/uapi/linux/neighbour.h @@ -27,6 +27,7 @@ enum { NDA_MASTER, NDA_LINK_NETNSID, NDA_SRC_VNI, + NDA_SUBPORT, __NDA_MAX }; diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 430b53e7d941d..5fef3213972d7 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -80,6 +80,8 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) else br_flood(br, skb, BR_PKT_MULTICAST, false, true); } else if ((dst = br_fdb_find_rcu(br, dest, vid)) != NULL) { + skb->subport = dst->subport; + skb->subport_cnt = dst->subport ? 1 : 0; br_forward(dst->dst, skb, false, true); } else { br_flood(br, skb, BR_PKT_UNICAST, false, true); diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index ab0c7cc8448f4..034183875d5fd 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -29,7 +29,7 @@ static struct kmem_cache *br_fdb_cache __read_mostly; static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, - const unsigned char *addr, u16 vid); + __u32 subport, const unsigned char *addr, u16 vid); static void fdb_notify(struct net_bridge *br, const struct net_bridge_fdb_entry *, int); @@ -278,7 +278,7 @@ void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr) insert: /* insert new address, may fail if invalid address or dup. */ - fdb_insert(br, p, newaddr, 0); + fdb_insert(br, p, 0, newaddr, 0); if (!vg || !vg->num_vlans) goto done; @@ -288,7 +288,7 @@ void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr) * from under us. 
*/ list_for_each_entry(v, &vg->vlan_list, vlist) - fdb_insert(br, p, newaddr, v->vid); + fdb_insert(br, p, 0, newaddr, v->vid); done: spin_unlock_bh(&br->hash_lock); @@ -307,10 +307,11 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr) if (f && f->is_local && !f->dst && !f->added_by_user) fdb_delete_local(br, NULL, f); - fdb_insert(br, NULL, newaddr, 0); + fdb_insert(br, NULL, 0, newaddr, 0); vg = br_vlan_group(br); if (!vg || !vg->num_vlans) goto out; + /* Now remove and add entries for every VLAN configured on the * bridge. This function runs under RTNL so the bitmap will not * change from under us. @@ -321,7 +322,7 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr) f = br_fdb_find(br, br->dev->dev_addr, v->vid); if (f && f->is_local && !f->dst && !f->added_by_user) fdb_delete_local(br, NULL, f); - fdb_insert(br, NULL, newaddr, v->vid); + fdb_insert(br, NULL, 0, newaddr, v->vid); } out: spin_unlock_bh(&br->hash_lock); @@ -479,6 +480,8 @@ int br_fdb_fillbuf(struct net_bridge *br, void *buf, fe->port_no = f->dst->port_no; fe->port_hi = f->dst->port_no >> 8; + fe->unused = f->subport; + fe->is_local = f->is_local; if (!f->is_static) fe->ageing_timer_value = jiffies_delta_to_clock_t(jiffies - f->updated); @@ -495,6 +498,7 @@ int br_fdb_fillbuf(struct net_bridge *br, void *buf, static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head, struct net_bridge_port *source, + __u32 subport, const unsigned char *addr, __u16 vid, unsigned char is_local, @@ -506,6 +510,7 @@ static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head, if (fdb) { memcpy(fdb->addr.addr, addr, ETH_ALEN); fdb->dst = source; + fdb->subport = subport; fdb->vlan_id = vid; fdb->is_local = is_local; fdb->is_static = is_static; @@ -518,7 +523,7 @@ static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head, } static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, - const unsigned char *addr, u16 vid) + __u32 subport, const unsigned char *addr, u16 vid) { struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)]; struct net_bridge_fdb_entry *fdb; @@ -538,7 +543,7 @@ static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, fdb_delete(br, fdb); } - fdb = fdb_create(head, source, addr, vid, 1, 1); + fdb = fdb_create(head, source, subport, addr, vid, 1, 1); if (!fdb) return -ENOMEM; @@ -554,13 +559,14 @@ int br_fdb_insert(struct net_bridge *br, struct net_bridge_port *source, int ret; spin_lock_bh(&br->hash_lock); - ret = fdb_insert(br, source, addr, vid); + ret = fdb_insert(br, source, 0, addr, vid); spin_unlock_bh(&br->hash_lock); return ret; } void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, - const unsigned char *addr, u16 vid, bool added_by_user) + __u32 subport, const unsigned char *addr, u16 vid, + bool added_by_user) { struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)]; struct net_bridge_fdb_entry *fdb; @@ -586,8 +592,10 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, unsigned long now = jiffies; /* fastpath: update of existing entry */ - if (unlikely(source != fdb->dst)) { + if (unlikely(source != fdb->dst || + subport != fdb->subport)) { fdb->dst = source; + fdb->subport = subport; fdb_modified = true; /* Take over HW learned entry */ if (unlikely(fdb->added_by_external_learn)) @@ -603,7 +611,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, } else { spin_lock(&br->hash_lock); if (likely(!fdb_find_rcu(head, 
addr, vid))) { - fdb = fdb_create(head, source, addr, vid, 0, 0); + fdb = fdb_create(head, source, subport, addr, vid, 0, 0); if (fdb) { if (unlikely(added_by_user)) fdb->added_by_user = 1; @@ -665,6 +673,8 @@ static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br, if (fdb->vlan_id && nla_put(skb, NDA_VLAN, sizeof(u16), &fdb->vlan_id)) goto nla_put_failure; + if (fdb->subport && nla_put(skb, NDA_SUBPORT, sizeof(u32), &fdb->subport)) + goto nla_put_failure; nlmsg_end(skb, nlh); return 0; @@ -791,7 +801,7 @@ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source, if (!(flags & NLM_F_CREATE)) return -ENOENT; - fdb = fdb_create(head, source, addr, vid, 0, 0); + fdb = fdb_create(head, source, 0, addr, vid, 0, 0); if (!fdb) return -ENOMEM; @@ -854,7 +864,7 @@ static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge *br, } local_bh_disable(); rcu_read_lock(); - br_fdb_update(br, p, addr, vid, true); + br_fdb_update(br, p, 0, addr, vid, true); rcu_read_unlock(); local_bh_enable(); } else if (ndm->ndm_flags & NTF_EXT_LEARNED) { @@ -1081,7 +1091,7 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, head = &br->hash[br_mac_hash(addr, vid)]; fdb = br_fdb_find(br, addr, vid); if (!fdb) { - fdb = fdb_create(head, p, addr, vid, 0, 0); + fdb = fdb_create(head, p, 0, addr, vid, 0, 0); if (!fdb) { err = -ENOMEM; goto err_unlock; diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 48fb17417fac3..12620f5117fb2 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -25,12 +25,14 @@ static inline int should_deliver(const struct net_bridge_port *p, const struct sk_buff *skb) { + struct net_bridge_port *from = br_port_get_rcu(skb->dev); struct net_bridge_vlan_group *vg; vg = nbp_vlan_group_rcu(p); return ((p->flags & BR_HAIRPIN_MODE) || skb->dev != p->dev) && br_allowed_egress(vg, skb) && p->state == BR_STATE_FORWARDING && - nbp_switchdev_allowed_egress(p, skb); + nbp_switchdev_allowed_egress(p, skb) && + (!p->superport || !from || p->superport != from->superport); } int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 013f2290bfa56..34efe2735592f 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -127,6 +127,11 @@ static void br_do_proxy_arp(struct sk_buff *skb, struct net_bridge *br, } } +static __u32 skb_subport(struct sk_buff *skb) +{ + return skb->subport_cnt == 1 ? 
skb->subport : 0; +} + /* note: already called with rcu_read_lock */ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { @@ -150,7 +155,8 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb /* insert into forwarding database after filtering to avoid spoofing */ br = p->br; if (p->flags & BR_LEARNING) - br_fdb_update(br, p, eth_hdr(skb)->h_source, vid, false); + br_fdb_update(br, p, skb_subport(skb), eth_hdr(skb)->h_source, + vid, false); local_rcv = !!(br->dev->flags & IFF_PROMISC); if (is_multicast_ether_addr(dest)) { @@ -229,7 +235,8 @@ static void __br_handle_local_finish(struct sk_buff *skb) /* check if vlan is allowed, to avoid spoofing */ if (p->flags & BR_LEARNING && br_should_learn(p, skb, &vid)) - br_fdb_update(p->br, p, eth_hdr(skb)->h_source, vid, false); + br_fdb_update(p->br, p, skb_subport(skb), + eth_hdr(skb)->h_source, vid, false); } /* note: already called with rcu_read_lock */ diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 1e63ec466d7c7..13682726c947a 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -152,6 +152,7 @@ static inline size_t br_port_info_size(void) #ifdef CONFIG_BRIDGE_IGMP_SNOOPING + nla_total_size(sizeof(u8)) /* IFLA_BRPORT_MULTICAST_ROUTER */ #endif + + nla_total_size(4) /* IFLA_BRPORT_SUPERPORT */ + 0; } @@ -208,7 +209,8 @@ static int br_port_fill_attrs(struct sk_buff *skb, p->topology_change_ack) || nla_put_u8(skb, IFLA_BRPORT_CONFIG_PENDING, p->config_pending) || nla_put_u8(skb, IFLA_BRPORT_VLAN_TUNNEL, !!(p->flags & - BR_VLAN_TUNNEL))) + BR_VLAN_TUNNEL)) || + nla_put_u32(skb, IFLA_BRPORT_SUPERPORT, p->superport)) return -EMSGSIZE; timerval = br_timer_value(&p->message_age_timer); @@ -637,6 +639,7 @@ static const struct nla_policy br_port_policy[IFLA_BRPORT_MAX + 1] = { [IFLA_BRPORT_MCAST_TO_UCAST] = { .type = NLA_U8 }, [IFLA_BRPORT_MCAST_FLOOD] = { .type = NLA_U8 }, [IFLA_BRPORT_BCAST_FLOOD] = { .type = NLA_U8 }, + [IFLA_BRPORT_SUPERPORT] = { .type = NLA_U32 }, }; /* Change the state of the port and notify spanning tree */ @@ -728,6 +731,10 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[]) return err; } #endif + + if (tb[IFLA_BRPORT_SUPERPORT]) + p->superport = nla_get_u32(tb[IFLA_BRPORT_SUPERPORT]); + br_port_flags_change(p, old_flags ^ p->flags); return 0; } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 20626927f4336..987e60e24fb5c 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -163,6 +163,7 @@ struct net_bridge_vlan_group { struct net_bridge_fdb_entry { struct hlist_node hlist; struct net_bridge_port *dst; + __u32 subport; mac_addr addr; __u16 vlan_id; @@ -244,6 +245,8 @@ struct net_bridge_port { struct kobject kobj; struct rcu_head rcu; + u32 superport; + #ifdef CONFIG_BRIDGE_IGMP_SNOOPING struct bridge_mcast_own_query ip4_own_query; #if IS_ENABLED(CONFIG_IPV6) @@ -516,7 +519,8 @@ int br_fdb_fillbuf(struct net_bridge *br, void *buf, unsigned long count, int br_fdb_insert(struct net_bridge *br, struct net_bridge_port *source, const unsigned char *addr, u16 vid); void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, - const unsigned char *addr, u16 vid, bool added_by_user); + __u32 subport, const unsigned char *addr, u16 vid, + bool added_by_user); int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid); diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index 
5d5d413a6cf8a..293b95d167367 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -175,6 +175,18 @@ BRPORT_ATTR_FLAG(proxyarp_wifi, BR_PROXYARP_WIFI); BRPORT_ATTR_FLAG(multicast_flood, BR_MCAST_FLOOD); BRPORT_ATTR_FLAG(broadcast_flood, BR_BCAST_FLOOD); +static ssize_t show_superport(struct net_bridge_port *p, char *buf) +{ + return sprintf(buf, "%u\n", p->superport); +} +static int br_set_superport(struct net_bridge_port *p, unsigned long val) +{ + p->superport = val; + return 0; +} +static BRPORT_ATTR(superport, S_IRUGO | S_IWUSR, + show_superport, br_set_superport); + #ifdef CONFIG_BRIDGE_IGMP_SNOOPING static ssize_t show_multicast_router(struct net_bridge_port *p, char *buf) { @@ -214,6 +226,7 @@ static const struct brport_attribute *brport_attrs[] = { &brport_attr_root_block, &brport_attr_learning, &brport_attr_unicast_flood, + &brport_attr_superport, #ifdef CONFIG_BRIDGE_IGMP_SNOOPING &brport_attr_multicast_router, &brport_attr_multicast_fast_leave, diff --git a/net/mpls/Kconfig b/net/mpls/Kconfig index 5c467ef973114..42381a3acae81 100644 --- a/net/mpls/Kconfig +++ b/net/mpls/Kconfig @@ -33,4 +33,9 @@ config MPLS_IPTUNNEL ---help--- mpls ip tunnel support. +config MPLS_VPLS + tristate "VPLS support" + ---help--- + Add support for de-&encapsulating VPLS. + endif # MPLS diff --git a/net/mpls/Makefile b/net/mpls/Makefile index 9ca9236250165..d9e6326664171 100644 --- a/net/mpls/Makefile +++ b/net/mpls/Makefile @@ -4,5 +4,6 @@ obj-$(CONFIG_NET_MPLS_GSO) += mpls_gso.o obj-$(CONFIG_MPLS_ROUTING) += mpls_router.o obj-$(CONFIG_MPLS_IPTUNNEL) += mpls_iptunnel.o +obj-$(CONFIG_MPLS_VPLS) += vpls.o mpls_router-y := af_mpls.o diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 94b3317232a65..46ad81968b01a 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -299,6 +300,7 @@ static bool mpls_egress(struct net *net, struct mpls_route *rt, success = true; break; } + case MPT_HANDLER: case MPT_UNSPEC: /* Should have decided which protocol it is by now */ break; @@ -356,6 +358,10 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev, goto drop; } + if (rt->rt_payload_type == MPT_HANDLER) + return rt->rt_handler(rt->rt_harg, skb, dev, pt, hdr, + orig_dev); + nh = mpls_select_multipath(rt, skb); if (!nh) goto err; @@ -947,6 +953,16 @@ static int mpls_route_add(struct mpls_route_config *cfg, if (!mpls_label_ok(net, index, extack)) goto errout; + switch (cfg->rc_payload_type) { + case MPT_UNSPEC: + case MPT_IPV4: + case MPT_IPV6: + break; + default: + err = -EINVAL; + goto errout; + } + /* Append makes no sense with mpls */ err = -EOPNOTSUPP; if (cfg->rc_nlflags & NLM_F_APPEND) { @@ -1271,6 +1287,74 @@ static int mpls_netconf_dump_devconf(struct sk_buff *skb, return skb->len; } +int mpls_handler_add(struct net *net, unsigned index, mpls_handler handler, + void *handler_arg) +{ + struct mpls_route __rcu **platform_label; + struct mpls_route *rt, *old; + int err = -EINVAL; + + /* Reserved labels may not be set */ + if (index < MPLS_LABEL_FIRST_UNRESERVED) + goto errout; + + /* The full 20 bit range may not be supported. 
*/ + if (index >= net->mpls.platform_labels) + goto errout; + + err = -EEXIST; + platform_label = rtnl_dereference(net->mpls.platform_label); + old = rtnl_dereference(platform_label[index]); + if (old) + goto errout; + + err = -ENOMEM; + rt = mpls_rt_alloc(0, 0, 0); + if (!rt) + goto errout; + + rt->rt_protocol = RTPROT_KERNEL; + rt->rt_payload_type = MPT_HANDLER; + rt->rt_handler = handler; + rt->rt_harg = handler_arg; + + mpls_route_update(net, index, rt, NULL); + return 0; + +errout: + return err; +} +EXPORT_SYMBOL(mpls_handler_add); + +int mpls_handler_del(struct net *net, unsigned index) +{ + struct mpls_route __rcu **platform_label; + struct mpls_route *old; + int err = -EINVAL; + + /* Reserved labels may not be removed */ + if (index < MPLS_LABEL_FIRST_UNRESERVED) + goto errout; + + /* The full 20 bit range may not be supported */ + if (index >= net->mpls.platform_labels) + goto errout; + + platform_label = rtnl_dereference(net->mpls.platform_label); + old = rtnl_dereference(platform_label[index]); + if (!old) + goto errout; + if (old->rt_payload_type != MPT_HANDLER) + goto errout; + + mpls_route_update(net, index, NULL, NULL); + + err = 0; +errout: + return err; +} +EXPORT_SYMBOL(mpls_handler_del); + #define MPLS_PERDEV_SYSCTL_OFFSET(field) \ (&((struct mpls_dev *)0)->field) diff --git a/net/mpls/internal.h b/net/mpls/internal.h index cf65aec2e551b..2cd73eb514637 100644 --- a/net/mpls/internal.h +++ b/net/mpls/internal.h @@ -78,6 +78,7 @@ enum mpls_payload_type { MPT_UNSPEC, /* IPv4 or IPv6 */ MPT_IPV4 = 4, MPT_IPV6 = 6, + MPT_HANDLER = 255, /* Other types not implemented: * - Pseudo-wire with or without control word (RFC4385) @@ -141,6 +142,8 @@ enum mpls_ttl_propagation { */ struct mpls_route { /* next hop label forwarding entry */ struct rcu_head rt_rcu; + mpls_handler rt_handler; + void *rt_harg; u8 rt_protocol; u8 rt_payload_type; u8 rt_max_alen; diff --git a/net/mpls/vpls.c b/net/mpls/vpls.c new file mode 100644 index 0000000000000..bf9d4bc687a80 --- /dev/null +++ b/net/mpls/vpls.c @@ -0,0 +1,793 @@ +/* + * net/mpls/vpls.c + * + * Copyright (C) 2016 David Lamparter + * + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +#include "internal.h" +#include "vpls.h" + +#define DRV_NAME "vpls" + +#define MIN_MTU 68 /* Min L3 MTU */ +#define MAX_MTU 65535 /* Max L3 MTU (arbitrary) */ + +#define MAXWIRES 256 + +union vpls_nh { + struct sockaddr_in sin; + struct sockaddr_in6 sin6; +}; + +struct vpls_dst { + struct net_device *dev; + unsigned label_in, label_out; + union vpls_nh addr; + u16 vlan_id; + u8 via_table; + u8 flags; + u8 ttl; +}; + +struct vpls_dst_list { + size_t count; + struct vpls_dst *items; +}; + +struct vpls_priv { + struct net *encap_net; + struct vpls_dst_list *dsts; +}; + +static int vpls_xmit_dst(struct sk_buff *skb, struct vpls_priv *vpls, + struct vpls_dst *dst) +{ + unsigned int hh_len; + unsigned int new_header_size; + struct mpls_shim_hdr *hdr; + struct net_device *out_dev = dst->dev; + int err; + + if (!mpls_output_possible(dst->dev) || skb_warn_if_lro(skb)) + return -1; + + new_header_size = 1 * sizeof(struct mpls_shim_hdr); + + hh_len = LL_RESERVED_SPACE(out_dev); + if (!out_dev->header_ops) + hh_len = 0; + + if (skb_cow(skb, hh_len + new_header_size)) + return -1; + + skb_push(skb, new_header_size); + skb_reset_network_header(skb); + + skb->dev = out_dev; + skb->protocol = htons(ETH_P_MPLS_UC); + + hdr = mpls_hdr(skb); + hdr[0] = mpls_entry_encode(dst->label_out, dst->ttl, 0, 
true); + + if (dst->flags & VPLS_F_VLAN) + skb_vlan_push(skb, htons(ETH_P_8021Q), dst->vlan_id); + + err = neigh_xmit(dst->via_table, out_dev, &dst->addr, skb); + if (err) + net_dbg_ratelimited("%s: packet transmission failed: %d\n", + __func__, err); + + return err; +} + +static netdev_tx_t vpls_xmit(struct sk_buff *skb, struct net_device *dev) +{ + int ret = -EINVAL; + struct vpls_priv *priv = netdev_priv(dev); + struct vpls_dst_list *dsts; + struct sk_buff *cloned; + size_t i; + + skb_orphan(skb); + skb_forward_csum(skb); + + rcu_read_lock(); + + dsts = rcu_dereference(priv->dsts); + if (!dsts->count) + goto drop; + + if (skb->subport_cnt == 1 && skb->subport < dsts->count + && dsts->items[skb->subport].dev) { + + i = skb->subport; + + cloned = skb_clone(skb, GFP_KERNEL); + if (vpls_xmit_dst(cloned, priv, &dsts->items[i])) + consume_skb(cloned); + + } else { + for (i = 0; i < dsts->count; i++) + if (dsts->items[i].dev) { + cloned = skb_clone(skb, GFP_KERNEL); + if (vpls_xmit_dst(cloned, priv, &dsts->items[i])) + consume_skb(cloned); + } + } + + ret = 0; +drop: + rcu_read_unlock(); + consume_skb(skb); + return ret; +} + +static int vpls_rcv(void *arg, struct sk_buff *skb, struct net_device *in_dev, + struct packet_type *pt, struct mpls_shim_hdr *hdr, + struct net_device *orig_dev) +{ + struct net_device *dev = arg; + struct vpls_priv *priv = netdev_priv(dev); + struct mpls_entry_decoded dec; + struct vpls_dst_list *dsts; + size_t i; + + dec = mpls_entry_decode(hdr); + if (!dec.bos) { + pr_info("%s: incoming BoS mismatch\n", dev->name); + goto drop; + } + + rcu_read_lock(); + dsts = rcu_dereference(priv->dsts); + for (i = 0; i < dsts->count; i++) + if (dsts->items[i].dev && dec.label == dsts->items[i].label_in) + break; + + if (i == dsts->count) { + pr_info("%s: incoming label %u not found\n", dev->name, + dec.label); + rcu_read_unlock(); + goto drop; + } + rcu_read_unlock(); + + if (unlikely(!pskb_may_pull(skb, ETH_HLEN))) + goto drop; + + skb->dev = dev; + + skb_reset_mac_header(skb); + skb->protocol = eth_type_trans(skb, dev); + skb->ip_summed = CHECKSUM_NONE; + skb->pkt_type = PACKET_HOST; + + skb_clear_hash(skb); + skb->vlan_tci = 0; + skb_set_queue_mapping(skb, 0); + skb_scrub_packet(skb, !net_eq(dev_net(in_dev), dev_net(dev))); + + skb_reset_network_header(skb); + skb_probe_transport_header(skb, 0); + + skb->subport_cnt = 1; + skb->subport = i; + + netif_rx(skb); + return 0; + +drop: + kfree_skb(skb); + return NET_RX_DROP; +} + +/* fake multicast ability */ +static void vpls_set_multicast_list(struct net_device *dev) +{ +} + +static int vpls_open(struct net_device *dev) +{ + struct vpls_priv *priv = netdev_priv(dev); + struct vpls_dst_list *dsts; + int rc; + size_t i; + + rcu_read_lock(); + dsts = rcu_dereference(priv->dsts); + for (i = 0; i < dsts->count; i++) + if (dsts->items[i].dev) { + struct vpls_dst *dst = &dsts->items[i]; + rc = mpls_handler_add(priv->encap_net, dst->label_in, + vpls_rcv, dev); + } + rcu_read_unlock(); + + netif_carrier_on(dev); + return 0; +} + +static int vpls_close(struct net_device *dev) +{ + struct vpls_priv *priv = netdev_priv(dev); + struct vpls_dst_list *dsts; + size_t i; + + netif_carrier_off(dev); + + rcu_read_lock(); + dsts = rcu_dereference(priv->dsts); + for (i = 0; i < dsts->count; i++) + if (dsts->items[i].dev) { + struct vpls_dst *dst = &dsts->items[i]; + mpls_handler_del(priv->encap_net, dst->label_in); + } + rcu_read_unlock(); + return 0; +} + +static int is_valid_vpls_mtu(int new_mtu) +{ + return new_mtu >= MIN_MTU && new_mtu <= MAX_MTU; 
+} + +static int vpls_change_mtu(struct net_device *dev, int new_mtu) +{ + if (!is_valid_vpls_mtu(new_mtu)) + return -EINVAL; + dev->mtu = new_mtu; + return 0; +} + +static int vpls_dev_init(struct net_device *dev) +{ + struct vpls_priv *priv = netdev_priv(dev); + priv->dsts = kzalloc(sizeof(struct vpls_dst_list), GFP_KERNEL); + + return 0; +} + +static void vpls_dev_free(struct net_device *dev) +{ + struct vpls_priv *priv = netdev_priv(dev); + struct vpls_dst_list *dsts; + size_t i; + + dsts = priv->dsts; + for (i = 0; i < dsts->count; i++) + if (dsts->items[i].dev) + dev_put(dsts->items[i].dev); + if (priv->dsts->items) + kfree(priv->dsts->items); + kfree(priv->dsts); + + if (priv->encap_net) + put_net(priv->encap_net); + + free_netdev(dev); +} + +static const struct net_device_ops vpls_netdev_ops = { + .ndo_init = vpls_dev_init, + .ndo_open = vpls_open, + .ndo_stop = vpls_close, + .ndo_start_xmit = vpls_xmit, + .ndo_change_mtu = vpls_change_mtu, + .ndo_set_rx_mode = vpls_set_multicast_list, + .ndo_set_mac_address = eth_mac_addr, + .ndo_features_check = passthru_features_check, +}; + +#define VPLS_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | \ + NETIF_F_HW_CSUM | NETIF_F_RXCSUM | NETIF_F_HIGHDMA) + +static void vpls_setup(struct net_device *dev) +{ + ether_setup(dev); + + dev->priv_flags &= ~IFF_TX_SKB_SHARING; + dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; + dev->priv_flags |= IFF_NO_QUEUE; + + dev->netdev_ops = &vpls_netdev_ops; + dev->features |= NETIF_F_LLTX; + dev->features |= VPLS_FEATURES; + dev->vlan_features = dev->features; + dev->destructor = vpls_dev_free; + + dev->hw_features = VPLS_FEATURES; + dev->hw_enc_features = VPLS_FEATURES; +} + +/* + * netlink interface + */ + +static int vpls_validate(struct nlattr *tb[], struct nlattr *data[]) +{ + if (tb[IFLA_ADDRESS]) { + if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) + return -EINVAL; + if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) + return -EADDRNOTAVAIL; + } + if (tb[IFLA_MTU]) { + if (!is_valid_vpls_mtu(nla_get_u32(tb[IFLA_MTU]))) + return -EINVAL; + } + return 0; +} + +static struct rtnl_link_ops vpls_link_ops; + +static int vpls_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) +{ + int err; + struct vpls_priv *priv = netdev_priv(dev); + + if (tb[IFLA_ADDRESS] == NULL) + eth_hw_addr_random(dev); + + if (tb[IFLA_IFNAME]) + nla_strlcpy(dev->name, tb[IFLA_IFNAME], IFNAMSIZ); + else + snprintf(dev->name, IFNAMSIZ, DRV_NAME "%%d"); + + err = register_netdevice(dev); + if (err < 0) + goto err; + priv->encap_net = get_net(src_net); + + netif_carrier_off(dev); + return 0; + +err: + return err; +} + +static void vpls_dellink(struct net_device *dev, struct list_head *head) +{ + unregister_netdevice_queue(dev, head); +} + + +static struct rtnl_link_ops vpls_link_ops = { + .kind = DRV_NAME, + .priv_size = sizeof(struct vpls_priv), + .setup = vpls_setup, + .validate = vpls_validate, + .newlink = vpls_newlink, + .dellink = vpls_dellink, +}; + +/* + * GENL wire-control interface + */ + +static struct nla_policy vpls_genl_policy[VPLS_ATTR_MAX + 1] = { + [VPLS_ATTR_IFINDEX] = { .type = NLA_U32 }, + [VPLS_ATTR_WIREID] = { .type = NLA_U32 }, + [VPLS_ATTR_LABEL_IN] = { .type = NLA_U32 }, + [VPLS_ATTR_LABEL_OUT] = { .type = NLA_U32 }, + [VPLS_ATTR_NH_DEV] = { .type = NLA_U32 }, + [VPLS_ATTR_NH_IP] = { .type = NLA_U32 }, + [VPLS_ATTR_NH_IPV6] = { .len = sizeof(struct in6_addr) }, + [VPLS_ATTR_TTL] = { .type = NLA_U8 }, + [VPLS_ATTR_VLANID] = { .type = NLA_U16 }, +}; + +static int 
vpls_genl_newwire(struct sk_buff *skb, struct genl_info *info); +static int vpls_genl_getwire(struct sk_buff *skb, struct genl_info *info); +static int vpls_genl_dumpwire(struct sk_buff *skb, struct netlink_callback *cb); +static int vpls_genl_delwire(struct sk_buff *skb, struct genl_info *info); + +static struct genl_ops vpls_genl_ops[] = { + { + .cmd = VPLS_CMD_NEWWIRE, + .flags = GENL_ADMIN_PERM, + .policy = vpls_genl_policy, + .doit = vpls_genl_newwire, + .dumpit = NULL, + }, + { + .cmd = VPLS_CMD_GETWIRE, + .flags = GENL_ADMIN_PERM, + .policy = vpls_genl_policy, + .doit = vpls_genl_getwire, + .dumpit = vpls_genl_dumpwire, + }, + { + .cmd = VPLS_CMD_DELWIRE, + .flags = GENL_ADMIN_PERM, + .policy = vpls_genl_policy, + .doit = vpls_genl_delwire, + .dumpit = NULL, + }, +}; + +struct genl_multicast_group vpls_genl_groups[] = { + { + .name = "newwire", + }, +}; + +static struct genl_family vpls_genl_family = { + .hdrsize = 0, + .name = "vpls", + .version = 1, + .maxattr = VPLS_ATTR_MAX, + + .ops = vpls_genl_ops, + .n_ops = ARRAY_SIZE(vpls_genl_ops), + .mcgrps = vpls_genl_groups, + .n_mcgrps = ARRAY_SIZE(vpls_genl_groups), +}; + + +static int vpls_genl_newwire(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr **data = info->attrs; + struct net *net = sock_net(skb->sk); + int ret = -EINVAL; + struct net_device *vplsdev, *outdev; + struct vpls_priv *priv; + struct vpls_dst_list *dsts, *newdsts; + u32 wireid; + size_t count; + unsigned remove_lbl = 0; + + if (!data[VPLS_ATTR_WIREID] || !data[VPLS_ATTR_IFINDEX]) + return -EINVAL; + if (!data[VPLS_ATTR_NH_DEV] || !data[VPLS_ATTR_NH_IP] || + !data[VPLS_ATTR_NH_IP]) + return -EINVAL; + if (!data[VPLS_ATTR_LABEL_OUT] || !data[VPLS_ATTR_LABEL_IN]) + return -EINVAL; + + wireid = nla_get_u32(data[VPLS_ATTR_WIREID]); + if (wireid >= MAXWIRES) + return -EINVAL; + + rtnl_lock(); + + vplsdev = __dev_get_by_index(net, nla_get_u32(data[VPLS_ATTR_IFINDEX])); + if (!vplsdev || vplsdev->netdev_ops != &vpls_netdev_ops) + goto out_unlock; + + outdev = dev_get_by_index(net, nla_get_u32(data[VPLS_ATTR_NH_DEV])); + if (!outdev) + goto out_unlock; + + priv = netdev_priv(vplsdev); + dsts = priv->dsts; + count = dsts->count; + if (wireid < count && dsts->items[wireid].dev) { + if ((info->nlhdr->nlmsg_flags & (NLM_F_EXCL + | NLM_F_REPLACE)) != NLM_F_REPLACE) { + ret = -EEXIST; + goto out_drop_outdev; + } + remove_lbl = dsts->items[wireid].label_in; + } else { + if (!(info->nlhdr->nlmsg_flags & NLM_F_CREATE)) { + ret = -ENOENT; + goto out_drop_outdev; + } + if (wireid >= count) + count = wireid + 1; + } + newdsts = kzalloc(sizeof(struct vpls_dst_list), GFP_KERNEL); + if (!newdsts) { + ret = -ENOMEM; + goto out_drop_outdev; + } + newdsts->count = count; + newdsts->items = kzalloc(sizeof(newdsts->items[0]) * newdsts->count, GFP_KERNEL); + memcpy(newdsts->items, dsts->items, sizeof(dsts->items[0]) * dsts->count); + + if (newdsts->items[wireid].dev) + dev_put(newdsts->items[wireid].dev); + newdsts->items[wireid].label_in = nla_get_u32(data[VPLS_ATTR_LABEL_IN]); + newdsts->items[wireid].label_out = nla_get_u32(data[VPLS_ATTR_LABEL_OUT]); + newdsts->items[wireid].dev = outdev; + newdsts->items[wireid].ttl = nla_get_u8(data[VPLS_ATTR_TTL]); + if (data[VPLS_ATTR_NH_IP]) { + newdsts->items[wireid].addr.sin.sin_addr.s_addr = nla_get_in_addr(data[VPLS_ATTR_NH_IP]); + newdsts->items[wireid].flags |= VPLS_F_INET; + newdsts->items[wireid].via_table = NEIGH_ARP_TABLE; + } else if (data[VPLS_ATTR_NH_IPV6]) { + if (!IS_ENABLED(CONFIG_IPV6)) + return -EPFNOSUPPORT; + 
newdsts->items[wireid].addr.sin6.sin6_addr = nla_get_in6_addr(data[VPLS_ATTR_NH_IPV6]); + newdsts->items[wireid].flags |= VPLS_F_INET6; + newdsts->items[wireid].via_table = NEIGH_ND_TABLE; + } + if (data[VPLS_ATTR_VLANID]) { + newdsts->items[wireid].vlan_id = nla_get_u16(data[VPLS_ATTR_VLANID]); + newdsts->items[wireid].flags |= VPLS_F_VLAN; + } + + if (remove_lbl && remove_lbl != newdsts->items[wireid].label_in) + mpls_handler_del(priv->encap_net, remove_lbl); + + if (remove_lbl != newdsts->items[wireid].label_in) + ret = mpls_handler_add(priv->encap_net, + newdsts->items[wireid].label_in, + vpls_rcv, vplsdev); + + rcu_assign_pointer(priv->dsts, newdsts); + rtnl_unlock(); + + synchronize_rcu(); + + kfree(dsts->items); + kfree(dsts); + + return 0; + +out_drop_outdev: + dev_put(outdev); +out_unlock: + rtnl_unlock(); + return ret; +} + +static int vpls_genl_delwire(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr **data = info->attrs; + struct net *net = sock_net(skb->sk); + int ret = -EINVAL; + struct net_device *vplsdev; + struct vpls_priv *priv; + struct vpls_dst_list *dsts, *newdsts; + u32 wireid; + size_t count; + + if (!data[VPLS_ATTR_WIREID] || !data[VPLS_ATTR_IFINDEX]) + return -EINVAL; + + wireid = nla_get_u32(data[VPLS_ATTR_WIREID]); + if (wireid >= MAXWIRES) + return -EINVAL; + + rtnl_lock(); + + vplsdev = __dev_get_by_index(net, nla_get_u32(data[VPLS_ATTR_IFINDEX])); + if (!vplsdev || vplsdev->netdev_ops != &vpls_netdev_ops) + goto out_unlock; + priv = netdev_priv(vplsdev); + + dsts = priv->dsts; + count = dsts->count; + if (wireid >= count || !dsts->items[wireid].dev) { + ret = -ENOENT; + goto out_unlock; + } + + mpls_handler_del(priv->encap_net, dsts->items[wireid].label_in); + + if (wireid + 1 == count) + for (count--; count && !dsts->items[count - 1].dev; count--) + ; + + newdsts->count = count; + newdsts->items = kzalloc(sizeof(newdsts->items[0]) * count, GFP_KERNEL); + memcpy(newdsts->items, dsts->items, sizeof(dsts->items[0]) * count); + if (wireid < count) + memset(&newdsts->items[wireid], 0, sizeof(newdsts->items[0])); + + rcu_assign_pointer(priv->dsts, newdsts); + rtnl_unlock(); + + synchronize_rcu(); + + kfree(dsts->items); + kfree(dsts); + + return 0; + +out_unlock: + rtnl_unlock(); + return ret; +} + +static int vpls_nl_wire_msg(struct sk_buff *msg, struct net_device *dev, + int cmd, unsigned wireid, struct vpls_dst *dst, + u32 portid, u32 seq, int flags) +{ + void *hdr; + + hdr = genlmsg_put(msg, portid, seq, &vpls_genl_family, flags, cmd); + if (!hdr) + return -EMSGSIZE; + + if (nla_put_u32(msg, VPLS_ATTR_IFINDEX, dev->ifindex)) + goto nla_put_failure; + if (nla_put_u32(msg, VPLS_ATTR_WIREID, wireid)) + goto nla_put_failure; + if (nla_put_u32(msg, VPLS_ATTR_NH_DEV, dst->dev->ifindex)) + goto nla_put_failure; + if (dst->flags & VPLS_F_INET) { + if (nla_put_in_addr(msg, VPLS_ATTR_NH_IP, + dst->addr.sin.sin_addr.s_addr)) + goto nla_put_failure; + } else if (dst->flags & VPLS_F_INET6) { + if (nla_put_in6_addr(msg, VPLS_ATTR_NH_IPV6, + &dst->addr.sin6.sin6_addr)) + goto nla_put_failure; + } + if (nla_put_u32(msg, VPLS_ATTR_LABEL_IN, dst->label_in)) + goto nla_put_failure; + if (nla_put_u32(msg, VPLS_ATTR_LABEL_OUT, dst->label_out)) + goto nla_put_failure; + if (nla_put_u8(msg, VPLS_ATTR_TTL, dst->ttl)) + goto nla_put_failure; + if (dst->flags & VPLS_F_VLAN) + if (nla_put_u16(msg, VPLS_ATTR_VLANID, dst->vlan_id)) + goto nla_put_failure; + genlmsg_end(msg, hdr); + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +static int 
vpls_genl_getwire(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr **data = info->attrs; + struct net *net = sock_net(skb->sk); + int ret = -EINVAL; + struct net_device *vplsdev; + struct vpls_priv *priv; + struct vpls_dst_list *dsts; + u32 wireid; + struct sk_buff *msg; + + if (!data[VPLS_ATTR_WIREID] || !data[VPLS_ATTR_IFINDEX]) + return -EINVAL; + + wireid = nla_get_u32(data[VPLS_ATTR_WIREID]); + if (wireid >= MAXWIRES) + return -EINVAL; + + rtnl_lock(); + + vplsdev = __dev_get_by_index(net, nla_get_u32(data[VPLS_ATTR_IFINDEX])); + if (!vplsdev || vplsdev->netdev_ops != &vpls_netdev_ops) + goto out_unlock; + + priv = netdev_priv(vplsdev); + dsts = priv->dsts; + + if (wireid >= dsts->count || !dsts->items[wireid].dev) { + ret = -ENOENT; + goto out_unlock; + } else { + ret = -ENOMEM; + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + goto out_unlock; + ret = vpls_nl_wire_msg(msg, vplsdev, VPLS_CMD_NEWWIRE, wireid, + &dsts->items[wireid], info->snd_portid, info->snd_seq, + 0); + if (ret) + goto out_unlock; + + ret = genlmsg_reply(msg, info); + } + + rtnl_unlock(); + return 0; + +out_unlock: + rtnl_unlock(); + return ret; +} + +static int vpls_genl_dumpwire(struct sk_buff *skb, struct netlink_callback *cb) +{ + int ret; + struct nlattr *attrs[VPLS_ATTR_MAX+1]; + unsigned ifindex; + struct net *net = sock_net(skb->sk); + struct net_device *vplsdev; + struct vpls_priv *priv; + struct vpls_dst_list *dsts; + u32 wireid; + + if (!cb->args[0]) { + ret = nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs, + ARRAY_SIZE(attrs), vpls_genl_policy, NULL); + if (ret) + return ret; + if (!attrs[VPLS_ATTR_IFINDEX]) + return -EINVAL; + ifindex = cb->args[0] = nla_get_u32(attrs[VPLS_ATTR_IFINDEX]); + } else { + ifindex = cb->args[0]; + } + + rtnl_lock(); + + ret = -ENODEV; + vplsdev = __dev_get_by_index(net, ifindex); + if (!vplsdev || vplsdev->netdev_ops != &vpls_netdev_ops) + goto out_unlock; + + priv = netdev_priv(vplsdev); + dsts = priv->dsts; + + wireid = cb->args[1]; + for (wireid = cb->args[1]; wireid < dsts->count; wireid++) + if (dsts->items[wireid].dev) + break; + ret = 0; + if (wireid == dsts->count) + goto out_unlock; + cb->args[1] = wireid + 1; + + ret = vpls_nl_wire_msg(skb, vplsdev, VPLS_CMD_NEWWIRE, wireid, + &dsts->items[wireid], + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, + NLM_F_MULTI); + if (ret == 0) + ret = skb->len; + +out_unlock: + rtnl_unlock(); + return ret; +} + +/* + * init/fini + */ + +static __init int vpls_init(void) +{ + int ret; + + ret = genl_register_family(&vpls_genl_family); + if (ret) + return ret; + + ret = rtnl_link_register(&vpls_link_ops); + if (ret) + goto out_unreg_family; + + return 0; + +out_unreg_family: + genl_unregister_family(&vpls_genl_family); + return ret; +} + +static __exit void vpls_exit(void) +{ + genl_unregister_family(&vpls_genl_family); + rtnl_link_unregister(&vpls_link_ops); +} + +module_init(vpls_init); +module_exit(vpls_exit); + +MODULE_DESCRIPTION("Virtual Private LAN Service"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_RTNL_LINK(DRV_NAME); diff --git a/net/mpls/vpls.h b/net/mpls/vpls.h new file mode 100644 index 0000000000000..bb21e49778a7d --- /dev/null +++ b/net/mpls/vpls.h @@ -0,0 +1,34 @@ +#ifndef _VPLS_H +#define _VPLS_H + +#define VPLS_F_INET 0x01 +#define VPLS_F_INET6 0x02 +#define VPLS_F_VLAN 0x04 + +enum { + VPLS_ATTR_UNSPEC = 0, + VPLS_ATTR_IFINDEX, + VPLS_ATTR_WIREID, + VPLS_ATTR_LABEL_IN, + VPLS_ATTR_LABEL_OUT, + VPLS_ATTR_NH_DEV, + VPLS_ATTR_NH_IP, + VPLS_ATTR_NH_IPV6, + VPLS_ATTR_TTL, + VPLS_ATTR_VLANID, + 
__VPLS_ATTR_MAX, +}; +#define VPLS_ATTR_MAX (__VPLS_ATTR_MAX - 1) + +enum { + VPLS_CMD_UNSPEC = 0, + + VPLS_CMD_NEWWIRE = 4, + VPLS_CMD_DELWIRE, + VPLS_CMD_GETWIRE, + VPLS_CMD_SETWIRE, + __VPLS_CMD_MAX, +}; +#define VPLS_CMD_MAX (__VPLS_CMD_MAX - 1) + +#endif /* _VPLS_H */
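
The vpls driver above is the only consumer of the new mpls_handler_add() /
mpls_handler_del() hook in this series. For reference, a minimal out-of-tree
consumer could look like the sketch below. It relies only on the prototypes
added to include/net/mpls.h here; the label value is arbitrary (it must be
>= MPLS_LABEL_FIRST_UNRESERVED and below the net.mpls.platform_labels limit),
and the handler simply drops the packet.

	#include <linux/module.h>
	#include <linux/netdevice.h>
	#include <linux/rtnetlink.h>
	#include <linux/skbuff.h>
	#include <net/net_namespace.h>
	#include <net/mpls.h>

	#define DEMO_LABEL 100	/* arbitrary, >= MPLS_LABEL_FIRST_UNRESERVED */

	/* Called from mpls_forward() for packets arriving with DEMO_LABEL;
	 * the handler takes ownership of the skb.
	 */
	static int demo_rcv(void *arg, struct sk_buff *skb,
			    struct net_device *dev, struct packet_type *pt,
			    struct mpls_shim_hdr *hdr,
			    struct net_device *orig_dev)
	{
		kfree_skb(skb);
		return NET_RX_DROP;
	}

	static int __init demo_init(void)
	{
		int err;

		/* the platform label table is modified under RTNL
		 * (mpls_handler_add() uses rtnl_dereference() internally)
		 */
		rtnl_lock();
		err = mpls_handler_add(&init_net, DEMO_LABEL, demo_rcv, NULL);
		rtnl_unlock();
		return err;
	}

	static void __exit demo_exit(void)
	{
		rtnl_lock();
		mpls_handler_del(&init_net, DEMO_LABEL);
		rtnl_unlock();
	}

	module_init(demo_init);
	module_exit(demo_exit);
	MODULE_DESCRIPTION("MPT_HANDLER demo");
	MODULE_LICENSE("GPL");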