Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@ Usage:
Path MTU Daemon captures and broadcasts ICMP messages related to
MTU detection. It listens on an interface, waiting for ICMP messages
(IPv4 type 3 code 4 or IPv6 type 2 code 0) and it forwards them
verbatim to the broadcast ethernet address.
verbatim, by default to the broadcast ethernet address. If a list of
peers is given, ICMP messages are instead forwarded to these peers using
normal routing, enabling distribution across different subnets.

Options:

Expand All @@ -31,6 +33,8 @@ Options:
--ports Forward only ICMP packets with payload
containing L4 source port on this list
(comma separated)
--peers Resend ICMP packets to this peer list
(comma separated)
--help Print this message

Example:
Expand All @@ -46,7 +50,8 @@ Once again, it listens waiting for packets matching:
(ether dst not ff:ff:ff:ff:ff:ff)

And having appropriate length, and forwards them to ethernet broadcast
ff:ff:ff:ff:ff:ff.
ff:ff:ff:ff:ff:ff or using normal packet routing if a list of peers
is specified.

To debug use tcpdump:

Expand Down
50 changes: 46 additions & 4 deletions src/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,9 @@ static void usage()
" containing L4 source port on this "
"list\n"
" (comma separated)\n"
" --peers Resend ICMP packets to this peer "
"list\n"
" (comma separated)\n"
" --help Print this message\n"
"\n"
"Example:\n"
Expand Down Expand Up @@ -90,12 +93,15 @@ struct state
pcap_t *pcap;
struct nflog *nflog;
int raw_sd;
int raw4;
int raw6;
struct hashlimit *sources;
struct hashlimit *ifaces;
int verbose;
int dry_run;
int strict;
uint64_t *ports_map;
void *peer_list;
};

static int handle_packet(const uint8_t *p, unsigned data_len, void *userdata)
Expand All @@ -105,6 +111,7 @@ static int handle_packet(const uint8_t *p, unsigned data_len, void *userdata)
const char *reason = "unknown";
int mtu_of_next_hop = -1;
int l4_sport = -1;
int ttl = -1;

/* assuming DLT_EN10MB */

Expand Down Expand Up @@ -145,6 +152,7 @@ static int handle_packet(const uint8_t *p, unsigned data_len, void *userdata)
valid = 1;
hash = &p[l3_offset + 12];
hash_len = 4;
ttl = p[l3_offset + 8];
}
}

Expand All @@ -158,6 +166,7 @@ static int handle_packet(const uint8_t *p, unsigned data_len, void *userdata)
valid = 1;
hash = &p[l3_offset + 8];
hash_len = 16;
ttl = p[l3_offset + 7];
}
}

Expand Down Expand Up @@ -233,6 +242,13 @@ static int handle_packet(const uint8_t *p, unsigned data_len, void *userdata)
}
}

/* Check if this packet was received from a L3 peer */
if (state->peer_list != NULL &&
check_peerlist(state->peer_list, hash, hash_len) == 0) {
reason = "Received from L3 peer";
goto reject;
}

uint8_t dst_mac[6];
memcpy(dst_mac, p, 6);

Expand Down Expand Up @@ -275,10 +291,17 @@ static int handle_packet(const uint8_t *p, unsigned data_len, void *userdata)
}

if (state->dry_run == 0) {
int r = send(state->raw_sd, pp, data_len, 0);
/* ENOBUFS happens during IRQ storms okay to ignore */
if (r < 0 && errno != ENOBUFS) {
PFATAL("send()");
if (state->peer_list == NULL) {
int r = send(state->raw_sd, pp, data_len, 0);
/* ENOBUFS happens during IRQ storms okay to ignore */
if (r < 0 && errno != ENOBUFS) {
PFATAL("send()");
}
} else {
sendto_peerlist(state->peer_list, state->raw4,
state->raw6, hash_len,
p + icmp_offset,
data_len - icmp_offset, ttl);
}
}
return 1;
Expand Down Expand Up @@ -370,6 +393,7 @@ int main(int argc, char *argv[])
{"help", no_argument, 0, 'h'},
{"ports", required_argument, 0, 'p'},
{"strict", no_argument, 0, 't'},
{"peers", required_argument, 0, 'P'},
{NULL, 0, 0, 0}};

const char *optstring = optstring_from_long_options(long_options);
Expand All @@ -382,6 +406,7 @@ int main(int argc, char *argv[])
int dry_run = 0;
int taskset_cpu = -1;
uint64_t *ports_map = NULL;
struct peer *peer_list = NULL;
int strict = 0;

optind = 1;
Expand Down Expand Up @@ -458,6 +483,16 @@ int main(int argc, char *argv[])
break;
}

case 'P': {
const char **addresses = parse_argv(optarg, ',');
if (addresses[0] == NULL) {
FATAL("Warning peer list passed with -P was empty");
}
peer_list = make_peerlist(addresses);
free(addresses);
break;
}

case 'v':
verbose++;
break;
Expand Down Expand Up @@ -504,7 +539,13 @@ int main(int argc, char *argv[])
state.strict = strict;
state.dry_run = dry_run;
state.ports_map = ports_map;
state.peer_list = peer_list;
state.raw_sd = setup_raw(iface);
state.raw4 = -1;
state.raw6 = -1;
if (peer_list != NULL) {
setup_rawipsocket(&state.raw4, &state.raw6);
}

struct uevent uevent;
uevent_new(&uevent);
Expand Down Expand Up @@ -569,6 +610,7 @@ int main(int argc, char *argv[])
if (state.ports_map) {
bitmap_free(state.ports_map);
}
free_peerlist(state.peer_list);

return 0;
}
121 changes: 121 additions & 0 deletions src/net.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
//
// Copyright (c) 2015 CloudFlare, Inc.

#include <errno.h>
#include <getopt.h>
#include <pcap.h>
#include <stdio.h>
Expand All @@ -12,11 +13,23 @@
#include <linux/if_packet.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <netdb.h>
#include <string.h>
#include <sys/ioctl.h>

#include "pmtud.h"

/* Fixed capacity of a peer table. */
#define MAX_PEERS 32

/*
 * One entry of the peer table: a socket address (IPv4 or IPv6) plus the
 * length of the address actually stored. A zero salen marks an unused
 * entry; the table walkers stop at the first such entry.
 */
struct peer
{
	union {
		struct sockaddr_in sin;   /* used when the family is AF_INET */
		struct sockaddr_in6 sin6; /* used when the family is AF_INET6 */
	} sa;
	socklen_t salen; /* number of valid bytes in sa, 0 if unused */
};

pcap_t *setup_pcap(const char *iface, const char *bpf_filter, int snap_len,
struct pcap_stat *stats)
{
Expand Down Expand Up @@ -179,3 +192,111 @@ const char *ip_to_string(const uint8_t *p, int p_len)
}
return dst;
}

/*
 * Open one raw ICMP socket for IPv4 and one raw ICMPv6 socket for IPv6.
 *
 * The descriptors are stored through raw4 and raw6. Failure to create
 * either socket is reported through PFATAL.
 */
void setup_rawipsocket(int *raw4, int *raw6)
{
	int sd4 = socket(AF_INET, SOCK_RAW, IPPROTO_ICMP);

	*raw4 = sd4;
	if (sd4 < 0) {
		PFATAL("socket(AF_INET, SOCK_RAW, IPPROTO_ICMP)");
	}

	int sd6 = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6);

	*raw6 = sd6;
	if (sd6 < 0) {
		PFATAL("socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6)");
	}
}

/*
 * Build a peer table from a NULL-terminated array of address strings.
 *
 * Each string is parsed with getaddrinfo() using AI_NUMERICHOST, so only
 * numeric addresses are accepted. The first returned address is copied
 * into the next free slot of a zero-initialised table of MAX_PEERS
 * entries. Too many addresses, an unparsable address or an address
 * larger than the slot are reported through FATAL.
 *
 * Returns the allocated table; release it with free_peerlist().
 */
struct peer *make_peerlist(const char **addresses)
{
	struct peer *table = calloc(MAX_PEERS, sizeof(struct peer));

	if (table == NULL) {
		PFATAL("malloc(peer_list)");
	}

	struct addrinfo hints;

	memset(&hints, 0, sizeof(struct addrinfo));
	hints.ai_flags = AI_NUMERICHOST;

	int used = 0;
	const char **cursor;

	for (cursor = addresses; *cursor != NULL; cursor++) {
		struct addrinfo *found;
		struct peer *slot;

		if (used >= MAX_PEERS) {
			FATAL("Maximum number of peers exceeded %d",
			      MAX_PEERS);
		}
		if (getaddrinfo(*cursor, NULL, &hints, &found) != 0) {
			FATAL("Malformed peer address %s", *cursor);
		}

		slot = &table[used];
		if (found->ai_addrlen > sizeof(slot->sa)) {
			FATAL("Internal error in address structures");
		}
		memcpy(&slot->sa, found->ai_addr, found->ai_addrlen);
		slot->salen = found->ai_addrlen;
		freeaddrinfo(found);
		used++;
	}
	return table;
}

/*
 * Release a peer table. The table is a single allocation, so one free()
 * is enough; a NULL peer_list is accepted because free(NULL) is a no-op.
 */
void free_peerlist(struct peer *peer_list)
{
	free(peer_list);
}

/*
 * Look up a raw IP address in the peer table.
 *
 * p points at p_len address bytes: 4 bytes are compared against AF_INET
 * entries, 16 bytes against AF_INET6 entries. The walk stops at the
 * first entry whose salen is zero or after MAX_PEERS entries.
 *
 * Returns 0 when an entry has the same address, -1 otherwise.
 */
int check_peerlist(struct peer *peer_list, const uint8_t *p, int p_len)
{
	int idx;

	for (idx = 0; idx < MAX_PEERS; idx++) {
		struct peer *entry = &peer_list[idx];
		const void *stored;

		if (entry->salen == 0) {
			break;
		}

		/* Pick the stored address matching the length of p. */
		if (p_len == 4 && entry->sa.sin.sin_family == AF_INET) {
			stored = &entry->sa.sin.sin_addr;
		} else if (p_len == 16 &&
			   entry->sa.sin6.sin6_family == AF_INET6) {
			stored = &entry->sa.sin6.sin6_addr;
		} else {
			continue;
		}

		if (memcmp(stored, p, p_len) == 0) {
			return 0;
		}
	}
	return -1;
}

/*
 * Resend an ICMP message to every peer of the matching address family.
 *
 * peer_list    peer table; walked until the first entry with salen == 0
 *              or MAX_PEERS entries
 * raw4, raw6   raw sockets used for AF_INET and AF_INET6 peers
 * addr_len     4 selects AF_INET peers, 16 selects AF_INET6 peers; any
 *              other value is reported through FATAL
 * icmppkt      start of the ICMP message to send
 * icmppkt_len  length of the ICMP message in bytes
 * orig_ttl     TTL / hop limit of the received packet
 *
 * The message is sent with orig_ttl - 1. Nothing is sent when the
 * decremented value is zero or negative: a negative value would
 * otherwise be passed to setsockopt(), where -1 selects the system
 * default and would give an expired packet a fresh TTL.
 */
void sendto_peerlist(struct peer *peer_list, int raw4, int raw6, int addr_len,
		     const uint8_t *icmppkt, unsigned icmppkt_len, int orig_ttl)
{
	struct peer *peer;
	int family;
	int sd;
	int level;
	int optname;
	int ttl;
	int r;

	if (addr_len == 4) {
		family = AF_INET;
		sd = raw4;
		level = IPPROTO_IP;
		optname = IP_TTL;
	} else if (addr_len == 16) {
		family = AF_INET6;
		sd = raw6;
		level = IPPROTO_IPV6;
		optname = IPV6_UNICAST_HOPS;
	} else {
		FATAL("addr_len is neither 4 nor 16");
		return;
	}

	ttl = orig_ttl - 1;
	if (ttl <= 0) {
		return;
	}

	/* Only the socket used for this family needs the new TTL. */
	if (setsockopt(sd, level, optname, &ttl, sizeof(ttl)) < 0) {
		PFATAL("setsockopt(ttl)");
	}

	for (peer = peer_list; peer < peer_list + MAX_PEERS && peer->salen != 0;
	     peer++) {
		if (peer->sa.sin.sin_family != family) {
			continue;
		}
		r = sendto(sd, icmppkt, icmppkt_len, 0,
			   (struct sockaddr *)&peer->sa, peer->salen);
		/* ENOBUFS happens during IRQ storms okay to ignore */
		if (r < 0 && errno != ENOBUFS) {
			PFATAL("sendto()");
		}
	}
}
7 changes: 7 additions & 0 deletions src/pmtud.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,18 @@ int signal_desc(int signal);
const char **parse_argv(const char *str, char delim);

/* net.c */
struct peer;
pcap_t *setup_pcap(const char *iface, const char *bpf_filter, int snap_len,
struct pcap_stat *stats);
void unsetup_pcap(pcap_t *pcap, const char *iface, struct pcap_stat *stats);
int setup_raw(const char *iface);
const char *ip_to_string(const uint8_t *p, int p_len);
void setup_rawipsocket(int *raw4, int *raw6);
struct peer *make_peerlist(const char **addresses);
void free_peerlist(struct peer *peer_list);
int check_peerlist(struct peer *peer_list, const uint8_t *p, int p_len);
void sendto_peerlist(struct peer *peer_list, int raw4, int raw6, int addr_len,
const uint8_t *icmppkt, unsigned icmppkt_len, int orig_ttl);

/* sched.c */
int taskset(int taskset_cpu);
Expand Down