799 lines
29 KiB
C
799 lines
29 KiB
C
|
#include <linux/netlink.h>
|
||
|
#include <linux/rtnetlink.h>
|
||
|
#include <sys/mman.h>
|
||
|
#include <arpa/inet.h>
|
||
|
#include <sys/socket.h>
|
||
|
#include <unistd.h>
|
||
|
#include <error.h>
|
||
|
#include <errno.h>
|
||
|
#include <stdio.h>
|
||
|
#include <stdint.h>
|
||
|
#include <assert.h>
|
||
|
#include <stdlib.h>
|
||
|
#include <string.h>
|
||
|
|
||
|
// Route translation:
|
||
|
// IPv6 prefix: fd7a:81db:3c57:fffe:0000:0000:xxxx:xxxx
|
||
|
// IPv6 prefix length must be at least 96 plus min ipv4 prefix, or the route is ignored.
|
||
|
// IPv4 part of IPv6 prefix must match the IPv4 prefix
|
||
|
// IPv4 address is always last 32 bits of IPv6 address. In the usual case, /128 routes are used.
|
||
|
|
||
|
// Routes with unknown attributes are ignored. In particular:
|
||
|
// RTA_TABLE (must be RT_TABLE_MAIN if present)
|
||
|
// RTA_SRC, RTA_IIF (only used when querying? not sure)
|
||
|
// RTA_ENCAP_TYPE, RTA_ENCAP (TODO: these could be copied, but testing would be needed)
|
||
|
// RTA_NH_ID (TODO? what is this? alternate way of specifying nexthop?)
|
||
|
// RTA_VIA (not seen on ipv6 routes; used to specify ipv4 nexthop on ipv6 routes)
|
||
|
// Attributes that are copied are:
|
||
|
// RTA_OIF (output interface)
|
||
|
// RTA_PRIORITY (aka metric)
|
||
|
// RTA_GATEWAY (converted to RTA_VIA for ipv4 table, but holds the same address)
|
||
|
// RTA_MULTIPATH (each RTA_GATEWAY within gets converted to RTA_VIA for ipv4 table, but holds the same address. All other sub-attributes are copied.)
|
||
|
// Attributes with very special handling are:
|
||
|
// RTA_DST
|
||
|
// Attributes that are knowingly ignored are:
|
||
|
// RTA_PREFSRC
|
||
|
// RTA_CACHEINFO
|
||
|
// RTA_EXPIRES
|
||
|
// RTA_PREF (RFC4191, IPv6-only)
|
||
|
|
||
|
// Only manage routes to this part of the v4 address space (172.23.101.144/28).
// IPv4 routes with a shorter prefix, or outside this range, are left alone.
#define V4_MIN_PREFIX_LENGTH 28
const unsigned char v4_reqprefix[4] = {0xac, 0x17, 0x65, 0x90};

// The same range as it appears in IPv6 routes: the IPv4 address occupies the
// last 32 bits of fd7a:81db:3c57:fffe:0000:0000:xxxx:xxxx.
#define V6_MIN_PREFIX_LENGTH (96+V4_MIN_PREFIX_LENGTH)
const unsigned char v6_reqprefix[16] = {
	0xfd, 0x7a, 0x81, 0xdb, 0x3c, 0x57, 0xff, 0xfe,
	0, 0, 0, 0,
	0xac, 0x17, 0x65, 0x90,
};

// Value stored in rtm_protocol of the IPv4 routes built by this program.
#define RTPROT_OSPF6TO4 RTPROT_EIGRP // grab some otherwise-unused value
|
||
|
|
||
|
|
||
|
// Destination address used for every request we send: netlink port id 0.
static const struct sockaddr_nl kernel_receiver = {
	.nl_family = AF_NETLINK,
	.nl_pid = 0,
};

// The one rtnetlink socket used for dumps, updates and requests (opened in main).
static int nlsock;

// Local address handed to bind(): nl_pid is left at zero and the socket
// subscribes to the IPv6 route multicast group.
static struct sockaddr_nl bindaddr = {
	.nl_groups = RTMGRP_IPV6_ROUTE,
	.nl_family = AF_NETLINK,
};
|
||
|
|
||
|
static void request_dump(const char *label, int af, int sequence) {
|
||
|
struct {
|
||
|
struct nlmsghdr nlhdr;
|
||
|
struct rtmsg rthdr;
|
||
|
} request = {
|
||
|
.nlhdr = {
|
||
|
.nlmsg_len = sizeof request,
|
||
|
.nlmsg_type = RTM_GETROUTE,
|
||
|
.nlmsg_flags = NLM_F_REQUEST|NLM_F_DUMP,
|
||
|
.nlmsg_seq = sequence,
|
||
|
.nlmsg_pid = 0,//bindaddr.nl_pid, // either works?
|
||
|
},
|
||
|
.rthdr = {
|
||
|
.rtm_family = af,
|
||
|
.rtm_dst_len = 0, // dst prefix mask bit length
|
||
|
.rtm_src_len = 0, // src prefix mask bit length
|
||
|
.rtm_tos = 0, // unused?
|
||
|
|
||
|
.rtm_table = RT_TABLE_MAIN,
|
||
|
.rtm_protocol = RTPROT_UNSPEC, // TODO would be a useful filter, eg RTPROT_BIRD
|
||
|
.rtm_scope = RT_SCOPE_UNIVERSE,
|
||
|
.rtm_type = RTN_UNSPEC,
|
||
|
|
||
|
.rtm_flags = 0,
|
||
|
},
|
||
|
};
|
||
|
if(sendto(nlsock, &request, sizeof request, 0, (struct sockaddr*)&kernel_receiver, sizeof kernel_receiver) < 0)
|
||
|
error(1, errno, "failed to send %s route dump request", label);
|
||
|
}
|
||
|
|
||
|
// Global phase of the program, advanced in this order:
//   1. dump ipv6 routes
//   2. dump ipv4 routes and remove any that don't make sense
//   3. process updates as they come in
static enum {
	state_dumping_ipv6, // waiting for the end of the ipv6 dump (initial value)
	state_dumping_ipv4, // waiting for the end of the ipv4 dump
	state_realtime,     // both dumps finished
} cur_state = state_dumping_ipv6;
|
||
|
|
||
|
// Progress of one table entry. Odd-numbered states are waiting for an
// acknowledgement from the kernel; the sequence number of that request is kept
// in struct route's outstanding_seq (which is meaningless in the even states).
enum route_state {
	rtst_unknown = 0,
	rtst_deleting_old = 1,
	rtst_creating_new = 3,
	rtst_stable = 4,
	rtst_error = 6,
};

// There's one structure per ipv6 route that we translate.
// (not per ipv4 route if it doesn't have an associated ipv6 route - we delete those immediately)
// There should only be a small number of relevant routes, so an unsorted dynamic
// array is fine; the lookup keys are held in separate parallel arrays (SoA) so
// that searching them is cache-efficient.
// IPv6 routes are stored paired with their IPv4 route. Both are stored in direct
// rtnetlink format, even though it's difficult to manipulate.
struct route {
	struct nlmsghdr *ipv6_rtnl; // directly as received from rtnetlink
	struct nlmsghdr *ipv4_rtnl; // what the kernel knows at the moment. NULL if no route in table.
	struct nlmsghdr *ipv4_new;  // as we want it to be. If up-to-date, ipv4_new==ipv4_rtnl - be careful not to double-free.

	enum route_state state;
	uint32_t outstanding_seq;   // only meaningful while state is odd
};
|
||
|
// The route table: three parallel arrays indexed by the same slot number.
static struct route *routes = NULL;          // per-route payload
static uint32_t *route_addrs = NULL;         // primary key; network byte order
static unsigned char *route_masklens = NULL; // secondary key
static unsigned int routes_used = 0;         // number of slots currently in use
static unsigned int routes_allocated = 0;    // capacity of each of the three arrays
|
||
|
|
||
|
static struct route *find_route(uint32_t ipv4_addr, int masklen) {
|
||
|
for(int i = 0; i < routes_used; i++) {
|
||
|
if(route_addrs[i] == ipv4_addr && route_masklens[i] == masklen)
|
||
|
return &routes[i];
|
||
|
}
|
||
|
return NULL;
|
||
|
}
|
||
|
static struct route *append_route(uint32_t ipv4_addr, int masklen) {
|
||
|
if(routes_used == routes_allocated) {
|
||
|
unsigned int new_alloc = routes_allocated + (routes_allocated >> 1);
|
||
|
if(new_alloc < routes_allocated || new_alloc > (1<<30)/sizeof(*routes))
|
||
|
error(1, 0, "memory allocation overflow (too many routes)");
|
||
|
if(new_alloc < 5) new_alloc = 5;
|
||
|
routes = realloc(routes, new_alloc*sizeof(*routes));
|
||
|
route_addrs = realloc(route_addrs, new_alloc*sizeof(*route_addrs));
|
||
|
route_masklens = realloc(route_masklens, new_alloc*sizeof(*route_masklens));
|
||
|
if(!routes || !route_addrs || !route_masklens)
|
||
|
error(1, 0, "failed to allocate memory for routes");
|
||
|
routes_allocated = new_alloc;
|
||
|
}
|
||
|
route_addrs[routes_used] = ipv4_addr;
|
||
|
route_masklens[routes_used] = masklen;
|
||
|
routes_used++;
|
||
|
struct route *r = &routes[routes_used-1];
|
||
|
memset(r, 0, sizeof(*r));
|
||
|
return r;
|
||
|
}
|
||
|
static void remove_route_from_table(unsigned int index) {
|
||
|
assert(index < routes_used);
|
||
|
struct route *r = &routes[index];
|
||
|
|
||
|
free(r->ipv6_rtnl);
|
||
|
if(r->ipv4_new != r->ipv4_rtnl) free(r->ipv4_new);
|
||
|
free(r->ipv4_rtnl);
|
||
|
|
||
|
if(index != routes_used - 1) {
|
||
|
assert(routes_used > 0);
|
||
|
*r = routes[routes_used - 1];
|
||
|
route_addrs[index] = route_addrs[routes_used - 1];
|
||
|
route_masklens[index] = route_masklens[routes_used - 1];
|
||
|
}
|
||
|
routes_used--;
|
||
|
}
|
||
|
|
||
|
// Hand out the next netlink sequence number. The counter starts at 1 and is
// incremented before use, so the first value returned is 2; 0 is never returned.
static uint32_t next_sequence() {
	static uint32_t counter = 1;

	// we just assume that sequence numbers won't wrap around until the previous
	// request using that number has completed
	if(++counter == 0)
		counter = 1; // don't use 0
	return counter;
}
|
||
|
|
||
|
#if 0
// Debug-only table mapping RTA_* attribute numbers to short names.
// Compiled out; used only by the (also compiled-out) attribute printing in main.
// don't know what all of these mean, but they're defined
static const char *const rta_names[] = {
	[RTA_DST]           = "dst",
	[RTA_SRC]           = "src",
	[RTA_IIF]           = "iif",
	[RTA_OIF]           = "oif",
	[RTA_GATEWAY]       = "gateway",
	[RTA_PRIORITY]      = "priority",
	[RTA_PREFSRC]       = "prefsrc", // this one can't be transferred to ipv4
	[RTA_METRICS]       = "metrics",
	[RTA_MULTIPATH]     = "multipath", // TODO
	[RTA_FLOW]          = "flow",
	[RTA_CACHEINFO]     = "cacheinfo",
	[RTA_TABLE]         = "table",
	[RTA_MARK]          = "mark",
	[RTA_MFC_STATS]     = "mfc_stats",
	[RTA_VIA]           = "via",
	[RTA_NEWDST]        = "newdst",
	[RTA_PREF]          = "pref",
	[RTA_ENCAP_TYPE]    = "encaptype",
	[RTA_ENCAP]         = "encap",
	[RTA_EXPIRES]       = "expires",
	[RTA_PAD]           = "pad",
	[RTA_UID]           = "uid",
	[RTA_TTL_PROPAGATE] = "ttl_propagate",
	[RTA_IP_PROTO]      = "ip_proto",
	[RTA_SPORT]         = "sport",
	[RTA_DPORT]         = "dport",
	[RTA_NH_ID]         = "nhid",
};
#endif
|
||
|
|
||
|
// Compare the first `nbits` bits of two big-endian byte strings.
//   a, b:  buffers holding at least ceil(nbits/8) readable bytes each
//   nbits: number of leading bits to compare; <= 0 matches trivially
// Returns 1 when the prefixes are equal, 0 otherwise.
static int prefix_matches(const unsigned char *a, const unsigned char *b, int nbits) {
	if(nbits <= 0)
		return 1;

	// Whole bytes first.
	const size_t whole_bytes = (size_t)nbits / 8;
	if(memcmp(a, b, whole_bytes) != 0)
		return 0;

	// Then the leftover 1..7 most-significant bits of the following byte.
	const int leftover = nbits % 8;
	if(leftover == 0)
		return 1; // must not read a byte that is outside the prefix

	// Keep the top `leftover` bits. (The previous mask, 1 << (8 - nbits),
	// selected a single bit, so most of the partial byte went unchecked.)
	const unsigned int mask = (0xffu << (8 - leftover)) & 0xffu;
	return (a[whole_bytes] & mask) == (b[whole_bytes] & mask);
}
|
||
|
|
||
|
// Verbosity levels, in increasing order; log sites test `debug >= level`.
enum debuglevel {
	dbg_none = 0,         // messages guarded by this level are always printed
	dbg_relevant_routes,  // routes inside the managed ranges
	dbg_route_operations, // route create/delete requests and their results
	dbg_all_routes,       // also the routes that get ignored
};

// Verbosity currently in effect.
static enum debuglevel debug = dbg_route_operations;
|
||
|
|
||
|
// Scratch buffer in which an outgoing route message is assembled.
// It is mapped in main() and is followed by a guard page.
static char *routebuilder = NULL;
// Write cursor: where the next attribute will be placed inside routebuilder.
static char *routebuilder_pos = NULL;
// Typed views of the two fixed headers at the start of the scratch buffer.
#define routebuilder_nlhdr ((struct nlmsghdr*)routebuilder)
#define routebuilder_rtmsg ((struct rtmsg*)(routebuilder + NLMSG_HDRLEN))
|
||
|
static void routebuilder_init() {
|
||
|
memset(routebuilder, 0, NLMSG_HDRLEN + NLMSG_ALIGN(sizeof(struct rtmsg)));
|
||
|
routebuilder_pos = routebuilder + NLMSG_HDRLEN + NLMSG_ALIGN(sizeof(struct rtmsg));
|
||
|
//routebuilder_nlhdr remains uninitialized until message is finalized and then sent
|
||
|
}
|
||
|
static void routebuilder_append(struct rtattr *rta) {
|
||
|
memset(routebuilder_pos, 0, RTA_ALIGN(rta->rta_len));
|
||
|
memcpy(routebuilder_pos, rta, rta->rta_len);
|
||
|
routebuilder_pos += RTA_ALIGN(rta->rta_len);
|
||
|
}
|
||
|
|
||
|
// Return a freshly malloc'ed copy of the `len` bytes at `x`.
//   x:   source bytes (not modified); must be readable for `len` bytes
//   len: number of bytes to copy; 0 is allowed and yields a valid, freeable
//        pointer
// Never returns NULL: an allocation failure terminates the program via error().
// The caller owns the result and releases it with free().
static void *xmemdup(const void *x, size_t len) {
	// malloc(0) may legitimately return NULL; request one byte instead so that
	// a NULL result always means "out of memory".
	void *result = malloc(len ? len : 1);
	if(!result) error(1, errno, "malloc");
	if(len)
		memcpy(result, x, len);
	return result;
}
|
||
|
|
||
|
// Start deletion of an ipv4 route not associated with any ipv6 route.
// Not implemented yet: this is currently a no-op, so a caller that invokes it
// does not actually remove anything.
static void start_deleting_unexpected_ipv4_route(struct nlmsghdr *msg) {
	(void)msg; // TODO
}
|
||
|
|
||
|
static void route_start_deleting_old(struct route *r) {
|
||
|
if(debug >= dbg_route_operations) {
|
||
|
char addrbuf[64];
|
||
|
if(!inet_ntop(AF_INET, &route_addrs[r - routes], addrbuf, sizeof addrbuf))
|
||
|
strcpy(addrbuf, "?");
|
||
|
fprintf(stderr, "try to delete a route to %s/%d\n", addrbuf, route_masklens[r - routes]);
|
||
|
}
|
||
|
|
||
|
assert(r->ipv4_rtnl);
|
||
|
r->outstanding_seq = next_sequence();
|
||
|
r->state = rtst_deleting_old;
|
||
|
r->ipv4_rtnl->nlmsg_type = RTM_DELROUTE;
|
||
|
r->ipv4_rtnl->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
|
||
|
r->ipv4_rtnl->nlmsg_seq = r->outstanding_seq;
|
||
|
if(sendto(nlsock, r->ipv4_rtnl, r->ipv4_rtnl->nlmsg_len, 0, (const struct sockaddr*)&kernel_receiver, sizeof kernel_receiver) < 0)
|
||
|
error(1, errno, "sendto (delete ipv4 route)");
|
||
|
}
|
||
|
|
||
|
static void route_start_creating_new(struct route *r) {
|
||
|
if(debug >= dbg_route_operations) {
|
||
|
char addrbuf[64];
|
||
|
if(!inet_ntop(AF_INET, &route_addrs[r - routes], addrbuf, sizeof addrbuf))
|
||
|
strcpy(addrbuf, "?");
|
||
|
fprintf(stderr, "try to create a route to %s/%d\n", addrbuf, route_masklens[r - routes]);
|
||
|
}
|
||
|
|
||
|
assert(r->ipv4_new);
|
||
|
r->outstanding_seq = next_sequence();
|
||
|
r->state = rtst_creating_new;
|
||
|
r->ipv4_new->nlmsg_type = RTM_NEWROUTE;
|
||
|
r->ipv4_new->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_CREATE | NLM_F_REPLACE;
|
||
|
r->ipv4_new->nlmsg_seq = r->outstanding_seq;
|
||
|
if(sendto(nlsock, r->ipv4_new, r->ipv4_new->nlmsg_len, 0, (const struct sockaddr*)&kernel_receiver, sizeof kernel_receiver) < 0)
|
||
|
error(1, errno, "sendto (create ipv4 route)");
|
||
|
}
|
||
|
|
||
|
// Find the first top-level attribute of type `attrnum` in a route message.
//   a:       a netlink message whose payload is a struct rtmsg followed by attributes
//   attrnum: RTA_* type to look for
// Returns a pointer into the message, or NULL when no such attribute is present.
static struct rtattr *find_rtattr(struct nlmsghdr *a, int attrnum) {
	const unsigned int fixed_part = NLMSG_HDRLEN + NLMSG_ALIGN(sizeof(struct rtmsg));
	int left = a->nlmsg_len - fixed_part;

	for(struct rtattr *cur = (struct rtattr*)((char*)a + fixed_part);
	    RTA_OK(cur, left);
	    cur = RTA_NEXT(cur, left)
	) {
		if(cur->rta_type == attrnum)
			return cur;
	}
	return NULL;
}
|
||
|
// Compare one attribute type between two route messages.
// Returns 1 when both messages lack the attribute, or both carry it with the
// same length and identical payload bytes; returns 0 otherwise. Only the first
// occurrence in each message is considered.
static int is_current_route_attr_good(struct nlmsghdr *a, struct nlmsghdr *b, int attrnum) {
	struct rtattr *in_a = find_rtattr(a, attrnum);
	struct rtattr *in_b = find_rtattr(b, attrnum);

	// Missing on at least one side: equal only if missing on both.
	if(!in_a || !in_b)
		return !in_a && !in_b;

	if(in_a->rta_len != in_b->rta_len)
		return 0;

	const size_t payload_len = in_a->rta_len - RTA_ALIGN(sizeof(struct rtattr));
	return memcmp(RTA_DATA(in_a), RTA_DATA(in_b), payload_len) == 0;
}
|
||
|
// Decide whether the route the kernel has (`a`) already equals the route we
// want (`b`), or vice versa - the comparison is symmetric.
// Two NULLs are equal; one NULL is a mismatch. Otherwise selected rtmsg header
// fields and selected attributes are compared, and the first difference found
// is reported on stderr. Returns 1 for "equal", 0 for "differs".
static int is_current_route_good(struct nlmsghdr *a, struct nlmsghdr *b) {
	if(!a || !b)
		return !a && !b;

	// both must be valid route messages. nlmsg type and flags ignored.
	assert(a->nlmsg_len >= NLMSG_SPACE(sizeof(struct rtmsg)));
	assert(b->nlmsg_len >= NLMSG_SPACE(sizeof(struct rtmsg)));
	struct rtmsg *rma = NLMSG_DATA(a);
	struct rtmsg *rmb = NLMSG_DATA(b);

#define STR2(x) #x
#define HEADER_FIELD_MUST_MATCH(field) \
	do { \
		if(rma->field != rmb->field) { \
			fprintf(stderr, "mismatch field %s\n", STR2(field)); \
			return 0; \
		} \
	} while(0)
	HEADER_FIELD_MUST_MATCH(rtm_dst_len);
	HEADER_FIELD_MUST_MATCH(rtm_family);
	HEADER_FIELD_MUST_MATCH(rtm_flags);
	HEADER_FIELD_MUST_MATCH(rtm_protocol);
	HEADER_FIELD_MUST_MATCH(rtm_scope);
	HEADER_FIELD_MUST_MATCH(rtm_table);
	HEADER_FIELD_MUST_MATCH(rtm_type);
#undef HEADER_FIELD_MUST_MATCH

	// Attributes compared, in this order.
	// RTA_TABLE is *not* checked since it's duplicated in rtm_table?
	static const struct {
		int attr;
		const char *name;
	} compared_attrs[] = {
		{RTA_OIF, "RTA_OIF"},
		{RTA_VIA, "RTA_VIA"},
		{RTA_PRIORITY, "RTA_PRIORITY"},
		{RTA_DST, "RTA_DST"},
		// looks like the kernel can't reorder entries. It does seem to unwrap it
		// if there's only one path, though.
		{RTA_MULTIPATH, "RTA_MULTIPATH"},
	};
	for(size_t k = 0; k < sizeof compared_attrs / sizeof compared_attrs[0]; k++) {
		if(!is_current_route_attr_good(a, b, compared_attrs[k].attr)) {
			fprintf(stderr, "mismatch attr %s\n", compared_attrs[k].name);
			return 0;
		}
	}
	return 1;
}
|
||
|
|
||
|
int main() {
|
||
|
|
||
|
nlsock = socket(AF_NETLINK, SOCK_DGRAM | SOCK_CLOEXEC, NETLINK_ROUTE);
|
||
|
if(nlsock < 0) error(1, errno, "failed to create rtnetlink socket");
|
||
|
|
||
|
{
|
||
|
int pagesize = getpagesize();
|
||
|
// TODO: should allocate an appropriately calculated number of pages instead of 2 pages
|
||
|
routebuilder = mmap(NULL, pagesize*3, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
|
||
|
if(!routebuilder) error(1, errno, "mmap");
|
||
|
if(!mmap(routebuilder + pagesize*2, pagesize, PROT_NONE, MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0)) error(1, errno, "mmap");
|
||
|
}
|
||
|
|
||
|
{
|
||
|
int extack = 1;
|
||
|
if(setsockopt(nlsock, SOL_NETLINK, NETLINK_EXT_ACK, &extack, sizeof extack) < 0)
|
||
|
error(0, errno, "setsockopt (netlink ext ack)"); // non-fatal error
|
||
|
}
|
||
|
|
||
|
if(bind(nlsock, (struct sockaddr*)&bindaddr, sizeof bindaddr) < 0)
|
||
|
error(1, errno, "failed to bind netlink socket");
|
||
|
|
||
|
uint32_t dump_sequence = next_sequence();
|
||
|
cur_state = state_dumping_ipv6;
|
||
|
request_dump("ipv6", AF_INET6, dump_sequence);
|
||
|
|
||
|
while(1) {
|
||
|
// Always after processing any message:
|
||
|
// (occurs at start of loop so that we can see if any timed actions are needed)
|
||
|
if(cur_state == state_realtime) {
|
||
|
for(int i = 0; i < routes_used; i++) {
|
||
|
struct route *r = &routes[i];
|
||
|
//fprintf(stderr, "check route %d state %d\n", i, r->state);
|
||
|
|
||
|
if (!r->ipv6_rtnl) {
|
||
|
// an unexpected ipv4 route being deleted
|
||
|
assert(!r->ipv4_new);
|
||
|
if(!r->ipv4_rtnl) {
|
||
|
// route has been deleted from kernel
|
||
|
if(debug >= dbg_route_operations) fprintf(stderr, "route deleted from kernel - deleting from table\n");
|
||
|
remove_route_from_table(i);
|
||
|
i--;
|
||
|
continue;
|
||
|
} else if(r->state == rtst_unknown) {
|
||
|
route_start_deleting_old(r);
|
||
|
}
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
if(r->state == rtst_unknown) {
|
||
|
// check if the route needs any update
|
||
|
if(is_current_route_good(r->ipv4_rtnl, r->ipv4_new)) {
|
||
|
r->state = rtst_stable;
|
||
|
} else if(r->ipv4_rtnl) {
|
||
|
route_start_deleting_old(r);
|
||
|
} else {
|
||
|
route_start_creating_new(r);
|
||
|
}
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
if(r->state & 1) {
|
||
|
// TODO: check for timeouts
|
||
|
}
|
||
|
if(r->state == rtst_error) {
|
||
|
// TODO: check for error timeout
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
char buf[65536];
|
||
|
struct sockaddr_nl sender = {0};
|
||
|
socklen_t sender_len = sizeof sender;
|
||
|
|
||
|
int nrecv = recvfrom(nlsock, buf, sizeof buf, 0, (struct sockaddr*)&sender, &sender_len);
|
||
|
if(nrecv < 0) {
|
||
|
if(errno == EINTR)
|
||
|
continue;
|
||
|
// TODO: on ENOBUFS, restart and resynchronize
|
||
|
error(1, errno, "recvfrom netlink");
|
||
|
}
|
||
|
|
||
|
if(sender.nl_family != AF_NETLINK || sender.nl_pid != 0) {
|
||
|
fprintf(stderr, "got netlink packet not from kernel (sender=%d)\n", sender.nl_family == AF_NETLINK ? (int)sender.nl_pid : -1);
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
int remaining_len = nrecv;
|
||
|
struct nlmsghdr *hdr = (struct nlmsghdr*)buf;
|
||
|
for(; NLMSG_OK(hdr, remaining_len) && NLMSG_PAYLOAD(hdr, remaining_len) >= sizeof(struct rtmsg);
|
||
|
hdr = NLMSG_NEXT(hdr, remaining_len)
|
||
|
) {
|
||
|
|
||
|
// invalid check; dump responses show our own PID
|
||
|
/*if(hdr->nlmsg_pid != 0)*/ if(0) {
|
||
|
fprintf(stderr, "got netlink message not from kernel (sender=%d)\n", (int)hdr->nlmsg_pid);
|
||
|
skip_nlmsg:
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
switch(hdr->nlmsg_type) {
|
||
|
case NLMSG_NOOP:
|
||
|
break;
|
||
|
case NLMSG_ERROR:
|
||
|
if(hdr->nlmsg_len < NLMSG_ALIGN(sizeof(struct nlmsgerr)) + NLMSG_HDRLEN)
|
||
|
fprintf(stderr, "got bad NLMSG_ERROR length\n");
|
||
|
else {
|
||
|
struct nlmsgerr *err = NLMSG_DATA(hdr);
|
||
|
int i;
|
||
|
for(i = 0; i < routes_used; i++) {
|
||
|
if((routes[i].state & 1) && (err->msg.nlmsg_seq == routes[i].outstanding_seq)) {
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
if(i >= routes_used) {
|
||
|
fprintf(stderr, "got NLMSG_ERROR for unknown request (sequence=%d)\n", (int)err->msg.nlmsg_seq);
|
||
|
} else {
|
||
|
struct route *r = &routes[i];
|
||
|
char addrbuf[64];
|
||
|
if(!inet_ntop(AF_INET, &route_addrs[i], addrbuf, sizeof addrbuf))
|
||
|
strcpy(addrbuf, "?");
|
||
|
|
||
|
switch(r->state) {
|
||
|
case rtst_creating_new:
|
||
|
if(err->error == 0) {
|
||
|
if(debug >= dbg_route_operations) fprintf(stderr, "successfully created a route to %s/%d\n", addrbuf, route_masklens[i]);
|
||
|
if(r->ipv4_rtnl != r->ipv4_new) {
|
||
|
free(r->ipv4_rtnl);
|
||
|
r->ipv4_rtnl = r->ipv4_new;
|
||
|
}
|
||
|
r->state = rtst_unknown;
|
||
|
} else {
|
||
|
if(debug >= dbg_none) fprintf(stderr, "failed to create a route to %s/%d\n", addrbuf, route_masklens[i]);
|
||
|
r->state = rtst_error;
|
||
|
}
|
||
|
break;
|
||
|
case rtst_deleting_old:
|
||
|
if(err->error == 0) {
|
||
|
if(debug >= dbg_route_operations) fprintf(stderr, "successfully deleted a route to %s/%d\n", addrbuf, route_masklens[i]);
|
||
|
if(r->ipv4_rtnl != r->ipv4_new)
|
||
|
free(r->ipv4_rtnl);
|
||
|
r->ipv4_rtnl = NULL;
|
||
|
r->state = rtst_unknown;
|
||
|
} else {
|
||
|
if(debug >= dbg_none) fprintf(stderr, "failed to delete a route to %s/%d\n", addrbuf, route_masklens[i]);
|
||
|
r->state = rtst_error;
|
||
|
}
|
||
|
break;
|
||
|
default: break;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
break;
|
||
|
case NLMSG_DONE:
|
||
|
if((cur_state == state_dumping_ipv4 || cur_state == state_dumping_ipv6) && hdr->nlmsg_seq == dump_sequence) {
|
||
|
if(cur_state == state_dumping_ipv6) {
|
||
|
// next step is ipv4
|
||
|
dump_sequence = next_sequence();
|
||
|
cur_state = state_dumping_ipv4;
|
||
|
request_dump("ipv4", AF_INET, dump_sequence);
|
||
|
} else if(cur_state == state_dumping_ipv4) {
|
||
|
cur_state = state_realtime;
|
||
|
}
|
||
|
} else {
|
||
|
fprintf(stderr, "unexpected NLMSG_DONE sequence=%d my_state=%d my_sequence=%d\n", hdr->nlmsg_seq, (int)cur_state, (int)dump_sequence);
|
||
|
}
|
||
|
break;
|
||
|
default:
|
||
|
printf("got unexpected nlmsg type 0x%x\n", hdr->nlmsg_type);
|
||
|
break;
|
||
|
case RTM_NEWROUTE:
|
||
|
case RTM_DELROUTE:
|
||
|
#if 0
|
||
|
if(hdr->nlmsg_type == RTM_NEWROUTE)
|
||
|
printf("got RTM_NEWROUTE ");
|
||
|
else
|
||
|
printf("got RTM_DELROUTE ");
|
||
|
#endif
|
||
|
|
||
|
// When a route is removed, the kernel sends a RTM_DELROUTE with the same attributes
|
||
|
// table dst prefsrc priority gateway oif cacheinfo pref
|
||
|
// table dst prefsrc priority multipath cacheinfo pref
|
||
|
// table dst priority oif cacheinfo pref
|
||
|
|
||
|
struct rtmsg *rt = NLMSG_DATA(hdr);
|
||
|
|
||
|
// whether it's from a dump or not, we can check whether it's a route we can process
|
||
|
// and ignore it if not. we also extract pointers to the attributes we might care about.
|
||
|
struct rtattr *oif = NULL; // output interface
|
||
|
struct rtattr *gateway = NULL;
|
||
|
struct rtattr *via = NULL;
|
||
|
struct rtattr *priority = NULL; // aka metric
|
||
|
struct rtattr *dst = NULL;
|
||
|
struct rtattr *multipath = NULL;
|
||
|
struct rtattr *table = NULL;
|
||
|
{
|
||
|
struct rtattr *rta = (struct rtattr*)((char*)rt + NLMSG_ALIGN(sizeof(struct rtmsg)));
|
||
|
int rta_remaining_len = hdr->nlmsg_len - NLMSG_HDRLEN - NLMSG_ALIGN(sizeof(struct rtmsg));
|
||
|
|
||
|
while(RTA_OK(rta, rta_remaining_len)) {
|
||
|
switch(rta->rta_type) {
|
||
|
case RTA_PREFSRC:
|
||
|
case RTA_CACHEINFO:
|
||
|
case RTA_EXPIRES:
|
||
|
case RTA_METRICS:
|
||
|
case RTA_PREF:
|
||
|
// ignored attributes
|
||
|
break;
|
||
|
case RTA_TABLE:
|
||
|
table = rta;
|
||
|
if(rta->rta_len != RTA_ALIGN(sizeof(struct rtattr)) + 4) {
|
||
|
if(debug >= dbg_none) fprintf(stderr, "ignore route with wrong length for RTA_TABLE\n");
|
||
|
goto skip_nlmsg;
|
||
|
}
|
||
|
if(*(uint32_t*)RTA_DATA(rta) != RT_TABLE_MAIN) {
|
||
|
if(debug >= dbg_all_routes) fprintf(stderr, "ignore route for non-main table\n");
|
||
|
goto skip_nlmsg;
|
||
|
}
|
||
|
break;
|
||
|
default:
|
||
|
// unknown attributes - can't process this route
|
||
|
if(debug >= dbg_all_routes) fprintf(stderr, "ignore route with unknown attribute %d\n", (int)rta->rta_type);
|
||
|
goto skip_nlmsg;
|
||
|
case RTA_OIF: oif = rta; break;
|
||
|
case RTA_GATEWAY: gateway = rta; break;
|
||
|
case RTA_VIA: via = rta; break;
|
||
|
case RTA_PRIORITY: priority = rta; break;
|
||
|
case RTA_DST: dst = rta; break;
|
||
|
case RTA_MULTIPATH: multipath = rta; break;
|
||
|
}
|
||
|
#if 0
|
||
|
// rta->rta_type is unsigned
|
||
|
if(rta->rta_type < sizeof(rta_names)/sizeof(rta_names[0]) && rta_names[rta->rta_type]) {
|
||
|
printf("%s ", rta_names[rta->rta_type]);
|
||
|
} else {
|
||
|
printf("attr%d ", (int)rta->rta_type);
|
||
|
}
|
||
|
#endif
|
||
|
rta = RTA_NEXT(rta, rta_remaining_len);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if(!dst) {
|
||
|
// these routes *can* exist - don't know what they mean
|
||
|
if(debug >= dbg_all_routes) fprintf(stderr, "invalid route: no dst\n");
|
||
|
goto skip_nlmsg;
|
||
|
}
|
||
|
char dst_string[64];
|
||
|
if(!inet_ntop(rt->rtm_family, RTA_DATA(dst), dst_string, sizeof dst_string))
|
||
|
strcpy(dst_string, "?");
|
||
|
if(!gateway && !via && !multipath) {
|
||
|
if(debug >= dbg_all_routes) fprintf(stderr, "ignore directly connected route to %s/%d\n", dst_string, rt->rtm_dst_len);
|
||
|
goto skip_nlmsg; // directly connected routes - ignored (set up ipv4 addresses yourself on the nodes that implement them)
|
||
|
}
|
||
|
if(rt->rtm_family == AF_INET6 && via) {
|
||
|
if(debug >= dbg_all_routes) fprintf(stderr, "ignore af_inet6 with via, to %s/%d\n", dst_string, rt->rtm_dst_len);
|
||
|
goto skip_nlmsg; // don't know what this means; ipv6 with non-ipv6 next hop? we would ignore that
|
||
|
}
|
||
|
|
||
|
if(rt->rtm_family == AF_INET) {
|
||
|
assert(dst->rta_len == RTA_ALIGN(sizeof(struct rtattr))+4);
|
||
|
assert(rt->rtm_dst_len <= 32);
|
||
|
if(rt->rtm_dst_len < V4_MIN_PREFIX_LENGTH || !prefix_matches(RTA_DATA(dst), v4_reqprefix, V4_MIN_PREFIX_LENGTH)) {
|
||
|
if(debug >= dbg_all_routes) fprintf(stderr, "ignoring ipv4 route to somewhere else, to %s/%d\n", dst_string, rt->rtm_dst_len);
|
||
|
goto skip_nlmsg;
|
||
|
}
|
||
|
} else {
|
||
|
assert(rt->rtm_family == AF_INET6);
|
||
|
assert(dst->rta_len == RTA_ALIGN(sizeof(struct rtattr))+16);
|
||
|
assert(rt->rtm_dst_len <= 128);
|
||
|
if(rt->rtm_dst_len < V6_MIN_PREFIX_LENGTH || !prefix_matches(RTA_DATA(dst), v6_reqprefix, V6_MIN_PREFIX_LENGTH)) {
|
||
|
if(debug >= dbg_all_routes) fprintf(stderr, "ignoring ipv6 route to somewhere else, to %s/%d\n", dst_string, rt->rtm_dst_len);
|
||
|
goto skip_nlmsg;
|
||
|
}
|
||
|
}
|
||
|
if(debug >= dbg_relevant_routes) fprintf(stderr, "route to %s/%d matches our responsible ranges\n", dst_string, rt->rtm_dst_len);
|
||
|
|
||
|
if(hdr->nlmsg_type == RTM_NEWROUTE) {
|
||
|
//(cur_state == state_dumping_ipv4 || cur_state == state_dumping_ipv6) && hdr->nlmsg_seq == dump_sequence
|
||
|
assert(
|
||
|
(
|
||
|
(
|
||
|
(cur_state == state_dumping_ipv4 && rt->rtm_family == AF_INET)
|
||
|
|| (cur_state == state_dumping_ipv6 && rt->rtm_family == AF_INET6)
|
||
|
) && hdr->nlmsg_seq == dump_sequence
|
||
|
) || sender.nl_groups != 0 // unsolicited real-time update
|
||
|
);
|
||
|
|
||
|
if(cur_state == state_dumping_ipv4) {
|
||
|
uint32_t v4_dst = *(uint32_t*)RTA_DATA(dst); // network byte order
|
||
|
struct route *r = find_route(v4_dst, rt->rtm_dst_len);
|
||
|
if(!r) {
|
||
|
if(debug >= dbg_relevant_routes) fprintf(stderr, "didn't expect a route to %s/%d - will remove it\n", dst_string, rt->rtm_dst_len);
|
||
|
|
||
|
// TODO: handle the response
|
||
|
hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
|
||
|
hdr->nlmsg_type = RTM_DELROUTE;
|
||
|
hdr->nlmsg_pid = 0;
|
||
|
hdr->nlmsg_seq = next_sequence();
|
||
|
if(sendto(nlsock, hdr, hdr->nlmsg_len, 0, (const struct sockaddr*)&kernel_receiver, sizeof kernel_receiver) < 0)
|
||
|
error(1, errno, "sendto (delete ipv4 route)");
|
||
|
} else {
|
||
|
if(debug >= dbg_relevant_routes) fprintf(stderr, "expected a route to %s/%d - will leave it\n", dst_string, rt->rtm_dst_len);
|
||
|
|
||
|
if(r->ipv4_rtnl) {
|
||
|
if(sender.nl_groups == 0) { // unicast packet - response to dump request
|
||
|
fprintf(stderr, "route to %s/%d: saw it twice; deleting the extra one\n", dst_string, rt->rtm_dst_len);
|
||
|
start_deleting_unexpected_ipv4_route(hdr);
|
||
|
} // otherwise we probably got another update for the same route? TODO: check it's actually identical. if not, delete one.
|
||
|
} else {
|
||
|
// save a copy of the kernel's route so we can compare it against what we want it to be
|
||
|
r->ipv4_rtnl = xmemdup(hdr, NLMSG_ALIGN(hdr->nlmsg_len));
|
||
|
if(r->state == rtst_stable || r->state == rtst_error)
|
||
|
r->state = rtst_unknown;
|
||
|
}
|
||
|
}
|
||
|
} else {
|
||
|
uint32_t v4_dst = *(uint32_t*)(12 + (char*)RTA_DATA(dst));
|
||
|
struct route *r = find_route(v4_dst, rt->rtm_dst_len - 96);
|
||
|
if(r) {
|
||
|
// this could cause problems because only one route is recorded in our data structures
|
||
|
if(debug >= dbg_none) fprintf(stderr, "duplicate v6 route to %s/%d?\n", dst_string, rt->rtm_dst_len);
|
||
|
} else {
|
||
|
if(!gateway && !multipath) {
|
||
|
// didn't we check this above?
|
||
|
if(debug >= dbg_none) fprintf(stderr, "can't process v6 route to %s/%d because no gateway or multipath attribute\n", dst_string, rt->rtm_dst_len);
|
||
|
} else {
|
||
|
if(debug >= dbg_relevant_routes) fprintf(stderr, "recording a v6 route to %s/%d?\n", dst_string, rt->rtm_dst_len);
|
||
|
r = append_route(v4_dst, rt->rtm_dst_len - 96);
|
||
|
r->ipv6_rtnl = xmemdup(hdr, NLMSG_ALIGN(hdr->nlmsg_len));
|
||
|
|
||
|
// format the corresponding ipv4 route
|
||
|
routebuilder_init();
|
||
|
routebuilder_rtmsg->rtm_family = AF_INET;
|
||
|
routebuilder_rtmsg->rtm_dst_len = rt->rtm_dst_len - 96;
|
||
|
routebuilder_rtmsg->rtm_table = rt->rtm_table;
|
||
|
routebuilder_rtmsg->rtm_protocol = RTPROT_OSPF6TO4;
|
||
|
routebuilder_rtmsg->rtm_scope = rt->rtm_scope;
|
||
|
routebuilder_rtmsg->rtm_type = rt->rtm_type;
|
||
|
routebuilder_rtmsg->rtm_flags = rt->rtm_flags; // TODO: are all flags always applicable?
|
||
|
|
||
|
struct {
|
||
|
struct rtattr hdr;
|
||
|
uint32_t dst;
|
||
|
} dst_attrib = {
|
||
|
{.rta_len = sizeof dst_attrib, .rta_type = RTA_DST},
|
||
|
v4_dst
|
||
|
};
|
||
|
routebuilder_append(&dst_attrib.hdr);
|
||
|
|
||
|
if(multipath) {
|
||
|
|
||
|
struct rtattr *multipath_hdr = (struct rtattr*)routebuilder_pos;
|
||
|
// rta_len to be filled in at end
|
||
|
multipath_hdr->rta_type = RTA_MULTIPATH;
|
||
|
routebuilder_pos += RTA_SPACE(0);
|
||
|
|
||
|
// RTA_MULTIPATH contains an array of rtnexthop + payload
|
||
|
// payload of rtnexthop is an array of rtattr + payload; we have to translate RTA_GATEWAY into RTA_VIA.
|
||
|
int nexthops_remaining_size = multipath->rta_len - RTA_ALIGN(sizeof(struct rtattr));
|
||
|
|
||
|
for(struct rtnexthop *rtnh = (struct rtnexthop*)RTA_DATA(multipath);
|
||
|
nexthops_remaining_size >= sizeof(struct rtnexthop) && RTNH_OK(rtnh, nexthops_remaining_size);
|
||
|
nexthops_remaining_size -= RTNH_ALIGN(rtnh->rtnh_len), rtnh = RTNH_NEXT(rtnh)
|
||
|
) {
|
||
|
struct rtnexthop *rtnh_out = (struct rtnexthop*)routebuilder_pos;
|
||
|
routebuilder_pos += RTNH_ALIGN(sizeof(struct rtnexthop));
|
||
|
memset(rtnh_out, 0, RTNH_ALIGN(sizeof(struct rtnexthop)));
|
||
|
rtnh_out->rtnh_flags = rtnh->rtnh_flags;
|
||
|
rtnh_out->rtnh_hops = rtnh->rtnh_hops;
|
||
|
rtnh_out->rtnh_ifindex = rtnh->rtnh_ifindex;
|
||
|
// rtnh_len assigned after payload emitted
|
||
|
|
||
|
int nhattrs_remaining_size = rtnh->rtnh_len - RTNH_ALIGN(sizeof(struct rtnexthop));
|
||
|
for(struct rtattr *nha = RTNH_DATA(rtnh);
|
||
|
RTA_OK(nha, nhattrs_remaining_size);
|
||
|
nha = RTA_NEXT(nha, nhattrs_remaining_size)
|
||
|
) {
|
||
|
if(nha->rta_type == RTA_GATEWAY) {
|
||
|
assert(nha->rta_len == RTA_SPACE(16));
|
||
|
struct {
|
||
|
struct rtattr hdr;
|
||
|
struct { // struct rtvia (specialized for 16-byte address)
|
||
|
__kernel_sa_family_t family;
|
||
|
uint8_t addr[16];
|
||
|
} via;
|
||
|
} via_attrib = {
|
||
|
{.rta_len = sizeof via_attrib, .rta_type = RTA_VIA},
|
||
|
{AF_INET6}
|
||
|
};
|
||
|
memcpy(via_attrib.via.addr, RTA_DATA(nha), 16);
|
||
|
routebuilder_append(&via_attrib.hdr);
|
||
|
} else {
|
||
|
routebuilder_append(nha);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
rtnh_out->rtnh_len = routebuilder_pos - (char*)rtnh_out;
|
||
|
}
|
||
|
multipath_hdr->rta_len = routebuilder_pos - (char*)multipath_hdr;
|
||
|
} else {
|
||
|
assert(gateway);
|
||
|
assert(gateway->rta_len == RTA_SPACE(16));
|
||
|
struct {
|
||
|
struct rtattr hdr;
|
||
|
struct { // struct rtvia (specialized for 16-byte address)
|
||
|
__kernel_sa_family_t family;
|
||
|
uint8_t addr[16];
|
||
|
} via;
|
||
|
} via_attrib = {
|
||
|
{.rta_len = sizeof via_attrib, .rta_type = RTA_VIA},
|
||
|
{AF_INET6}
|
||
|
};
|
||
|
memcpy(via_attrib.via.addr, RTA_DATA(gateway), 16);
|
||
|
routebuilder_append(&via_attrib.hdr);
|
||
|
}
|
||
|
|
||
|
if(oif) routebuilder_append(oif);
|
||
|
if(priority) routebuilder_append(priority);
|
||
|
if(table) routebuilder_append(table);
|
||
|
|
||
|
routebuilder_nlhdr->nlmsg_len = routebuilder_pos - routebuilder;
|
||
|
|
||
|
r->ipv4_new = xmemdup(routebuilder, NLMSG_ALIGN(routebuilder_nlhdr->nlmsg_len));
|
||
|
if(r->state == rtst_stable || r->state == rtst_error)
|
||
|
r->state = rtst_unknown;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
} else if(hdr->nlmsg_type == RTM_DELROUTE && rt->rtm_family == AF_INET6) {
|
||
|
uint32_t v4_dst = *(uint32_t*)(12 + (char*)RTA_DATA(dst));
|
||
|
struct route *r = find_route(v4_dst, rt->rtm_dst_len - 96);
|
||
|
if(r) {
|
||
|
if(debug >= dbg_relevant_routes) fprintf(stderr, "route to %s/%d was deleted\n", dst_string, rt->rtm_dst_len);
|
||
|
|
||
|
free(r->ipv6_rtnl);
|
||
|
r->ipv6_rtnl = NULL;
|
||
|
|
||
|
// have to update intended ipv4 state (which is no route)
|
||
|
if(r->ipv4_new != r->ipv4_rtnl)
|
||
|
free(r->ipv4_new);
|
||
|
r->ipv4_new = NULL;
|
||
|
|
||
|
if(r->state == rtst_stable || r->state == rtst_error)
|
||
|
r->state = rtst_unknown;
|
||
|
} else {
|
||
|
if(debug >= dbg_none) fprintf(stderr, "didn't know about deleted v6 route to %s/%d\n", dst_string, rt->rtm_dst_len);
|
||
|
}
|
||
|
}
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/*for(int i = 0; i < nrecv; i++) {
|
||
|
printf("%02x ", (unsigned char)buf[i]);
|
||
|
if((i & 15) == 15) printf("\n");
|
||
|
}
|
||
|
printf("\n\n");*/
|
||
|
}
|
||
|
}
|