commit 39ff04554f7f14394b2d0e129bf203fad06ab5e4 Author: immibis Date: Mon Jan 13 18:21:10 2025 +0100 create git repository diff --git a/ospf6to4.c b/ospf6to4.c new file mode 100644 index 0000000..a457e94 --- /dev/null +++ b/ospf6to4.c @@ -0,0 +1,798 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdint.h> +#include <errno.h> +#include <error.h> +#include <assert.h> +#include <unistd.h> +#include <sys/socket.h> +#include <sys/mman.h> +#include <arpa/inet.h> +#include <linux/netlink.h> +#include <linux/rtnetlink.h> + +// Route translation: +// IPv6 prefix: fd7a:81db:3c57:fffe:0000:0000:xxxx:xxxx +// IPv6 prefix length must be at least 96 plus min ipv4 prefix, or the route is ignored. +// IPv4 part of IPv6 prefix must match the IPv4 prefix +// IPv4 address is always last 32 bits of IPv6 address. In the usual case, /128 routes are used. + +// Routes with unknown attributes are ignored. In particular: +// RTA_TABLE (must be RT_TABLE_MAIN if present) +// RTA_SRC, RTA_IIF (only used when querying? not sure) +// RTA_ENCAP_TYPE, RTA_ENCAP (TODO: these could be copied, but testing would be needed) +// RTA_NH_ID (TODO? what is this? alternate way of specifying nexthop?) +// RTA_VIA (not seen on ipv6 routes; used to specify ipv4 nexthop on ipv6 routes) +// Attributes that are copied are: +// RTA_OIF (output interface) +// RTA_PRIORITY (aka metric) +// RTA_GATEWAY (converted to RTA_VIA for ipv4 table, but holds the same address) +// RTA_MULTIPATH (RTA_GATEWAY within gets converted to RTA_VIA for ipv4 table, but holds the same address. all other sub-attributes copied.) 
// Attributes with very special handling are:
//   RTA_DST
// Attributes that are knowingly ignored are:
//   RTA_PREFSRC
//   RTA_CACHEINFO
//   RTA_EXPIRES
//   RTA_PREF (RFC4191, IPv6-only)

// Only manage routes to this part of the v4 address space (172.23.101.144/28)
const unsigned char v4_reqprefix[4] = {0xac, 0x17, 0x65, 0x90};
#define V4_MIN_PREFIX_LENGTH 28
// Same range embedded in the last 32 bits of the IPv6 translation prefix.
const unsigned char v6_reqprefix[16] = {
	0xfd, 0x7a, 0x81, 0xdb, 0x3c, 0x57, 0xff, 0xfe,
	0, 0, 0, 0,
	0xac, 0x17, 0x65, 0x90
};
#define V6_MIN_PREFIX_LENGTH (96+V4_MIN_PREFIX_LENGTH)

#define RTPROT_OSPF6TO4 RTPROT_EIGRP // grab some otherwise-unused value


// Destination address for every request we send: netlink pid 0 is the kernel.
static const struct sockaddr_nl kernel_receiver = {.nl_family = AF_NETLINK, .nl_pid = 0};
// The one rtnetlink socket used for dumps, updates and notifications.
static int nlsock;
// Local address; joining RTMGRP_IPV6_ROUTE subscribes us to IPv6 route change notifications.
static struct sockaddr_nl bindaddr = {
	.nl_family = AF_NETLINK,
	.nl_groups = RTMGRP_IPV6_ROUTE,
};

// Ask the kernel to dump its routes for one address family.
//   label:    "ipv4"/"ipv6", only used in the error message
//   af:       address family to dump (AF_INET or AF_INET6)
//   sequence: sequence number the replies (and the final NLMSG_DONE) will carry
// Exits the program if the request cannot be sent.
static void request_dump(const char *label, int af, int sequence) {
	struct {
		struct nlmsghdr nlhdr;
		struct rtmsg rthdr;
	} req = {0};

	req.nlhdr.nlmsg_len = sizeof req;
	req.nlhdr.nlmsg_type = RTM_GETROUTE;
	req.nlhdr.nlmsg_flags = NLM_F_REQUEST|NLM_F_DUMP;
	req.nlhdr.nlmsg_seq = sequence;
	req.nlhdr.nlmsg_pid = 0; // bindaddr.nl_pid also seems to work

	req.rthdr.rtm_family = af;
	req.rthdr.rtm_dst_len = 0; // dst prefix mask bit length
	req.rthdr.rtm_src_len = 0; // src prefix mask bit length
	req.rthdr.rtm_tos = 0; // unused?
	req.rthdr.rtm_table = RT_TABLE_MAIN;
	req.rthdr.rtm_protocol = RTPROT_UNSPEC; // TODO would be a useful filter, eg RTPROT_BIRD
	req.rthdr.rtm_scope = RT_SCOPE_UNIVERSE;
	req.rthdr.rtm_type = RTN_UNSPEC;
	req.rthdr.rtm_flags = 0;

	ssize_t sent = sendto(nlsock, &req, sizeof req, 0,
	                      (struct sockaddr*)&kernel_receiver, sizeof kernel_receiver);
	if(sent < 0)
		error(1, errno, "failed to send %s route dump request", label);
}

// First: dump ipv6 routes
// Then: dump ipv4 routes and remove any that don't make sense.
// Then: process updates as they come in.
+static enum { + state_dumping_ipv6, + state_dumping_ipv4, + state_realtime, +} cur_state; + +// There's one structure per ipv6 route that we translate. +// (not per ipv4 route if it doesn't have an associated ipv6 route - we delete those immediately) +// There should only be a small number of relevant routes, so we should be fine storing them in an unsorted dynamic array. +// at least we do use a cache-efficient SoA data structure to hold the keys. +// IPv6 routes are stored paired with their IPv4 route. Both are stored in direct rtnetlink format, even though it's +// difficult to manipulate. +struct route { + struct nlmsghdr *ipv6_rtnl; // directly as received from rtnetlink + struct nlmsghdr *ipv4_rtnl; // what the kernel knows at the moment. NULL if no route in table. + struct nlmsghdr *ipv4_new; // as we want it to be. If up-to-date, ipv4_new==ipv4_rtnl - be careful not to double-free. + + // Odd-numbered states are waiting for an acknowledgement from the kernel. Sequence number is always stored in outstanding_seq. Otherwise it's meaningless. 
+ enum route_state { + rtst_unknown = 0, + rtst_deleting_old = 1, + rtst_creating_new = 3, + rtst_stable = 4, + rtst_error = 6, + } state; + uint32_t outstanding_seq; +}; +static struct route *routes; +static uint32_t *route_addrs; // primary key; network byte order +static unsigned char *route_masklens; // secondary key +static unsigned int routes_used; +static unsigned int routes_allocated; + +static struct route *find_route(uint32_t ipv4_addr, int masklen) { + for(int i = 0; i < routes_used; i++) { + if(route_addrs[i] == ipv4_addr && route_masklens[i] == masklen) + return &routes[i]; + } + return NULL; +} +static struct route *append_route(uint32_t ipv4_addr, int masklen) { + if(routes_used == routes_allocated) { + unsigned int new_alloc = routes_allocated + (routes_allocated >> 1); + if(new_alloc < routes_allocated || new_alloc > (1<<30)/sizeof(*routes)) + error(1, 0, "memory allocation overflow (too many routes)"); + if(new_alloc < 5) new_alloc = 5; + routes = realloc(routes, new_alloc*sizeof(*routes)); + route_addrs = realloc(route_addrs, new_alloc*sizeof(*route_addrs)); + route_masklens = realloc(route_masklens, new_alloc*sizeof(*route_masklens)); + if(!routes || !route_addrs || !route_masklens) + error(1, 0, "failed to allocate memory for routes"); + routes_allocated = new_alloc; + } + route_addrs[routes_used] = ipv4_addr; + route_masklens[routes_used] = masklen; + routes_used++; + struct route *r = &routes[routes_used-1]; + memset(r, 0, sizeof(*r)); + return r; +} +static void remove_route_from_table(unsigned int index) { + assert(index < routes_used); + struct route *r = &routes[index]; + + free(r->ipv6_rtnl); + if(r->ipv4_new != r->ipv4_rtnl) free(r->ipv4_new); + free(r->ipv4_rtnl); + + if(index != routes_used - 1) { + assert(routes_used > 0); + *r = routes[routes_used - 1]; + route_addrs[index] = route_addrs[routes_used - 1]; + route_masklens[index] = route_masklens[routes_used - 1]; + } + routes_used--; +} + +static uint32_t next_sequence() { + static 
uint32_t next = 1; + next++; + if(next == 0) next = 1; // don't use 0 + // we just assume that sequence numbers won't wrap around until the previous request using that number has completed + return next; +} + +#if 0 +static const char *const rta_names[] = { + // don't know what all of these mean, but they're defined + [RTA_DST]="dst", + [RTA_SRC]="src", + [RTA_IIF]="iif", + [RTA_OIF]="oif", + [RTA_GATEWAY]="gateway", + [RTA_PRIORITY]="priority", + [RTA_PREFSRC]="prefsrc", // this one can't be transferred to ipv4 + [RTA_METRICS]="metrics", + [RTA_MULTIPATH]="multipath", // TODO + [RTA_FLOW]="flow", + [RTA_CACHEINFO]="cacheinfo", + [RTA_TABLE]="table", + [RTA_MARK]="mark", + [RTA_MFC_STATS]="mfc_stats", + [RTA_VIA]="via", + [RTA_NEWDST]="newdst", + [RTA_PREF]="pref", + [RTA_ENCAP_TYPE]="encaptype", + [RTA_ENCAP]="encap", + [RTA_EXPIRES]="expires", + [RTA_PAD]="pad", + [RTA_UID]="uid", + [RTA_TTL_PROPAGATE]="ttl_propagate", + [RTA_IP_PROTO]="ip_proto", + [RTA_SPORT]="sport", + [RTA_DPORT]="dport", + [RTA_NH_ID]="nhid", +}; +#endif + +static int prefix_matches(const unsigned char *a, const unsigned char *b, int nbits) { + while(nbits >= 8) { + if(*a != *b) return 0; + a++; + b++; + nbits -= 8; + } + if(nbits == 0) return 1; + int mask = (1 << (8 - nbits)) & 255; + return (*a & mask) == (*b & mask); +} + +enum debuglevel { + dbg_none, + dbg_relevant_routes, + dbg_route_operations, + dbg_all_routes, +}; +static enum debuglevel debug = dbg_route_operations; + +static char *routebuilder; // points to array with guard page afterwards +static char *routebuilder_pos; +#define routebuilder_nlhdr ((struct nlmsghdr*)routebuilder) +#define routebuilder_rtmsg ((struct rtmsg*)(routebuilder + NLMSG_HDRLEN)) +static void routebuilder_init() { + memset(routebuilder, 0, NLMSG_HDRLEN + NLMSG_ALIGN(sizeof(struct rtmsg))); + routebuilder_pos = routebuilder + NLMSG_HDRLEN + NLMSG_ALIGN(sizeof(struct rtmsg)); + //routebuilder_nlhdr remains uninitialized until message is finalized and then 
sent +} +static void routebuilder_append(struct rtattr *rta) { + memset(routebuilder_pos, 0, RTA_ALIGN(rta->rta_len)); + memcpy(routebuilder_pos, rta, rta->rta_len); + routebuilder_pos += RTA_ALIGN(rta->rta_len); +} + +static void *xmemdup(void *x, size_t len) { + void *result = malloc(len); + if(!result) error(1, errno, "malloc"); + memcpy(result, x, len); + return result; +} + +// Start deletion of an ipv4 route not associated with any ipv6 route +static void start_deleting_unexpected_ipv4_route(struct nlmsghdr *msg) { + // TODO +} + +static void route_start_deleting_old(struct route *r) { + if(debug >= dbg_route_operations) { + char addrbuf[64]; + if(!inet_ntop(AF_INET, &route_addrs[r - routes], addrbuf, sizeof addrbuf)) + strcpy(addrbuf, "?"); + fprintf(stderr, "try to delete a route to %s/%d\n", addrbuf, route_masklens[r - routes]); + } + + assert(r->ipv4_rtnl); + r->outstanding_seq = next_sequence(); + r->state = rtst_deleting_old; + r->ipv4_rtnl->nlmsg_type = RTM_DELROUTE; + r->ipv4_rtnl->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + r->ipv4_rtnl->nlmsg_seq = r->outstanding_seq; + if(sendto(nlsock, r->ipv4_rtnl, r->ipv4_rtnl->nlmsg_len, 0, (const struct sockaddr*)&kernel_receiver, sizeof kernel_receiver) < 0) + error(1, errno, "sendto (delete ipv4 route)"); +} + +static void route_start_creating_new(struct route *r) { + if(debug >= dbg_route_operations) { + char addrbuf[64]; + if(!inet_ntop(AF_INET, &route_addrs[r - routes], addrbuf, sizeof addrbuf)) + strcpy(addrbuf, "?"); + fprintf(stderr, "try to create a route to %s/%d\n", addrbuf, route_masklens[r - routes]); + } + + assert(r->ipv4_new); + r->outstanding_seq = next_sequence(); + r->state = rtst_creating_new; + r->ipv4_new->nlmsg_type = RTM_NEWROUTE; + r->ipv4_new->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_CREATE | NLM_F_REPLACE; + r->ipv4_new->nlmsg_seq = r->outstanding_seq; + if(sendto(nlsock, r->ipv4_new, r->ipv4_new->nlmsg_len, 0, (const struct sockaddr*)&kernel_receiver, sizeof kernel_receiver) < 
0) + error(1, errno, "sendto (create ipv4 route)"); +} + +static struct rtattr *find_rtattr(struct nlmsghdr *a, int attrnum) { + int remlen = a->nlmsg_len - NLMSG_HDRLEN - NLMSG_ALIGN(sizeof(struct rtmsg)); + struct rtattr *pos = (struct rtattr*)((char*)a + NLMSG_HDRLEN + NLMSG_ALIGN(sizeof(struct rtmsg))); + while(RTA_OK(pos, remlen)) { + if(pos->rta_type == attrnum) + return pos; + pos = RTA_NEXT(pos, remlen); + } + return NULL; +} +static int is_current_route_attr_good(struct nlmsghdr *a, struct nlmsghdr *b, int attrnum) { + struct rtattr *rta = find_rtattr(a, attrnum); + struct rtattr *rtb = find_rtattr(b, attrnum); + if(!rta && !rtb) return 1; + if(!rta || !rtb) return 0; + if(rta->rta_len != rtb->rta_len) return 0; + return !memcmp(RTA_DATA(rta), RTA_DATA(rtb), rta->rta_len - RTA_ALIGN(sizeof(struct rtattr))); +} +static int is_current_route_good(struct nlmsghdr *a, struct nlmsghdr *b) { + if(!a && !b) return 1; + if(!a || !b) return 0; + + // both must be valid route messages. nlmsg type and flags ignored. + assert(a->nlmsg_len >= NLMSG_SPACE(sizeof(struct rtmsg))); + assert(b->nlmsg_len >= NLMSG_SPACE(sizeof(struct rtmsg))); + struct rtmsg *rma = NLMSG_DATA(a); + struct rtmsg *rmb = NLMSG_DATA(b); +#define STR2(x) #x +#define check(field) if(rma->field != rmb->field) {fprintf(stderr, "mismatch field %s\n", STR2(field)); return 0;} + check(rtm_dst_len); + check(rtm_family); + check(rtm_flags); + check(rtm_protocol); + check(rtm_scope); + check(rtm_table); + check(rtm_type); +#undef check +#define check(attr) if(!is_current_route_attr_good(a, b, attr)) {fprintf(stderr, "mismatch attr %s\n", STR2(attr)); return 0;} + // RTM_TABLE *not* checked since it's duplicated in rtm_table? + check(RTA_OIF); + check(RTA_VIA); + check(RTA_PRIORITY); + check(RTA_DST); + check(RTA_MULTIPATH); // looks like the kernel can't reorder entries. It does seem to unwrap it if there's only one path, though. 
+#undef check + return 1; +} + +int main() { + + nlsock = socket(AF_NETLINK, SOCK_DGRAM | SOCK_CLOEXEC, NETLINK_ROUTE); + if(nlsock < 0) error(1, errno, "failed to create rtnetlink socket"); + + { + int pagesize = getpagesize(); + // TODO: should allocate an appropriately calculated number of pages instead of 2 pages + routebuilder = mmap(NULL, pagesize*3, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if(!routebuilder) error(1, errno, "mmap"); + if(!mmap(routebuilder + pagesize*2, pagesize, PROT_NONE, MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0)) error(1, errno, "mmap"); + } + + { + int extack = 1; + if(setsockopt(nlsock, SOL_NETLINK, NETLINK_EXT_ACK, &extack, sizeof extack) < 0) + error(0, errno, "setsockopt (netlink ext ack)"); // non-fatal error + } + + if(bind(nlsock, (struct sockaddr*)&bindaddr, sizeof bindaddr) < 0) + error(1, errno, "failed to bind netlink socket"); + + uint32_t dump_sequence = next_sequence(); + cur_state = state_dumping_ipv6; + request_dump("ipv6", AF_INET6, dump_sequence); + + while(1) { + // Always after processing any message: + // (occurs at start of loop so that we can see if any timed actions are needed) + if(cur_state == state_realtime) { + for(int i = 0; i < routes_used; i++) { + struct route *r = &routes[i]; + //fprintf(stderr, "check route %d state %d\n", i, r->state); + + if (!r->ipv6_rtnl) { + // an unexpected ipv4 route being deleted + assert(!r->ipv4_new); + if(!r->ipv4_rtnl) { + // route has been deleted from kernel + if(debug >= dbg_route_operations) fprintf(stderr, "route deleted from kernel - deleting from table\n"); + remove_route_from_table(i); + i--; + continue; + } else if(r->state == rtst_unknown) { + route_start_deleting_old(r); + } + continue; + } + + if(r->state == rtst_unknown) { + // check if the route needs any update + if(is_current_route_good(r->ipv4_rtnl, r->ipv4_new)) { + r->state = rtst_stable; + } else if(r->ipv4_rtnl) { + route_start_deleting_old(r); + } else { + 
route_start_creating_new(r); + } + continue; + } + + if(r->state & 1) { + // TODO: check for timeouts + } + if(r->state == rtst_error) { + // TODO: check for error timeout + } + } + } + + char buf[65536]; + struct sockaddr_nl sender = {0}; + socklen_t sender_len = sizeof sender; + + int nrecv = recvfrom(nlsock, buf, sizeof buf, 0, (struct sockaddr*)&sender, &sender_len); + if(nrecv < 0) { + if(errno == EINTR) + continue; + // TODO: on ENOBUFS, restart and resynchronize + error(1, errno, "recvfrom netlink"); + } + + if(sender.nl_family != AF_NETLINK || sender.nl_pid != 0) { + fprintf(stderr, "got netlink packet not from kernel (sender=%d)\n", sender.nl_family == AF_NETLINK ? (int)sender.nl_pid : -1); + continue; + } + + int remaining_len = nrecv; + struct nlmsghdr *hdr = (struct nlmsghdr*)buf; + for(; NLMSG_OK(hdr, remaining_len) && NLMSG_PAYLOAD(hdr, remaining_len) >= sizeof(struct rtmsg); + hdr = NLMSG_NEXT(hdr, remaining_len) + ) { + + // invalid check; dump responses show our own PID + /*if(hdr->nlmsg_pid != 0)*/ if(0) { + fprintf(stderr, "got netlink message not from kernel (sender=%d)\n", (int)hdr->nlmsg_pid); + skip_nlmsg: + continue; + } + + switch(hdr->nlmsg_type) { + case NLMSG_NOOP: + break; + case NLMSG_ERROR: + if(hdr->nlmsg_len < NLMSG_ALIGN(sizeof(struct nlmsgerr)) + NLMSG_HDRLEN) + fprintf(stderr, "got bad NLMSG_ERROR length\n"); + else { + struct nlmsgerr *err = NLMSG_DATA(hdr); + int i; + for(i = 0; i < routes_used; i++) { + if((routes[i].state & 1) && (err->msg.nlmsg_seq == routes[i].outstanding_seq)) { + break; + } + } + if(i >= routes_used) { + fprintf(stderr, "got NLMSG_ERROR for unknown request (sequence=%d)\n", (int)err->msg.nlmsg_seq); + } else { + struct route *r = &routes[i]; + char addrbuf[64]; + if(!inet_ntop(AF_INET, &route_addrs[i], addrbuf, sizeof addrbuf)) + strcpy(addrbuf, "?"); + + switch(r->state) { + case rtst_creating_new: + if(err->error == 0) { + if(debug >= dbg_route_operations) fprintf(stderr, "successfully created a route 
to %s/%d\n", addrbuf, route_masklens[i]); + if(r->ipv4_rtnl != r->ipv4_new) { + free(r->ipv4_rtnl); + r->ipv4_rtnl = r->ipv4_new; + } + r->state = rtst_unknown; + } else { + if(debug >= dbg_none) fprintf(stderr, "failed to create a route to %s/%d\n", addrbuf, route_masklens[i]); + r->state = rtst_error; + } + break; + case rtst_deleting_old: + if(err->error == 0) { + if(debug >= dbg_route_operations) fprintf(stderr, "successfully deleted a route to %s/%d\n", addrbuf, route_masklens[i]); + if(r->ipv4_rtnl != r->ipv4_new) + free(r->ipv4_rtnl); + r->ipv4_rtnl = NULL; + r->state = rtst_unknown; + } else { + if(debug >= dbg_none) fprintf(stderr, "failed to delete a route to %s/%d\n", addrbuf, route_masklens[i]); + r->state = rtst_error; + } + break; + default: break; + } + } + } + break; + case NLMSG_DONE: + if((cur_state == state_dumping_ipv4 || cur_state == state_dumping_ipv6) && hdr->nlmsg_seq == dump_sequence) { + if(cur_state == state_dumping_ipv6) { + // next step is ipv4 + dump_sequence = next_sequence(); + cur_state = state_dumping_ipv4; + request_dump("ipv4", AF_INET, dump_sequence); + } else if(cur_state == state_dumping_ipv4) { + cur_state = state_realtime; + } + } else { + fprintf(stderr, "unexpected NLMSG_DONE sequence=%d my_state=%d my_sequence=%d\n", hdr->nlmsg_seq, (int)cur_state, (int)dump_sequence); + } + break; + default: + printf("got unexpected nlmsg type 0x%x\n", hdr->nlmsg_type); + break; + case RTM_NEWROUTE: + case RTM_DELROUTE: +#if 0 + if(hdr->nlmsg_type == RTM_NEWROUTE) + printf("got RTM_NEWROUTE "); + else + printf("got RTM_DELROUTE "); +#endif + + // When a route is removed, the kernel sends a RTM_DELROUTE with the same attributes + // table dst prefsrc priority gateway oif cacheinfo pref + // table dst prefsrc priority multipath cacheinfo pref + // table dst priority oif cacheinfo pref + + struct rtmsg *rt = NLMSG_DATA(hdr); + + // whether it's from a dump or not, we can check whether it's a route we can process + // and ignore it if not. 
we also extract pointers to the attributes we might care about. + struct rtattr *oif = NULL; // output interface + struct rtattr *gateway = NULL; + struct rtattr *via = NULL; + struct rtattr *priority = NULL; // aka metric + struct rtattr *dst = NULL; + struct rtattr *multipath = NULL; + struct rtattr *table = NULL; + { + struct rtattr *rta = (struct rtattr*)((char*)rt + NLMSG_ALIGN(sizeof(struct rtmsg))); + int rta_remaining_len = hdr->nlmsg_len - NLMSG_HDRLEN - NLMSG_ALIGN(sizeof(struct rtmsg)); + + while(RTA_OK(rta, rta_remaining_len)) { + switch(rta->rta_type) { + case RTA_PREFSRC: + case RTA_CACHEINFO: + case RTA_EXPIRES: + case RTA_METRICS: + case RTA_PREF: + // ignored attributes + break; + case RTA_TABLE: + table = rta; + if(rta->rta_len != RTA_ALIGN(sizeof(struct rtattr)) + 4) { + if(debug >= dbg_none) fprintf(stderr, "ignore route with wrong length for RTA_TABLE\n"); + goto skip_nlmsg; + } + if(*(uint32_t*)RTA_DATA(rta) != RT_TABLE_MAIN) { + if(debug >= dbg_all_routes) fprintf(stderr, "ignore route for non-main table\n"); + goto skip_nlmsg; + } + break; + default: + // unknown attributes - can't process this route + if(debug >= dbg_all_routes) fprintf(stderr, "ignore route with unknown attribute %d\n", (int)rta->rta_type); + goto skip_nlmsg; + case RTA_OIF: oif = rta; break; + case RTA_GATEWAY: gateway = rta; break; + case RTA_VIA: via = rta; break; + case RTA_PRIORITY: priority = rta; break; + case RTA_DST: dst = rta; break; + case RTA_MULTIPATH: multipath = rta; break; + } +#if 0 + // rta->rta_type is unsigned + if(rta->rta_type < sizeof(rta_names)/sizeof(rta_names[0]) && rta_names[rta->rta_type]) { + printf("%s ", rta_names[rta->rta_type]); + } else { + printf("attr%d ", (int)rta->rta_type); + } +#endif + rta = RTA_NEXT(rta, rta_remaining_len); + } + } + + if(!dst) { + // these routes *can* exist - don't know what they mean + if(debug >= dbg_all_routes) fprintf(stderr, "invalid route: no dst\n"); + goto skip_nlmsg; + } + char dst_string[64]; + 
if(!inet_ntop(rt->rtm_family, RTA_DATA(dst), dst_string, sizeof dst_string)) + strcpy(dst_string, "?"); + if(!gateway && !via && !multipath) { + if(debug >= dbg_all_routes) fprintf(stderr, "ignore directly connected route to %s/%d\n", dst_string, rt->rtm_dst_len); + goto skip_nlmsg; // directly connected routes - ignored (set up ipv4 addresses yourself on the nodes that implement them) + } + if(rt->rtm_family == AF_INET6 && via) { + if(debug >= dbg_all_routes) fprintf(stderr, "ignore af_inet6 with via, to %s/%d\n", dst_string, rt->rtm_dst_len); + goto skip_nlmsg; // don't know what this means; ipv6 with non-ipv6 next hop? we would ignore that + } + + if(rt->rtm_family == AF_INET) { + assert(dst->rta_len == RTA_ALIGN(sizeof(struct rtattr))+4); + assert(rt->rtm_dst_len <= 32); + if(rt->rtm_dst_len < V4_MIN_PREFIX_LENGTH || !prefix_matches(RTA_DATA(dst), v4_reqprefix, V4_MIN_PREFIX_LENGTH)) { + if(debug >= dbg_all_routes) fprintf(stderr, "ignoring ipv4 route to somewhere else, to %s/%d\n", dst_string, rt->rtm_dst_len); + goto skip_nlmsg; + } + } else { + assert(rt->rtm_family == AF_INET6); + assert(dst->rta_len == RTA_ALIGN(sizeof(struct rtattr))+16); + assert(rt->rtm_dst_len <= 128); + if(rt->rtm_dst_len < V6_MIN_PREFIX_LENGTH || !prefix_matches(RTA_DATA(dst), v6_reqprefix, V6_MIN_PREFIX_LENGTH)) { + if(debug >= dbg_all_routes) fprintf(stderr, "ignoring ipv6 route to somewhere else, to %s/%d\n", dst_string, rt->rtm_dst_len); + goto skip_nlmsg; + } + } + if(debug >= dbg_relevant_routes) fprintf(stderr, "route to %s/%d matches our responsible ranges\n", dst_string, rt->rtm_dst_len); + + if(hdr->nlmsg_type == RTM_NEWROUTE) { + //(cur_state == state_dumping_ipv4 || cur_state == state_dumping_ipv6) && hdr->nlmsg_seq == dump_sequence + assert( + ( + ( + (cur_state == state_dumping_ipv4 && rt->rtm_family == AF_INET) + || (cur_state == state_dumping_ipv6 && rt->rtm_family == AF_INET6) + ) && hdr->nlmsg_seq == dump_sequence + ) || sender.nl_groups != 0 // unsolicited 
real-time update + ); + + if(cur_state == state_dumping_ipv4) { + uint32_t v4_dst = *(uint32_t*)RTA_DATA(dst); // network byte order + struct route *r = find_route(v4_dst, rt->rtm_dst_len); + if(!r) { + if(debug >= dbg_relevant_routes) fprintf(stderr, "didn't expect a route to %s/%d - will remove it\n", dst_string, rt->rtm_dst_len); + + // TODO: handle the response + hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + hdr->nlmsg_type = RTM_DELROUTE; + hdr->nlmsg_pid = 0; + hdr->nlmsg_seq = next_sequence(); + if(sendto(nlsock, hdr, hdr->nlmsg_len, 0, (const struct sockaddr*)&kernel_receiver, sizeof kernel_receiver) < 0) + error(1, errno, "sendto (delete ipv4 route)"); + } else { + if(debug >= dbg_relevant_routes) fprintf(stderr, "expected a route to %s/%d - will leave it\n", dst_string, rt->rtm_dst_len); + + if(r->ipv4_rtnl) { + if(sender.nl_groups == 0) { // unicast packet - response to dump request + fprintf(stderr, "route to %s/%d: saw it twice; deleting the extra one\n", dst_string, rt->rtm_dst_len); + start_deleting_unexpected_ipv4_route(hdr); + } // otherwise we probably got another update for the same route? TODO: check it's actually identical. if not, delete one. + } else { + // save a copy of the kernel's route so we can compare it against what we want it to be + r->ipv4_rtnl = xmemdup(hdr, NLMSG_ALIGN(hdr->nlmsg_len)); + if(r->state == rtst_stable || r->state == rtst_error) + r->state = rtst_unknown; + } + } + } else { + uint32_t v4_dst = *(uint32_t*)(12 + (char*)RTA_DATA(dst)); + struct route *r = find_route(v4_dst, rt->rtm_dst_len - 96); + if(r) { + // this could cause problems because only one route is recorded in our data structures + if(debug >= dbg_none) fprintf(stderr, "duplicate v6 route to %s/%d?\n", dst_string, rt->rtm_dst_len); + } else { + if(!gateway && !multipath) { + // didn't we check this above? 
+ if(debug >= dbg_none) fprintf(stderr, "can't process v6 route to %s/%d because no gateway or multipath attribute\n", dst_string, rt->rtm_dst_len); + } else { + if(debug >= dbg_relevant_routes) fprintf(stderr, "recording a v6 route to %s/%d?\n", dst_string, rt->rtm_dst_len); + r = append_route(v4_dst, rt->rtm_dst_len - 96); + r->ipv6_rtnl = xmemdup(hdr, NLMSG_ALIGN(hdr->nlmsg_len)); + + // format the corresponding ipv4 route + routebuilder_init(); + routebuilder_rtmsg->rtm_family = AF_INET; + routebuilder_rtmsg->rtm_dst_len = rt->rtm_dst_len - 96; + routebuilder_rtmsg->rtm_table = rt->rtm_table; + routebuilder_rtmsg->rtm_protocol = RTPROT_OSPF6TO4; + routebuilder_rtmsg->rtm_scope = rt->rtm_scope; + routebuilder_rtmsg->rtm_type = rt->rtm_type; + routebuilder_rtmsg->rtm_flags = rt->rtm_flags; // TODO: are all flags always applicable? + + struct { + struct rtattr hdr; + uint32_t dst; + } dst_attrib = { + {.rta_len = sizeof dst_attrib, .rta_type = RTA_DST}, + v4_dst + }; + routebuilder_append(&dst_attrib.hdr); + + if(multipath) { + + struct rtattr *multipath_hdr = (struct rtattr*)routebuilder_pos; + // rta_len to be filled in at end + multipath_hdr->rta_type = RTA_MULTIPATH; + routebuilder_pos += RTA_SPACE(0); + + // RTA_MULTIPATH contains an array of rtnexthop + payload + // payload of rtnexthop is an array of rtattr + payload; we have to translate RTA_GATEWAY into RTA_VIA. 
+ int nexthops_remaining_size = multipath->rta_len - RTA_ALIGN(sizeof(struct rtattr)); + + for(struct rtnexthop *rtnh = (struct rtnexthop*)RTA_DATA(multipath); + nexthops_remaining_size >= sizeof(struct rtnexthop) && RTNH_OK(rtnh, nexthops_remaining_size); + nexthops_remaining_size -= RTNH_ALIGN(rtnh->rtnh_len), rtnh = RTNH_NEXT(rtnh) + ) { + struct rtnexthop *rtnh_out = (struct rtnexthop*)routebuilder_pos; + routebuilder_pos += RTNH_ALIGN(sizeof(struct rtnexthop)); + memset(rtnh_out, 0, RTNH_ALIGN(sizeof(struct rtnexthop))); + rtnh_out->rtnh_flags = rtnh->rtnh_flags; + rtnh_out->rtnh_hops = rtnh->rtnh_hops; + rtnh_out->rtnh_ifindex = rtnh->rtnh_ifindex; + // rtnh_len assigned after payload emitted + + int nhattrs_remaining_size = rtnh->rtnh_len - RTNH_ALIGN(sizeof(struct rtnexthop)); + for(struct rtattr *nha = RTNH_DATA(rtnh); + RTA_OK(nha, nhattrs_remaining_size); + nha = RTA_NEXT(nha, nhattrs_remaining_size) + ) { + if(nha->rta_type == RTA_GATEWAY) { + assert(nha->rta_len == RTA_SPACE(16)); + struct { + struct rtattr hdr; + struct { // struct rtvia (specialized for 16-byte address) + __kernel_sa_family_t family; + uint8_t addr[16]; + } via; + } via_attrib = { + {.rta_len = sizeof via_attrib, .rta_type = RTA_VIA}, + {AF_INET6} + }; + memcpy(via_attrib.via.addr, RTA_DATA(nha), 16); + routebuilder_append(&via_attrib.hdr); + } else { + routebuilder_append(nha); + } + } + + rtnh_out->rtnh_len = routebuilder_pos - (char*)rtnh_out; + } + multipath_hdr->rta_len = routebuilder_pos - (char*)multipath_hdr; + } else { + assert(gateway); + assert(gateway->rta_len == RTA_SPACE(16)); + struct { + struct rtattr hdr; + struct { // struct rtvia (specialized for 16-byte address) + __kernel_sa_family_t family; + uint8_t addr[16]; + } via; + } via_attrib = { + {.rta_len = sizeof via_attrib, .rta_type = RTA_VIA}, + {AF_INET6} + }; + memcpy(via_attrib.via.addr, RTA_DATA(gateway), 16); + routebuilder_append(&via_attrib.hdr); + } + + if(oif) routebuilder_append(oif); + if(priority) 
routebuilder_append(priority); + if(table) routebuilder_append(table); + + routebuilder_nlhdr->nlmsg_len = routebuilder_pos - routebuilder; + + r->ipv4_new = xmemdup(routebuilder, NLMSG_ALIGN(routebuilder_nlhdr->nlmsg_len)); + if(r->state == rtst_stable || r->state == rtst_error) + r->state = rtst_unknown; + } + } + } + + } else if(hdr->nlmsg_type == RTM_DELROUTE && rt->rtm_family == AF_INET6) { + uint32_t v4_dst = *(uint32_t*)(12 + (char*)RTA_DATA(dst)); + struct route *r = find_route(v4_dst, rt->rtm_dst_len - 96); + if(r) { + if(debug >= dbg_relevant_routes) fprintf(stderr, "route to %s/%d was deleted\n", dst_string, rt->rtm_dst_len); + + free(r->ipv6_rtnl); + r->ipv6_rtnl = NULL; + + // have to update intended ipv4 state (which is no route) + if(r->ipv4_new != r->ipv4_rtnl) + free(r->ipv4_new); + r->ipv4_new = NULL; + + if(r->state == rtst_stable || r->state == rtst_error) + r->state = rtst_unknown; + } else { + if(debug >= dbg_none) fprintf(stderr, "didn't know about deleted v6 route to %s/%d\n", dst_string, rt->rtm_dst_len); + } + } + break; + } + } + + /*for(int i = 0; i < nrecv; i++) { + printf("%02x ", (unsigned char)buf[i]); + if((i & 15) == 15) printf("\n"); + } + printf("\n\n");*/ + } +}