// NOTE(review): the header names were lost in this copy of the file (only the
// bare "#include" tokens survived). The list below is reconstructed from the
// identifiers the code uses - confirm against the original before relying on it.
#include <assert.h>
#include <errno.h>
#include <error.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>

// Route translation:
//   IPv6 prefix: fd7a:81db:3c57:fffe:0000:0000:xxxx:xxxx
//   IPv6 prefix length must be at least 96 plus min ipv4 prefix, or the route is ignored.
//   IPv4 part of IPv6 prefix must match the IPv4 prefix
//   IPv4 address is always last 32 bits of IPv6 address. In the usual case, /128 routes are used.
// Routes with unknown attributes are ignored. In particular:
//   RTA_TABLE (must be RT_TABLE_MAIN if present)
//   RTA_SRC, RTA_IIF (only used when querying? not sure)
//   RTA_ENCAP_TYPE, RTA_ENCAP (TODO: these could be copied, but testing would be needed)
//   RTA_NH_ID (TODO? what is this? alternate way of specifying nexthop?)
//   RTA_VIA (not seen on ipv6 routes; used to specify ipv4 nexthop on ipv6 routes)
// Attributes that are copied are:
//   RTA_OIF (output interface)
//   RTA_PRIORITY (aka metric)
//   RTA_GATEWAY (converted to RTA_VIA for ipv4 table, but holds the same address)
//   RTA_MULTIPATH (RTA_GATEWAY within gets converted to RTA_VIA for ipv4 table, but holds the same address. All other sub-attributes are copied.)
// Attributes with very special handling are:
//   RTA_DST
// Attributes that are knowingly ignored are:
//   RTA_PREFSRC
//   RTA_CACHEINFO
//   RTA_EXPIRES
//   RTA_METRICS
//   RTA_PREF (RFC4191, IPv6-only)

// Only manage routes to this part of the v4 address space (172.23.101.144/28)
const unsigned char v4_reqprefix[4] = {0xac, 0x17, 0x65, 0x90};
#define V4_MIN_PREFIX_LENGTH 28

// The same range embedded in the last 32 bits of the IPv6 translation prefix.
const unsigned char v6_reqprefix[16] = {
	0xfd, 0x7a, 0x81, 0xdb, 0x3c, 0x57, 0xff, 0xfe,
	0, 0, 0, 0,
	0xac, 0x17, 0x65, 0x90
};
#define V6_MIN_PREFIX_LENGTH (96+V4_MIN_PREFIX_LENGTH)

#define RTPROT_OSPF6TO4 RTPROT_EIGRP // grab some otherwise-unused value

// Destination address for everything we send: the kernel (netlink pid 0).
static const struct sockaddr_nl kernel_receiver = {.nl_family = AF_NETLINK, .nl_pid = 0};

// The one rtnetlink socket used for requests, acks and multicast notifications.
static int nlsock;

// Local address for nlsock; joins the IPv6 route notification group.
static struct sockaddr_nl bindaddr = {
	.nl_family = AF_NETLINK,
	.nl_groups = RTMGRP_IPV6_ROUTE,
};

// Send an RTM_GETROUTE dump request for address family `af` on nlsock.
//   label:    family name used only in the error message
//   af:       address family placed in rtm_family
//   sequence: value placed in nlmsg_seq
// Exits the process via error() if the request cannot be sent.
static void request_dump(const char *label, int af, int sequence) {
	struct {
		struct nlmsghdr nlhdr;
		struct rtmsg rthdr;
	} req;

	memset(&req, 0, sizeof req);

	req.nlhdr.nlmsg_len = sizeof req;
	req.nlhdr.nlmsg_type = RTM_GETROUTE;
	req.nlhdr.nlmsg_flags = NLM_F_REQUEST|NLM_F_DUMP;
	req.nlhdr.nlmsg_seq = sequence;
	req.nlhdr.nlmsg_pid = 0; // bindaddr.nl_pid was also tried; either works?

	req.rthdr.rtm_family = af;
	req.rthdr.rtm_dst_len = 0; // dst prefix mask bit length
	req.rthdr.rtm_src_len = 0; // src prefix mask bit length
	req.rthdr.rtm_tos = 0; // unused?
	req.rthdr.rtm_table = RT_TABLE_MAIN;
	req.rthdr.rtm_protocol = RTPROT_UNSPEC; // TODO would be a useful filter, eg RTPROT_BIRD
	req.rthdr.rtm_scope = RT_SCOPE_UNIVERSE;
	req.rthdr.rtm_type = RTN_UNSPEC;
	req.rthdr.rtm_flags = 0;

	ssize_t sent = sendto(nlsock, &req, sizeof req, 0,
	                      (struct sockaddr*)&kernel_receiver, sizeof kernel_receiver);
	if(sent < 0)
		error(1, errno, "failed to send %s route dump request", label);
}

// Startup sequence:
//   First: dump ipv6 routes
//   Then: dump ipv4 routes and remove any that don't make sense.
//   Then: process updates as they come in.
static enum {
	state_dumping_ipv6,
	state_dumping_ipv4,
	state_realtime,
} cur_state;

// There's one structure per ipv6 route that we translate.
// (not per ipv4 route if it doesn't have an associated ipv6 route - we delete those immediately) // There should only be a small number of relevant routes, so we should be fine storing them in an unsorted dynamic array. // at least we do use a cache-efficient SoA data structure to hold the keys. // IPv6 routes are stored paired with their IPv4 route. Both are stored in direct rtnetlink format, even though it's // difficult to manipulate. struct route { struct nlmsghdr *ipv6_rtnl; // directly as received from rtnetlink struct nlmsghdr *ipv4_rtnl; // what the kernel knows at the moment. NULL if no route in table. struct nlmsghdr *ipv4_new; // as we want it to be. If up-to-date, ipv4_new==ipv4_rtnl - be careful not to double-free. // Odd-numbered states are waiting for an acknowledgement from the kernel. Sequence number is always stored in outstanding_seq. Otherwise it's meaningless. enum route_state { rtst_unknown = 0, rtst_deleting_old = 1, rtst_creating_new = 3, rtst_stable = 4, rtst_error = 6, } state; uint32_t outstanding_seq; }; static struct route *routes; static uint32_t *route_addrs; // primary key; network byte order static unsigned char *route_masklens; // secondary key static unsigned int routes_used; static unsigned int routes_allocated; static struct route *find_route(uint32_t ipv4_addr, int masklen) { for(int i = 0; i < routes_used; i++) { if(route_addrs[i] == ipv4_addr && route_masklens[i] == masklen) return &routes[i]; } return NULL; } static struct route *append_route(uint32_t ipv4_addr, int masklen) { if(routes_used == routes_allocated) { unsigned int new_alloc = routes_allocated + (routes_allocated >> 1); if(new_alloc < routes_allocated || new_alloc > (1<<30)/sizeof(*routes)) error(1, 0, "memory allocation overflow (too many routes)"); if(new_alloc < 5) new_alloc = 5; routes = realloc(routes, new_alloc*sizeof(*routes)); route_addrs = realloc(route_addrs, new_alloc*sizeof(*route_addrs)); route_masklens = realloc(route_masklens, 
new_alloc*sizeof(*route_masklens)); if(!routes || !route_addrs || !route_masklens) error(1, 0, "failed to allocate memory for routes"); routes_allocated = new_alloc; } route_addrs[routes_used] = ipv4_addr; route_masklens[routes_used] = masklen; routes_used++; struct route *r = &routes[routes_used-1]; memset(r, 0, sizeof(*r)); return r; } static void remove_route_from_table(unsigned int index) { assert(index < routes_used); struct route *r = &routes[index]; free(r->ipv6_rtnl); if(r->ipv4_new != r->ipv4_rtnl) free(r->ipv4_new); free(r->ipv4_rtnl); if(index != routes_used - 1) { assert(routes_used > 0); *r = routes[routes_used - 1]; route_addrs[index] = route_addrs[routes_used - 1]; route_masklens[index] = route_masklens[routes_used - 1]; } routes_used--; } static uint32_t next_sequence() { static uint32_t next = 1; next++; if(next == 0) next = 1; // don't use 0 // we just assume that sequence numbers won't wrap around until the previous request using that number has completed return next; } #if 0 static const char *const rta_names[] = { // don't know what all of these mean, but they're defined [RTA_DST]="dst", [RTA_SRC]="src", [RTA_IIF]="iif", [RTA_OIF]="oif", [RTA_GATEWAY]="gateway", [RTA_PRIORITY]="priority", [RTA_PREFSRC]="prefsrc", // this one can't be transferred to ipv4 [RTA_METRICS]="metrics", [RTA_MULTIPATH]="multipath", // TODO [RTA_FLOW]="flow", [RTA_CACHEINFO]="cacheinfo", [RTA_TABLE]="table", [RTA_MARK]="mark", [RTA_MFC_STATS]="mfc_stats", [RTA_VIA]="via", [RTA_NEWDST]="newdst", [RTA_PREF]="pref", [RTA_ENCAP_TYPE]="encaptype", [RTA_ENCAP]="encap", [RTA_EXPIRES]="expires", [RTA_PAD]="pad", [RTA_UID]="uid", [RTA_TTL_PROPAGATE]="ttl_propagate", [RTA_IP_PROTO]="ip_proto", [RTA_SPORT]="sport", [RTA_DPORT]="dport", [RTA_NH_ID]="nhid", }; #endif static int prefix_matches(const unsigned char *a, const unsigned char *b, int nbits) { while(nbits >= 8) { if(*a != *b) return 0; a++; b++; nbits -= 8; } if(nbits == 0) return 1; int mask = (1 << (8 - nbits)) & 255; 
return (*a & mask) == (*b & mask); } enum debuglevel { dbg_none, dbg_relevant_routes, dbg_route_operations, dbg_all_routes, }; static enum debuglevel debug = dbg_route_operations; static char *routebuilder; // points to array with guard page afterwards static char *routebuilder_pos; #define routebuilder_nlhdr ((struct nlmsghdr*)routebuilder) #define routebuilder_rtmsg ((struct rtmsg*)(routebuilder + NLMSG_HDRLEN)) static void routebuilder_init() { memset(routebuilder, 0, NLMSG_HDRLEN + NLMSG_ALIGN(sizeof(struct rtmsg))); routebuilder_pos = routebuilder + NLMSG_HDRLEN + NLMSG_ALIGN(sizeof(struct rtmsg)); //routebuilder_nlhdr remains uninitialized until message is finalized and then sent } static void routebuilder_append(struct rtattr *rta) { memset(routebuilder_pos, 0, RTA_ALIGN(rta->rta_len)); memcpy(routebuilder_pos, rta, rta->rta_len); routebuilder_pos += RTA_ALIGN(rta->rta_len); } static void *xmemdup(void *x, size_t len) { void *result = malloc(len); if(!result) error(1, errno, "malloc"); memcpy(result, x, len); return result; } // Start deletion of an ipv4 route not associated with any ipv6 route static void start_deleting_unexpected_ipv4_route(struct nlmsghdr *msg) { // TODO } static void route_start_deleting_old(struct route *r) { if(debug >= dbg_route_operations) { char addrbuf[64]; if(!inet_ntop(AF_INET, &route_addrs[r - routes], addrbuf, sizeof addrbuf)) strcpy(addrbuf, "?"); fprintf(stderr, "try to delete a route to %s/%d\n", addrbuf, route_masklens[r - routes]); } assert(r->ipv4_rtnl); r->outstanding_seq = next_sequence(); r->state = rtst_deleting_old; r->ipv4_rtnl->nlmsg_type = RTM_DELROUTE; r->ipv4_rtnl->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; r->ipv4_rtnl->nlmsg_seq = r->outstanding_seq; if(sendto(nlsock, r->ipv4_rtnl, r->ipv4_rtnl->nlmsg_len, 0, (const struct sockaddr*)&kernel_receiver, sizeof kernel_receiver) < 0) error(1, errno, "sendto (delete ipv4 route)"); } static void route_start_creating_new(struct route *r) { if(debug >= 
dbg_route_operations) { char addrbuf[64]; if(!inet_ntop(AF_INET, &route_addrs[r - routes], addrbuf, sizeof addrbuf)) strcpy(addrbuf, "?"); fprintf(stderr, "try to create a route to %s/%d\n", addrbuf, route_masklens[r - routes]); } assert(r->ipv4_new); r->outstanding_seq = next_sequence(); r->state = rtst_creating_new; r->ipv4_new->nlmsg_type = RTM_NEWROUTE; r->ipv4_new->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_CREATE | NLM_F_REPLACE; r->ipv4_new->nlmsg_seq = r->outstanding_seq; if(sendto(nlsock, r->ipv4_new, r->ipv4_new->nlmsg_len, 0, (const struct sockaddr*)&kernel_receiver, sizeof kernel_receiver) < 0) error(1, errno, "sendto (create ipv4 route)"); } static struct rtattr *find_rtattr(struct nlmsghdr *a, int attrnum) { int remlen = a->nlmsg_len - NLMSG_HDRLEN - NLMSG_ALIGN(sizeof(struct rtmsg)); struct rtattr *pos = (struct rtattr*)((char*)a + NLMSG_HDRLEN + NLMSG_ALIGN(sizeof(struct rtmsg))); while(RTA_OK(pos, remlen)) { if(pos->rta_type == attrnum) return pos; pos = RTA_NEXT(pos, remlen); } return NULL; } static int is_current_route_attr_good(struct nlmsghdr *a, struct nlmsghdr *b, int attrnum) { struct rtattr *rta = find_rtattr(a, attrnum); struct rtattr *rtb = find_rtattr(b, attrnum); if(!rta && !rtb) return 1; if(!rta || !rtb) return 0; if(rta->rta_len != rtb->rta_len) return 0; return !memcmp(RTA_DATA(rta), RTA_DATA(rtb), rta->rta_len - RTA_ALIGN(sizeof(struct rtattr))); } static int is_current_route_good(struct nlmsghdr *a, struct nlmsghdr *b) { if(!a && !b) return 1; if(!a || !b) return 0; // both must be valid route messages. nlmsg type and flags ignored. 
assert(a->nlmsg_len >= NLMSG_SPACE(sizeof(struct rtmsg))); assert(b->nlmsg_len >= NLMSG_SPACE(sizeof(struct rtmsg))); struct rtmsg *rma = NLMSG_DATA(a); struct rtmsg *rmb = NLMSG_DATA(b); #define STR2(x) #x #define check(field) if(rma->field != rmb->field) {fprintf(stderr, "mismatch field %s\n", STR2(field)); return 0;} check(rtm_dst_len); check(rtm_family); check(rtm_flags); check(rtm_protocol); check(rtm_scope); check(rtm_table); check(rtm_type); #undef check #define check(attr) if(!is_current_route_attr_good(a, b, attr)) {fprintf(stderr, "mismatch attr %s\n", STR2(attr)); return 0;} // RTM_TABLE *not* checked since it's duplicated in rtm_table? check(RTA_OIF); check(RTA_VIA); check(RTA_PRIORITY); check(RTA_DST); check(RTA_MULTIPATH); // looks like the kernel can't reorder entries. It does seem to unwrap it if there's only one path, though. #undef check return 1; } int main() { nlsock = socket(AF_NETLINK, SOCK_DGRAM | SOCK_CLOEXEC, NETLINK_ROUTE); if(nlsock < 0) error(1, errno, "failed to create rtnetlink socket"); { int pagesize = getpagesize(); // TODO: should allocate an appropriately calculated number of pages instead of 2 pages routebuilder = mmap(NULL, pagesize*3, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if(!routebuilder) error(1, errno, "mmap"); if(!mmap(routebuilder + pagesize*2, pagesize, PROT_NONE, MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0)) error(1, errno, "mmap"); } { int extack = 1; if(setsockopt(nlsock, SOL_NETLINK, NETLINK_EXT_ACK, &extack, sizeof extack) < 0) error(0, errno, "setsockopt (netlink ext ack)"); // non-fatal error } if(bind(nlsock, (struct sockaddr*)&bindaddr, sizeof bindaddr) < 0) error(1, errno, "failed to bind netlink socket"); uint32_t dump_sequence = next_sequence(); cur_state = state_dumping_ipv6; request_dump("ipv6", AF_INET6, dump_sequence); while(1) { // Always after processing any message: // (occurs at start of loop so that we can see if any timed actions are needed) if(cur_state == state_realtime) { 
for(int i = 0; i < routes_used; i++) { struct route *r = &routes[i]; //fprintf(stderr, "check route %d state %d\n", i, r->state); if (!r->ipv6_rtnl) { // an unexpected ipv4 route being deleted assert(!r->ipv4_new); if(!r->ipv4_rtnl) { // route has been deleted from kernel if(debug >= dbg_route_operations) fprintf(stderr, "route deleted from kernel - deleting from table\n"); remove_route_from_table(i); i--; continue; } else if(r->state == rtst_unknown) { route_start_deleting_old(r); } continue; } if(r->state == rtst_unknown) { // check if the route needs any update if(is_current_route_good(r->ipv4_rtnl, r->ipv4_new)) { r->state = rtst_stable; } else if(r->ipv4_rtnl) { route_start_deleting_old(r); } else { route_start_creating_new(r); } continue; } if(r->state & 1) { // TODO: check for timeouts } if(r->state == rtst_error) { // TODO: check for error timeout } } } char buf[65536]; struct sockaddr_nl sender = {0}; socklen_t sender_len = sizeof sender; int nrecv = recvfrom(nlsock, buf, sizeof buf, 0, (struct sockaddr*)&sender, &sender_len); if(nrecv < 0) { if(errno == EINTR) continue; // TODO: on ENOBUFS, restart and resynchronize error(1, errno, "recvfrom netlink"); } if(sender.nl_family != AF_NETLINK || sender.nl_pid != 0) { fprintf(stderr, "got netlink packet not from kernel (sender=%d)\n", sender.nl_family == AF_NETLINK ? 
(int)sender.nl_pid : -1); continue; } int remaining_len = nrecv; struct nlmsghdr *hdr = (struct nlmsghdr*)buf; for(; NLMSG_OK(hdr, remaining_len) && NLMSG_PAYLOAD(hdr, remaining_len) >= sizeof(struct rtmsg); hdr = NLMSG_NEXT(hdr, remaining_len) ) { // invalid check; dump responses show our own PID /*if(hdr->nlmsg_pid != 0)*/ if(0) { fprintf(stderr, "got netlink message not from kernel (sender=%d)\n", (int)hdr->nlmsg_pid); skip_nlmsg: continue; } switch(hdr->nlmsg_type) { case NLMSG_NOOP: break; case NLMSG_ERROR: if(hdr->nlmsg_len < NLMSG_ALIGN(sizeof(struct nlmsgerr)) + NLMSG_HDRLEN) fprintf(stderr, "got bad NLMSG_ERROR length\n"); else { struct nlmsgerr *err = NLMSG_DATA(hdr); int i; for(i = 0; i < routes_used; i++) { if((routes[i].state & 1) && (err->msg.nlmsg_seq == routes[i].outstanding_seq)) { break; } } if(i >= routes_used) { fprintf(stderr, "got NLMSG_ERROR for unknown request (sequence=%d)\n", (int)err->msg.nlmsg_seq); } else { struct route *r = &routes[i]; char addrbuf[64]; if(!inet_ntop(AF_INET, &route_addrs[i], addrbuf, sizeof addrbuf)) strcpy(addrbuf, "?"); switch(r->state) { case rtst_creating_new: if(err->error == 0) { if(debug >= dbg_route_operations) fprintf(stderr, "successfully created a route to %s/%d\n", addrbuf, route_masklens[i]); if(r->ipv4_rtnl != r->ipv4_new) { free(r->ipv4_rtnl); r->ipv4_rtnl = r->ipv4_new; } r->state = rtst_unknown; } else { if(debug >= dbg_none) fprintf(stderr, "failed to create a route to %s/%d\n", addrbuf, route_masklens[i]); r->state = rtst_error; } break; case rtst_deleting_old: if(err->error == 0) { if(debug >= dbg_route_operations) fprintf(stderr, "successfully deleted a route to %s/%d\n", addrbuf, route_masklens[i]); if(r->ipv4_rtnl != r->ipv4_new) free(r->ipv4_rtnl); r->ipv4_rtnl = NULL; r->state = rtst_unknown; } else { if(debug >= dbg_none) fprintf(stderr, "failed to delete a route to %s/%d\n", addrbuf, route_masklens[i]); r->state = rtst_error; } break; default: break; } } } break; case NLMSG_DONE: 
if((cur_state == state_dumping_ipv4 || cur_state == state_dumping_ipv6) && hdr->nlmsg_seq == dump_sequence) { if(cur_state == state_dumping_ipv6) { // next step is ipv4 dump_sequence = next_sequence(); cur_state = state_dumping_ipv4; request_dump("ipv4", AF_INET, dump_sequence); } else if(cur_state == state_dumping_ipv4) { cur_state = state_realtime; } } else { fprintf(stderr, "unexpected NLMSG_DONE sequence=%d my_state=%d my_sequence=%d\n", hdr->nlmsg_seq, (int)cur_state, (int)dump_sequence); } break; default: printf("got unexpected nlmsg type 0x%x\n", hdr->nlmsg_type); break; case RTM_NEWROUTE: case RTM_DELROUTE: #if 0 if(hdr->nlmsg_type == RTM_NEWROUTE) printf("got RTM_NEWROUTE "); else printf("got RTM_DELROUTE "); #endif // When a route is removed, the kernel sends a RTM_DELROUTE with the same attributes // table dst prefsrc priority gateway oif cacheinfo pref // table dst prefsrc priority multipath cacheinfo pref // table dst priority oif cacheinfo pref struct rtmsg *rt = NLMSG_DATA(hdr); // whether it's from a dump or not, we can check whether it's a route we can process // and ignore it if not. we also extract pointers to the attributes we might care about. 
struct rtattr *oif = NULL; // output interface struct rtattr *gateway = NULL; struct rtattr *via = NULL; struct rtattr *priority = NULL; // aka metric struct rtattr *dst = NULL; struct rtattr *multipath = NULL; struct rtattr *table = NULL; { struct rtattr *rta = (struct rtattr*)((char*)rt + NLMSG_ALIGN(sizeof(struct rtmsg))); int rta_remaining_len = hdr->nlmsg_len - NLMSG_HDRLEN - NLMSG_ALIGN(sizeof(struct rtmsg)); while(RTA_OK(rta, rta_remaining_len)) { switch(rta->rta_type) { case RTA_PREFSRC: case RTA_CACHEINFO: case RTA_EXPIRES: case RTA_METRICS: case RTA_PREF: // ignored attributes break; case RTA_TABLE: table = rta; if(rta->rta_len != RTA_ALIGN(sizeof(struct rtattr)) + 4) { if(debug >= dbg_none) fprintf(stderr, "ignore route with wrong length for RTA_TABLE\n"); goto skip_nlmsg; } if(*(uint32_t*)RTA_DATA(rta) != RT_TABLE_MAIN) { if(debug >= dbg_all_routes) fprintf(stderr, "ignore route for non-main table\n"); goto skip_nlmsg; } break; default: // unknown attributes - can't process this route if(debug >= dbg_all_routes) fprintf(stderr, "ignore route with unknown attribute %d\n", (int)rta->rta_type); goto skip_nlmsg; case RTA_OIF: oif = rta; break; case RTA_GATEWAY: gateway = rta; break; case RTA_VIA: via = rta; break; case RTA_PRIORITY: priority = rta; break; case RTA_DST: dst = rta; break; case RTA_MULTIPATH: multipath = rta; break; } #if 0 // rta->rta_type is unsigned if(rta->rta_type < sizeof(rta_names)/sizeof(rta_names[0]) && rta_names[rta->rta_type]) { printf("%s ", rta_names[rta->rta_type]); } else { printf("attr%d ", (int)rta->rta_type); } #endif rta = RTA_NEXT(rta, rta_remaining_len); } } if(!dst) { // these routes *can* exist - don't know what they mean if(debug >= dbg_all_routes) fprintf(stderr, "invalid route: no dst\n"); goto skip_nlmsg; } char dst_string[64]; if(!inet_ntop(rt->rtm_family, RTA_DATA(dst), dst_string, sizeof dst_string)) strcpy(dst_string, "?"); if(!gateway && !via && !multipath) { if(debug >= dbg_all_routes) fprintf(stderr, "ignore 
directly connected route to %s/%d\n", dst_string, rt->rtm_dst_len); goto skip_nlmsg; // directly connected routes - ignored (set up ipv4 addresses yourself on the nodes that implement them) } if(rt->rtm_family == AF_INET6 && via) { if(debug >= dbg_all_routes) fprintf(stderr, "ignore af_inet6 with via, to %s/%d\n", dst_string, rt->rtm_dst_len); goto skip_nlmsg; // don't know what this means; ipv6 with non-ipv6 next hop? we would ignore that } if(rt->rtm_family == AF_INET) { assert(dst->rta_len == RTA_ALIGN(sizeof(struct rtattr))+4); assert(rt->rtm_dst_len <= 32); if(rt->rtm_dst_len < V4_MIN_PREFIX_LENGTH || !prefix_matches(RTA_DATA(dst), v4_reqprefix, V4_MIN_PREFIX_LENGTH)) { if(debug >= dbg_all_routes) fprintf(stderr, "ignoring ipv4 route to somewhere else, to %s/%d\n", dst_string, rt->rtm_dst_len); goto skip_nlmsg; } } else { assert(rt->rtm_family == AF_INET6); assert(dst->rta_len == RTA_ALIGN(sizeof(struct rtattr))+16); assert(rt->rtm_dst_len <= 128); if(rt->rtm_dst_len < V6_MIN_PREFIX_LENGTH || !prefix_matches(RTA_DATA(dst), v6_reqprefix, V6_MIN_PREFIX_LENGTH)) { if(debug >= dbg_all_routes) fprintf(stderr, "ignoring ipv6 route to somewhere else, to %s/%d\n", dst_string, rt->rtm_dst_len); goto skip_nlmsg; } } if(debug >= dbg_relevant_routes) fprintf(stderr, "route to %s/%d matches our responsible ranges\n", dst_string, rt->rtm_dst_len); if(hdr->nlmsg_type == RTM_NEWROUTE) { //(cur_state == state_dumping_ipv4 || cur_state == state_dumping_ipv6) && hdr->nlmsg_seq == dump_sequence assert( ( ( (cur_state == state_dumping_ipv4 && rt->rtm_family == AF_INET) || (cur_state == state_dumping_ipv6 && rt->rtm_family == AF_INET6) ) && hdr->nlmsg_seq == dump_sequence ) || sender.nl_groups != 0 // unsolicited real-time update ); if(cur_state == state_dumping_ipv4) { uint32_t v4_dst = *(uint32_t*)RTA_DATA(dst); // network byte order struct route *r = find_route(v4_dst, rt->rtm_dst_len); if(!r) { if(debug >= dbg_relevant_routes) fprintf(stderr, "didn't expect a route to %s/%d - 
will remove it\n", dst_string, rt->rtm_dst_len); // TODO: handle the response hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; hdr->nlmsg_type = RTM_DELROUTE; hdr->nlmsg_pid = 0; hdr->nlmsg_seq = next_sequence(); if(sendto(nlsock, hdr, hdr->nlmsg_len, 0, (const struct sockaddr*)&kernel_receiver, sizeof kernel_receiver) < 0) error(1, errno, "sendto (delete ipv4 route)"); } else { if(debug >= dbg_relevant_routes) fprintf(stderr, "expected a route to %s/%d - will leave it\n", dst_string, rt->rtm_dst_len); if(r->ipv4_rtnl) { if(sender.nl_groups == 0) { // unicast packet - response to dump request fprintf(stderr, "route to %s/%d: saw it twice; deleting the extra one\n", dst_string, rt->rtm_dst_len); start_deleting_unexpected_ipv4_route(hdr); } // otherwise we probably got another update for the same route? TODO: check it's actually identical. if not, delete one. } else { // save a copy of the kernel's route so we can compare it against what we want it to be r->ipv4_rtnl = xmemdup(hdr, NLMSG_ALIGN(hdr->nlmsg_len)); if(r->state == rtst_stable || r->state == rtst_error) r->state = rtst_unknown; } } } else { uint32_t v4_dst = *(uint32_t*)(12 + (char*)RTA_DATA(dst)); struct route *r = find_route(v4_dst, rt->rtm_dst_len - 96); if(r) { // this could cause problems because only one route is recorded in our data structures if(debug >= dbg_none) fprintf(stderr, "duplicate v6 route to %s/%d?\n", dst_string, rt->rtm_dst_len); } else { if(!gateway && !multipath) { // didn't we check this above? 
if(debug >= dbg_none) fprintf(stderr, "can't process v6 route to %s/%d because no gateway or multipath attribute\n", dst_string, rt->rtm_dst_len); } else { if(debug >= dbg_relevant_routes) fprintf(stderr, "recording a v6 route to %s/%d?\n", dst_string, rt->rtm_dst_len); r = append_route(v4_dst, rt->rtm_dst_len - 96); r->ipv6_rtnl = xmemdup(hdr, NLMSG_ALIGN(hdr->nlmsg_len)); // format the corresponding ipv4 route routebuilder_init(); routebuilder_rtmsg->rtm_family = AF_INET; routebuilder_rtmsg->rtm_dst_len = rt->rtm_dst_len - 96; routebuilder_rtmsg->rtm_table = rt->rtm_table; routebuilder_rtmsg->rtm_protocol = RTPROT_OSPF6TO4; routebuilder_rtmsg->rtm_scope = rt->rtm_scope; routebuilder_rtmsg->rtm_type = rt->rtm_type; routebuilder_rtmsg->rtm_flags = rt->rtm_flags; // TODO: are all flags always applicable? struct { struct rtattr hdr; uint32_t dst; } dst_attrib = { {.rta_len = sizeof dst_attrib, .rta_type = RTA_DST}, v4_dst }; routebuilder_append(&dst_attrib.hdr); if(multipath) { struct rtattr *multipath_hdr = (struct rtattr*)routebuilder_pos; // rta_len to be filled in at end multipath_hdr->rta_type = RTA_MULTIPATH; routebuilder_pos += RTA_SPACE(0); // RTA_MULTIPATH contains an array of rtnexthop + payload // payload of rtnexthop is an array of rtattr + payload; we have to translate RTA_GATEWAY into RTA_VIA. 
int nexthops_remaining_size = multipath->rta_len - RTA_ALIGN(sizeof(struct rtattr)); for(struct rtnexthop *rtnh = (struct rtnexthop*)RTA_DATA(multipath); nexthops_remaining_size >= sizeof(struct rtnexthop) && RTNH_OK(rtnh, nexthops_remaining_size); nexthops_remaining_size -= RTNH_ALIGN(rtnh->rtnh_len), rtnh = RTNH_NEXT(rtnh) ) { struct rtnexthop *rtnh_out = (struct rtnexthop*)routebuilder_pos; routebuilder_pos += RTNH_ALIGN(sizeof(struct rtnexthop)); memset(rtnh_out, 0, RTNH_ALIGN(sizeof(struct rtnexthop))); rtnh_out->rtnh_flags = rtnh->rtnh_flags; rtnh_out->rtnh_hops = rtnh->rtnh_hops; rtnh_out->rtnh_ifindex = rtnh->rtnh_ifindex; // rtnh_len assigned after payload emitted int nhattrs_remaining_size = rtnh->rtnh_len - RTNH_ALIGN(sizeof(struct rtnexthop)); for(struct rtattr *nha = RTNH_DATA(rtnh); RTA_OK(nha, nhattrs_remaining_size); nha = RTA_NEXT(nha, nhattrs_remaining_size) ) { if(nha->rta_type == RTA_GATEWAY) { assert(nha->rta_len == RTA_SPACE(16)); struct { struct rtattr hdr; struct { // struct rtvia (specialized for 16-byte address) __kernel_sa_family_t family; uint8_t addr[16]; } via; } via_attrib = { {.rta_len = sizeof via_attrib, .rta_type = RTA_VIA}, {AF_INET6} }; memcpy(via_attrib.via.addr, RTA_DATA(nha), 16); routebuilder_append(&via_attrib.hdr); } else { routebuilder_append(nha); } } rtnh_out->rtnh_len = routebuilder_pos - (char*)rtnh_out; } multipath_hdr->rta_len = routebuilder_pos - (char*)multipath_hdr; } else { assert(gateway); assert(gateway->rta_len == RTA_SPACE(16)); struct { struct rtattr hdr; struct { // struct rtvia (specialized for 16-byte address) __kernel_sa_family_t family; uint8_t addr[16]; } via; } via_attrib = { {.rta_len = sizeof via_attrib, .rta_type = RTA_VIA}, {AF_INET6} }; memcpy(via_attrib.via.addr, RTA_DATA(gateway), 16); routebuilder_append(&via_attrib.hdr); } if(oif) routebuilder_append(oif); if(priority) routebuilder_append(priority); if(table) routebuilder_append(table); routebuilder_nlhdr->nlmsg_len = routebuilder_pos - 
routebuilder; r->ipv4_new = xmemdup(routebuilder, NLMSG_ALIGN(routebuilder_nlhdr->nlmsg_len)); if(r->state == rtst_stable || r->state == rtst_error) r->state = rtst_unknown; } } } } else if(hdr->nlmsg_type == RTM_DELROUTE && rt->rtm_family == AF_INET6) { uint32_t v4_dst = *(uint32_t*)(12 + (char*)RTA_DATA(dst)); struct route *r = find_route(v4_dst, rt->rtm_dst_len - 96); if(r) { if(debug >= dbg_relevant_routes) fprintf(stderr, "route to %s/%d was deleted\n", dst_string, rt->rtm_dst_len); free(r->ipv6_rtnl); r->ipv6_rtnl = NULL; // have to update intended ipv4 state (which is no route) if(r->ipv4_new != r->ipv4_rtnl) free(r->ipv4_new); r->ipv4_new = NULL; if(r->state == rtst_stable || r->state == rtst_error) r->state = rtst_unknown; } else { if(debug >= dbg_none) fprintf(stderr, "didn't know about deleted v6 route to %s/%d\n", dst_string, rt->rtm_dst_len); } } break; } } /*for(int i = 0; i < nrecv; i++) { printf("%02x ", (unsigned char)buf[i]); if((i & 15) == 15) printf("\n"); } printf("\n\n");*/ } }