/* * BIRD -- Linux Netlink Interface * * (c) 1999 Martin Mares * * Can be freely distributed and used under the terms of the GNU GPL. */ #include #include #include #include #include #include #define LOCAL_DEBUG #include "nest/bird.h" #include "nest/route.h" #include "nest/protocol.h" #include "nest/iface.h" #include "lib/timer.h" #include "lib/unix.h" #include "lib/krt.h" #include "lib/socket.h" /* * We need to work around namespace conflicts between us and the kernel, * but I prefer this way to being forced to rename our configuration symbols. * This will disappear as soon as netlink headers become part of the libc. */ #undef CONFIG_NETLINK #include #ifndef CONFIG_NETLINK #error "Kernel not configured to support netlink" #endif #include #include #include #ifndef MSG_TRUNC /* FIXME: Hack to circumvent omissions in glibc includes */ #define MSG_TRUNC 0x20 #endif #ifndef RTPROT_BIRD /* FIXME: Kill after Alexey assigns as a number */ #define RTPROT_BIRD 13 #endif /* * Synchronous Netlink interface */ static int nl_sync_fd = -1; /* Unix socket for synchronous netlink actions */ static u32 nl_sync_seq; /* Sequence number of last request sent */ static byte *nl_rx_buffer; /* Receive buffer */ static int nl_rx_size = 8192; static struct nlmsghdr *nl_last_hdr; /* Recently received packet */ static unsigned int nl_last_size; static void nl_open(void) { if (nl_sync_fd < 0) { nl_sync_fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE); if (nl_sync_fd < 0) die("Unable to open rtnetlink socket: %m"); nl_sync_seq = now; nl_rx_buffer = xmalloc(nl_rx_size); } } static void nl_send(void *rq, int size) { struct nlmsghdr *nh = rq; struct sockaddr_nl sa; memset(&sa, 0, sizeof(sa)); sa.nl_family = AF_NETLINK; nh->nlmsg_len = size; nh->nlmsg_pid = 0; nh->nlmsg_seq = ++nl_sync_seq; if (sendto(nl_sync_fd, rq, size, 0, (struct sockaddr *)&sa, sizeof(sa)) < 0) die("rtnetlink sendto: %m"); nl_last_hdr = NULL; } static void nl_request_dump(int cmd) { struct { struct nlmsghdr nh; struct rtgenmsg g; } req; req.nh.nlmsg_type = cmd; req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP; req.g.rtgen_family = PF_INET; nl_send(&req, sizeof(req)); } static struct nlmsghdr * nl_get_reply(void) { for(;;) { if (!nl_last_hdr) { struct iovec iov = { nl_rx_buffer, nl_rx_size }; struct sockaddr_nl sa; struct msghdr m = { (struct sockaddr *) &sa, sizeof(sa), &iov, 1, NULL, 0, 0 }; int x = recvmsg(nl_sync_fd, &m, 0); if (x < 0) die("nl_get_reply: %m"); if (sa.nl_pid) /* It isn't from the kernel */ { DBG("Non-kernel packet\n"); continue; } nl_last_size = x; nl_last_hdr = (void *) nl_rx_buffer; if (m.msg_flags & MSG_TRUNC) bug("nl_get_reply: got truncated reply which should be impossible"); } if (NLMSG_OK(nl_last_hdr, nl_last_size)) { struct nlmsghdr *h = nl_last_hdr; if (h->nlmsg_seq != nl_sync_seq) { log(L_WARN "nl_get_reply: Ignoring out of sequence netlink packet (%x != %x)", h->nlmsg_seq, nl_sync_seq); continue; } nl_last_hdr = NLMSG_NEXT(h, nl_last_size); return h; } if (nl_last_size) log(L_WARN "nl_get_reply: Found packet remnant of size %d", nl_last_size); nl_last_hdr = NULL; } } static char * nl_error(struct nlmsghdr *h) { struct nlmsgerr *e = NLMSG_DATA(h); if (h->nlmsg_len < NLMSG_LENGTH(sizeof(struct nlmsgerr))) return "Error message truncated"; else return strerror(-e->error); } static struct nlmsghdr * nl_get_scan(void) { struct nlmsghdr *h = nl_get_reply(); if (h->nlmsg_type == NLMSG_DONE) return NULL; if (h->nlmsg_type == NLMSG_ERROR) { log(L_ERR "Netlink error: %s", nl_error(h)); return NULL; } return h; } /* * Parsing of Netlink attributes */ static int nl_attr_len; static void * nl_checkin(struct nlmsghdr *h, int lsize) { nl_attr_len = h->nlmsg_len - NLMSG_LENGTH(lsize); if (nl_attr_len < 0) { log(L_ERR "nl_checkin: underrun by %d bytes", -nl_attr_len); return NULL; } return NLMSG_DATA(h); } static int nl_parse_attrs(struct rtattr *a, struct rtattr **k, int ksize) { int max = ksize / sizeof(struct rtattr *); bzero(k, ksize); while (RTA_OK(a, nl_attr_len)) { if (a->rta_type < max) k[a->rta_type] = a; a = RTA_NEXT(a, nl_attr_len); } if (nl_attr_len) { log(L_ERR "nl_parse_attrs: remnant of size %d", nl_attr_len); return 0; } else return 1; } /* * Scanning of interfaces */ static void nl_parse_link(struct nlmsghdr *h, int scan) { struct ifinfomsg *i; struct rtattr *a[IFLA_STATS+1]; int new = h->nlmsg_type == RTM_NEWLINK; struct iface f; struct iface *ifi; char *name; u32 mtu; unsigned int fl; if (!(i = nl_checkin(h, sizeof(*i))) || !nl_parse_attrs(IFLA_RTA(i), a, sizeof(a))) return; if (!a[IFLA_IFNAME] || RTA_PAYLOAD(a[IFLA_IFNAME]) < 2 || !a[IFLA_MTU] || RTA_PAYLOAD(a[IFLA_MTU]) != 4) { log(L_ERR "nl_parse_link: Malformed message received"); return; } name = RTA_DATA(a[IFLA_IFNAME]); memcpy(&mtu, RTA_DATA(a[IFLA_MTU]), sizeof(u32)); ifi = if_find_by_index(i->ifi_index); if (!new) { DBG("KRT: IF%d(%s) goes down\n", i->ifi_index, name); if (ifi && !scan) { memcpy(&f, ifi, sizeof(struct iface)); f.flags |= IF_ADMIN_DOWN; if_update(&f); } } else { DBG("KRT: IF%d(%s) goes up (mtu=%d,flg=%x)\n", i->ifi_index, name, mtu, i->ifi_flags); if (ifi) memcpy(&f, ifi, sizeof(f)); else { bzero(&f, sizeof(f)); f.index = i->ifi_index; } strncpy(f.name, RTA_DATA(a[IFLA_IFNAME]), sizeof(f.name)-1); f.mtu = mtu; f.flags = 0; fl = i->ifi_flags; if (fl & IFF_UP) f.flags |= IF_LINK_UP; if (fl & IFF_POINTOPOINT) f.flags |= IF_UNNUMBERED | IF_MULTICAST; if (fl & IFF_LOOPBACK) f.flags |= IF_LOOPBACK | IF_IGNORE; if (fl & IFF_BROADCAST) f.flags |= IF_BROADCAST | IF_MULTICAST; if_update(&f); } } static void nl_parse_addr(struct nlmsghdr *h) { struct ifaddrmsg *i; struct rtattr *a[IFA_ANYCAST+1]; int new = h->nlmsg_type == RTM_NEWADDR; struct iface f; struct iface *ifi; if (!(i = nl_checkin(h, sizeof(*i))) || !nl_parse_attrs(IFA_RTA(i), a, sizeof(a))) return; if (i->ifa_family != AF_INET) return; if (!a[IFA_ADDRESS] || RTA_PAYLOAD(a[IFA_ADDRESS]) != sizeof(ip_addr) || !a[IFA_LOCAL] || RTA_PAYLOAD(a[IFA_LOCAL]) != sizeof(ip_addr) || (a[IFA_BROADCAST] && RTA_PAYLOAD(a[IFA_BROADCAST]) != sizeof(ip_addr))) { log(L_ERR "nl_parse_addr: Malformed message received"); return; } if (i->ifa_flags & IFA_F_SECONDARY) { DBG("KRT: Received address message for secondary address which is not supported.\n"); /* FIXME */ return; } ifi = if_find_by_index(i->ifa_index); if (!ifi) { log(L_ERR "KRT: Received address message for unknown interface %d\n", i->ifa_index); return; } memcpy(&f, ifi, sizeof(f)); if (i->ifa_prefixlen > 32 || i->ifa_prefixlen == 31 || (f.flags & IF_UNNUMBERED) && i->ifa_prefixlen != 32) { log(L_ERR "KRT: Invalid prefix length for interface %s: %d\n", f.name, i->ifa_prefixlen); new = 0; } f.ip = f.brd = f.opposite = IPA_NONE; if (!new) { DBG("KRT: IF%d IP address deleted\n"); f.pxlen = 0; } else { memcpy(&f.ip, RTA_DATA(a[IFA_LOCAL]), sizeof(f.ip)); f.ip = ipa_ntoh(f.ip); f.pxlen = i->ifa_prefixlen; if (f.flags & IF_UNNUMBERED) { memcpy(&f.opposite, RTA_DATA(a[IFA_ADDRESS]), sizeof(f.opposite)); f.opposite = f.brd = ipa_ntoh(f.opposite); } else if ((f.flags & IF_BROADCAST) && a[IFA_BROADCAST]) { memcpy(&f.brd, RTA_DATA(a[IFA_BROADCAST]), sizeof(f.brd)); f.brd = ipa_ntoh(f.brd); } /* else a NBMA link */ f.prefix = ipa_and(f.ip, ipa_mkmask(f.pxlen)); DBG("KRT: IF%d IP address set to %I, net %I/%d, brd %I, opp %I\n", f.index, f.ip, f.prefix, f.pxlen, f.brd, f.opposite); } if_update(&f); } void krt_if_scan(struct krt_proto *p) { struct nlmsghdr *h; if_start_update(); nl_request_dump(RTM_GETLINK); while (h = nl_get_scan()) if (h->nlmsg_type == RTM_NEWLINK || h->nlmsg_type == RTM_DELLINK) nl_parse_link(h, 1); else log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type); nl_request_dump(RTM_GETADDR); while (h = nl_get_scan()) if (h->nlmsg_type == RTM_NEWADDR || h->nlmsg_type == RTM_DELADDR) nl_parse_addr(h); else log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type); if_end_update(); } /* * Routes */ int /* FIXME: Check use of this function in krt.c */ krt_capable(rte *e) { rta *a = e->attrs; if (a->cast != RTC_UNICAST) /* FIXME: For IPv6, we might support anycasts as well */ return 0; switch (a->dest) { case RTD_ROUTER: case RTD_DEVICE: case RTD_BLACKHOLE: case RTD_UNREACHABLE: case RTD_PROHIBIT: break; default: return 0; } return 1; } static void krt_delete(rte *e) { /* FIXME */ } static void krt_insert(rte *e) { /* FIXME */ } void krt_set_notify(struct proto *p, net *n, rte *new, rte *old) { /* FIXME: Use route updates if possible */ if (old) krt_delete(old); if (new) krt_insert(new); } struct iface * krt_temp_iface(struct krt_proto *p, unsigned index) { struct iface *i, *j; WALK_LIST(i, p->scan.temp_ifs) if (i->index == index) return i; i = mb_allocz(p->p.pool, sizeof(struct iface)); if (j = if_find_by_index(index)) strcpy(i->name, j->name); else strcpy(i->name, "?"); i->index = index; add_tail(&p->scan.temp_ifs, &i->n); return i; } static void nl_parse_route(struct krt_proto *p, struct nlmsghdr *h, int scan) { struct rtmsg *i; struct rtattr *a[RTA_CACHEINFO+1]; int new = h->nlmsg_type == RTM_NEWROUTE; ip_addr dst; rta ra; rte *e; net *net; u32 oif; if (!(i = nl_checkin(h, sizeof(*i))) || !nl_parse_attrs(RTM_RTA(i), a, sizeof(a))) return; if (i->rtm_family != AF_INET) return; if ((a[RTA_DST] && RTA_PAYLOAD(a[RTA_DST]) != sizeof(ip_addr)) || (a[RTA_OIF] && RTA_PAYLOAD(a[RTA_OIF]) != 4) || (a[RTA_GATEWAY] && RTA_PAYLOAD(a[RTA_GATEWAY]) != sizeof(ip_addr))) { log(L_ERR "nl_parse_route: Malformed message received"); return; } if (i->rtm_table != RT_TABLE_MAIN) /* FIXME: What about other tables? */ return; if (i->rtm_tos != 0) /* FIXME: What about TOS? */ return; if (!new) { DBG("KRT: Ignoring route deletion\n"); return; } if (a[RTA_DST]) { memcpy(&dst, RTA_DATA(a[RTA_DST]), sizeof(dst)); dst = ipa_ntoh(dst); } else dst = IPA_NONE; if (a[RTA_OIF]) memcpy(&oif, RTA_DATA(a[RTA_OIF]), sizeof(oif)); else oif = ~0; DBG("Got %I/%d, type=%d, oif=%d\n", dst, i->rtm_dst_len, i->rtm_type, oif); net = net_get(&master_table, 0, dst, i->rtm_dst_len); ra.proto = &p->p; ra.source = RTS_INHERIT; ra.scope = SCOPE_UNIVERSE; /* FIXME: Use kernel scope? */ ra.cast = RTC_UNICAST; ra.tos = ra.flags = ra.aflags = 0; ra.from = IPA_NONE; ra.gw = IPA_NONE; ra.iface = NULL; ra.attrs = NULL; switch (i->rtm_type) { case RTN_UNICAST: if (oif == ~0U) { log(L_ERR "KRT: Mysterious route with no OIF (%I/%d)", net->n.prefix, net->n.pxlen); return; } if (a[RTA_GATEWAY]) { neighbor *ng; ra.dest = RTD_ROUTER; memcpy(&ra.gw, RTA_DATA(a[RTA_GATEWAY]), sizeof(ra.gw)); ra.gw = ipa_ntoh(ra.gw); ng = neigh_find(&p->p, &ra.gw, 0); if (ng) ra.iface = ng->iface; else /* FIXME: Remove this warning? */ log(L_WARN "Kernel told us to use non-neighbor %I for %I/%d", ra.gw, net->n.prefix, net->n.pxlen); } else { ra.dest = RTD_DEVICE; ra.iface = krt_temp_iface(p, oif); } break; case RTN_BLACKHOLE: ra.dest = RTD_BLACKHOLE; break; case RTN_UNREACHABLE: ra.dest = RTD_UNREACHABLE; break; case RTN_PROHIBIT: ra.dest = RTD_PROHIBIT; break; /* FIXME: What about RTN_THROW? */ default: DBG("KRT: Ignoring route with type=%d\n", i->rtm_type); return; } e = rte_get_temp(&ra); e->net = net; krt_got_route(p, e); } void krt_scan_fire(struct krt_proto *p) { struct nlmsghdr *h; nl_request_dump(RTM_GETROUTE); while (h = nl_get_scan()) if (h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE) nl_parse_route(p, h, 1); else log(L_DEBUG "nl_scan_fire: Unknown packet received (type=%d)", h->nlmsg_type); } /* * Asynchronous Netlink interface */ static sock *nl_async_sk; /* BIRD socket for asynchronous notifications */ static int nl_async_hook(sock *sk, int size) { DBG("nl_async_hook\n"); return 0; } static void nl_open_async(struct krt_proto *p) { sock *sk; struct sockaddr_nl sa; DBG("KRT: Opening async netlink socket\n"); sk = nl_async_sk = sk_new(p->p.pool); sk->type = SK_MAGIC; sk->rx_hook = nl_async_hook; sk->fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE); if (sk->fd < 0 || sk_open(sk)) die("Unable to open secondary rtnetlink socket: %m"); bzero(&sa, sizeof(sa)); sa.nl_family = AF_NETLINK; sa.nl_groups = RTMGRP_LINK | RTMGRP_IPV4_IFADDR | RTMGRP_IPV4_ROUTE; if (bind(sk->fd, (struct sockaddr *) &sa, sizeof(sa)) < 0) die("Unable to bind secondary rtnetlink socket: %m"); } /* * Interface to the UNIX krt module */ void krt_scan_preconfig(struct krt_config *x) { x->scan.async = 1; } void krt_scan_start(struct krt_proto *p) { init_list(&p->scan.temp_ifs); nl_open(); if (KRT_CF->scan.async) nl_open_async(p); } void krt_scan_shutdown(struct krt_proto *p) { }