diff options
author | Matthias Schiffer <mschiffer@universe-factory.net> | 2010-11-16 03:47:07 +0100 |
---|---|---|
committer | Matthias Schiffer <mschiffer@universe-factory.net> | 2010-11-16 03:47:07 +0100 |
commit | 5b885bf70848908c7fed07c8efba18ea316379d4 (patch) | |
tree | b4a94b0b5a49d2dd075a054270c386419c42900d /proto | |
parent | e6eee664723aa0e1d87eac65216cdf12636e3284 (diff) | |
download | bird-5b885bf70848908c7fed07c8efba18ea316379d4.tar bird-5b885bf70848908c7fed07c8efba18ea316379d4.zip |
BGP: Allow receiving multicast routes
Diffstat (limited to 'proto')
-rw-r--r-- | proto/bgp/attrs.c | 124 | ||||
-rw-r--r-- | proto/bgp/bgp.h | 18 | ||||
-rw-r--r-- | proto/bgp/packets.c | 72 |
3 files changed, 136 insertions, 78 deletions
diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c index ef5d024..b9a964c 100644 --- a/proto/bgp/attrs.c +++ b/proto/bgp/attrs.c @@ -215,22 +215,48 @@ bgp_format_cluster_list(eattr *a, byte *buf, int buflen UNUSED) } static int -bgp_check_reach_nlri(struct bgp_proto *p UNUSED, byte *a UNUSED, int len UNUSED) +bgp_check_reach_nlri(struct bgp_proto *p, byte *a, int len) { -#ifdef IPV6 - p->mp_reach_start = a; - p->mp_reach_len = len; -#endif + unsigned af, sub; + + if (len < 3) + return 5; + + af = get_u16(a); + sub = a[2]; + + if (af == BGP_AF) + { + if (sub == BGP_SAF_UNICAST || (sub == BGP_SAF_MULTICAST && p->multicast_session)) + { + p->mp_reach_start = a; + p->mp_reach_len = len; + } + } + return -1; } static int -bgp_check_unreach_nlri(struct bgp_proto *p UNUSED, byte *a UNUSED, int len UNUSED) +bgp_check_unreach_nlri(struct bgp_proto *p, byte *a, int len) { -#ifdef IPV6 - p->mp_unreach_start = a; - p->mp_unreach_len = len; -#endif + unsigned af, sub; + + if (len < 3) + return 5; + + af = get_u16(a); + sub = a[2]; + + if (af == BGP_AF) + { + if (sub == BGP_SAF_UNICAST || (sub == BGP_SAF_MULTICAST && p->multicast_session)) + { + p->mp_unreach_start = a; + p->mp_unreach_len = len; + } + } + return -1; } @@ -585,21 +611,21 @@ bgp_normalize_set(u32 *dest, u32 *src, unsigned cnt) } static void -bgp_rehash_buckets(struct bgp_proto *p) +bgp_rehash_buckets(struct bgp_proto *p, struct bgp_bucket_info *bi) { - struct bgp_bucket **old = p->bucket_hash; + struct bgp_bucket **old = bi->bucket_hash; struct bgp_bucket **new; - unsigned oldn = p->hash_size; + unsigned oldn = bi->hash_size; unsigned i, e, mask; struct bgp_bucket *b; - p->hash_size = p->hash_limit; - DBG("BGP: Rehashing bucket table from %d to %d\n", oldn, p->hash_size); - p->hash_limit *= 4; - if (p->hash_limit >= 65536) - p->hash_limit = ~0; - new = p->bucket_hash = mb_allocz(p->p.pool, p->hash_size * sizeof(struct bgp_bucket *)); - mask = p->hash_size - 1; + bi->hash_size = bi->hash_limit; + DBG("BGP: Rehashing bucket table from %d to %d\n", oldn, bi->hash_size); + bi->hash_limit *= 4; + if (bi->hash_limit >= 65536) + bi->hash_limit = ~0; + new = bi->bucket_hash = mb_allocz(p->p.pool, bi->hash_size * sizeof(struct bgp_bucket *)); + mask = bi->hash_size - 1; for (i=0; i<oldn; i++) while (b = old[i]) { @@ -615,7 +641,7 @@ bgp_rehash_buckets(struct bgp_proto *p) } static struct bgp_bucket * -bgp_new_bucket(struct bgp_proto *p, ea_list *new, unsigned hash) +bgp_new_bucket(struct bgp_proto *p, struct bgp_bucket_info *bi, ea_list *new, unsigned hash) { struct bgp_bucket *b; unsigned ea_size = sizeof(ea_list) + new->count * sizeof(eattr); @@ -623,7 +649,7 @@ bgp_new_bucket(struct bgp_proto *p, ea_list *new, unsigned hash) unsigned size = sizeof(struct bgp_bucket) + ea_size; unsigned i; byte *dest; - unsigned index = hash & (p->hash_size - 1); + unsigned index = hash & (bi->hash_size - 1); /* Gather total size of non-inline attributes */ for (i=0; i<new->count; i++) @@ -635,13 +661,13 @@ bgp_new_bucket(struct bgp_proto *p, ea_list *new, unsigned hash) /* Create the bucket and hash it */ b = mb_alloc(p->p.pool, size); - b->hash_next = p->bucket_hash[index]; + b->hash_next = bi->bucket_hash[index]; if (b->hash_next) b->hash_next->hash_prev = b; - p->bucket_hash[index] = b; + bi->bucket_hash[index] = b; b->hash_prev = NULL; b->hash = hash; - add_tail(&p->bucket_queue, &b->send_node); + add_tail(&bi->bucket_queue, &b->send_node); init_list(&b->prefixes); memcpy(b->eattrs, new, ea_size); dest = ((byte *)b->eattrs) + ea_size_aligned; @@ -661,15 +687,15 @@ bgp_new_bucket(struct bgp_proto *p, ea_list *new, unsigned hash) } /* If needed, rehash */ - p->hash_count++; - if (p->hash_count > p->hash_limit) - bgp_rehash_buckets(p); + bi->hash_count++; + if (bi->hash_count > bi->hash_limit) + bgp_rehash_buckets(p, bi); return b; } static struct bgp_bucket * -bgp_get_bucket(struct bgp_proto *p, net *n, ea_list *attrs, int originate) +bgp_get_bucket(struct bgp_proto *p, struct bgp_bucket_info *bi, net *n, ea_list *attrs, int originate) { ea_list *new; unsigned i, cnt, hash, code; @@ -735,7 +761,7 @@ bgp_get_bucket(struct bgp_proto *p, net *n, ea_list *attrs, int originate) /* Hash */ hash = ea_hash(new); - for(b=p->bucket_hash[hash & (p->hash_size - 1)]; b; b=b->hash_next) + for(b=bi->bucket_hash[hash & (bi->hash_size - 1)]; b; b=b->hash_next) if (b->hash == hash && ea_same(b->eattrs, new)) { DBG("Found bucket.\n"); @@ -760,18 +786,18 @@ bgp_get_bucket(struct bgp_proto *p, net *n, ea_list *attrs, int originate) /* Create new bucket */ DBG("Creating bucket.\n"); - return bgp_new_bucket(p, new, hash); + return bgp_new_bucket(p, bi, new, hash); } void -bgp_free_bucket(struct bgp_proto *p, struct bgp_bucket *buck) +bgp_free_bucket(struct bgp_bucket_info *bi, struct bgp_bucket *buck) { if (buck->hash_next) buck->hash_next->hash_prev = buck->hash_prev; if (buck->hash_prev) buck->hash_prev->hash_next = buck->hash_next; else - p->bucket_hash[buck->hash & (p->hash_size-1)] = buck->hash_next; + bi->bucket_hash[buck->hash & (bi->hash_size-1)] = buck->hash_next; mb_free(buck); } @@ -781,24 +807,25 @@ bgp_rt_notify(struct proto *P, rtable *tbl UNUSED, net *n, rte *new, rte *old UN struct bgp_proto *p = (struct bgp_proto *) P; struct bgp_bucket *buck; struct bgp_prefix *px; + struct bgp_bucket_info *bi = (n->cast == RTC_MULTICAST) ? &p->multicast_buckets : &p->unicast_buckets; DBG("BGP: Got route %I/%d %s\n", n->n.prefix, n->n.pxlen, new ? "up" : "down"); if (new) { - buck = bgp_get_bucket(p, n, attrs, new->attrs->source != RTS_BGP); + buck = bgp_get_bucket(p, bi, n, attrs, new->attrs->source != RTS_BGP); if (!buck) /* Inconsistent attribute list */ return; } else { - if (!(buck = p->withdraw_bucket)) + if (!(buck = bi->withdraw_bucket)) { - buck = p->withdraw_bucket = mb_alloc(P->pool, sizeof(struct bgp_bucket)); + buck = bi->withdraw_bucket = mb_alloc(P->pool, sizeof(struct bgp_bucket)); init_list(&buck->prefixes); } } - px = fib_get(&p->prefix_fib, &n->n.prefix, n->n.pxlen); + px = fib_get(&bi->prefix_fib, &n->n.prefix, n->n.pxlen); if (px->bucket_node.next) { DBG("\tRemoving old entry.\n"); @@ -1370,6 +1397,7 @@ bgp_decode_attrs(struct bgp_conn *conn, byte *attr, unsigned int len, struct lin ea->attrs[0].id = EA_CODE(EAP_BGP, code); ea->attrs[0].flags = flags; ea->attrs[0].type = type; + if (type & EAF_EMBEDDED) ad = NULL; else @@ -1399,13 +1427,12 @@ bgp_decode_attrs(struct bgp_conn *conn, byte *attr, unsigned int len, struct lin break; } } + } -#ifdef IPV6 /* If we received MP_REACH_NLRI we should check mandatory attributes */ if (bgp->mp_reach_len != 0) mandatory = 1; -#endif /* If there is no (reachability) NLRI, we should exit now */ if (! mandatory) @@ -1481,15 +1508,22 @@ bgp_get_attr(eattr *a, byte *buf, int buflen) return GA_NAME; } +static void +bgp_attr_init_buckets(struct bgp_proto *p, struct bgp_bucket_info *bi) +{ + bi->hash_size = 256; + bi->hash_limit = bi->hash_size * 4; + bi->bucket_hash = mb_allocz(p->p.pool, bi->hash_size * sizeof(struct bgp_bucket *)); + init_list(&bi->bucket_queue); + bi->withdraw_bucket = NULL; + fib_init(&bi->prefix_fib, p->p.pool, sizeof(struct bgp_prefix), 0, bgp_init_prefix); +} + void bgp_attr_init(struct bgp_proto *p) { - p->hash_size = 256; - p->hash_limit = p->hash_size * 4; - p->bucket_hash = mb_allocz(p->p.pool, p->hash_size * sizeof(struct bgp_bucket *)); - init_list(&p->bucket_queue); - p->withdraw_bucket = NULL; - fib_init(&p->prefix_fib, p->p.pool, sizeof(struct bgp_prefix), 0, bgp_init_prefix); + bgp_attr_init_buckets(p, &p->unicast_buckets); + bgp_attr_init_buckets(p, &p->multicast_buckets); } void diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h index 359bd9c..6f998b0 100644 --- a/proto/bgp/bgp.h +++ b/proto/bgp/bgp.h @@ -80,6 +80,14 @@ struct bgp_conn { unsigned hold_time, keepalive_time; /* Times calculated from my and neighbor's requirements */ }; +struct bgp_bucket_info { + struct bgp_bucket **bucket_hash; /* Hash table of attribute buckets */ + unsigned int hash_size, hash_count, hash_limit; + struct fib prefix_fib; /* Prefixes to be sent */ + list bucket_queue; /* Queue of buckets to send */ + struct bgp_bucket *withdraw_bucket; /* Withdrawn routes */ +}; + struct bgp_proto { struct proto p; struct bgp_config *cf; /* Shortcut to BGP configuration */ @@ -87,6 +95,7 @@ struct bgp_proto { int start_state; /* Substates that partitions BS_START */ int is_internal; /* Internal BGP connection (local_as == remote_as) */ int as4_session; /* Session uses 4B AS numbers in AS_PATH (both sides support it) */ + int multicast_session; /* Both sides support multicast routes */ u32 local_id; /* BGP identifier of this router */ u32 remote_id; /* BGP identifier of the neighbor */ u32 rr_cluster_id; /* Route reflector cluster ID */ @@ -101,11 +110,8 @@ struct bgp_proto { rtable *igp_table; /* Table used for recursive next hop lookups */ struct event *event; /* Event for respawning and shutting process */ struct timer *startup_timer; /* Timer used to delay protocol startup due to previous errors (startup_delay) */ - struct bgp_bucket **bucket_hash; /* Hash table of attribute buckets */ - unsigned int hash_size, hash_count, hash_limit; - struct fib prefix_fib; /* Prefixes to be sent */ - list bucket_queue; /* Queue of buckets to send */ - struct bgp_bucket *withdraw_bucket; /* Withdrawn routes */ + struct bgp_bucket_info unicast_buckets; + struct bgp_bucket_info multicast_buckets; unsigned startup_delay; /* Time to delay protocol startup by due to errors */ bird_clock_t last_proto_error; /* Time of last error that leads to protocol stop */ u8 last_error_class; /* Error class of last error */ @@ -188,7 +194,7 @@ void bgp_rt_notify(struct proto *P, rtable *tbl UNUSED, net *n, rte *new, rte *o int bgp_import_control(struct proto *, struct rte **, struct ea_list **, struct linpool *); void bgp_attr_init(struct bgp_proto *); unsigned int bgp_encode_attrs(struct bgp_proto *p, byte *w, ea_list *attrs, int remains); -void bgp_free_bucket(struct bgp_proto *p, struct bgp_bucket *buck); +void bgp_free_bucket(struct bgp_bucket_info *bi, struct bgp_bucket *buck); void bgp_get_route_info(struct rte *, byte *buf, struct ea_list *attrs); inline static void bgp_attach_attr_ip(struct ea_list **to, struct linpool *pool, unsigned attr, ip_addr a) diff --git a/proto/bgp/packets.c b/proto/bgp/packets.c index 1a1e7b7..8e657cc 100644 --- a/proto/bgp/packets.c +++ b/proto/bgp/packets.c @@ -6,7 +6,7 @@ * Can be freely distributed and used under the terms of the GNU GPL. */ -#undef LOCAL_DEBUG +#define LOCAL_DEBUG #include "nest/bird.h" #include "nest/iface.h" @@ -228,7 +228,7 @@ bgp_create_open(struct bgp_conn *conn, byte *buf) } static unsigned int -bgp_encode_prefixes(struct bgp_proto *p, byte *w, struct bgp_bucket *buck, unsigned int remains) +bgp_encode_prefixes(struct bgp_bucket_info *bi, byte *w, struct bgp_bucket *buck, unsigned int remains) { byte *start = w; ip_addr a; @@ -246,20 +246,20 @@ bgp_encode_prefixes(struct bgp_proto *p, byte *w, struct bgp_bucket *buck, unsig w += bytes; remains -= bytes + 1; rem_node(&px->bucket_node); - fib_delete(&p->prefix_fib, px); + fib_delete(&bi->prefix_fib, px); } return w - start; } static void -bgp_flush_prefixes(struct bgp_proto *p, struct bgp_bucket *buck) +bgp_flush_prefixes(struct bgp_proto *p, struct bgp_bucket_info *bi, struct bgp_bucket *buck) { while (!EMPTY_LIST(buck->prefixes)) { struct bgp_prefix *px = SKIP_BACK(struct bgp_prefix, bucket_node, HEAD(buck->prefixes)); log(L_ERR "%s: - route %I/%d skipped", p->p.name, px->n.prefix, px->n.pxlen); rem_node(&px->bucket_node); - fib_delete(&p->prefix_fib, px); + fib_delete(&bi->prefix_fib, px); } } @@ -269,6 +269,7 @@ static byte * bgp_create_update(struct bgp_conn *conn, byte *buf) { struct bgp_proto *p = conn->bgp; + struct bgp_bucket_info *bi = &p->unicast_buckets; struct bgp_bucket *buck; int remains = BGP_MAX_PACKET_LENGTH - BGP_HEADER_LENGTH - 4; byte *w; @@ -277,10 +278,10 @@ bgp_create_update(struct bgp_conn *conn, byte *buf) int a_size = 0; w = buf+2; - if ((buck = p->withdraw_bucket) && !EMPTY_LIST(buck->prefixes)) + if ((buck = bi->withdraw_bucket) && !EMPTY_LIST(buck->prefixes)) { DBG("Withdrawn routes:\n"); - wd_size = bgp_encode_prefixes(p, w, buck, remains); + wd_size = bgp_encode_prefixes(bi, w, buck, remains); w += wd_size; remains -= wd_size; } @@ -288,13 +289,13 @@ bgp_create_update(struct bgp_conn *conn, byte *buf) if (remains >= 3072) { - while ((buck = (struct bgp_bucket *) HEAD(p->bucket_queue))->send_node.next) + while ((buck = (struct bgp_bucket *) HEAD(bi->bucket_queue))->send_node.next) { if (EMPTY_LIST(buck->prefixes)) { DBG("Deleting empty bucket %p\n", buck); rem_node(&buck->send_node); - bgp_free_bucket(p, buck); + bgp_free_bucket(bi, buck); continue; } @@ -304,15 +305,15 @@ bgp_create_update(struct bgp_conn *conn, byte *buf) if (a_size < 0) { log(L_ERR "%s: Attribute list too long, skipping corresponding routes", p->p.name); - bgp_flush_prefixes(p, buck); + bgp_flush_prefixes(p, bi, buck); rem_node(&buck->send_node); - bgp_free_bucket(p, buck); + bgp_free_bucket(bi, buck); continue; } put_u16(w, a_size); w += a_size + 2; - r_size = bgp_encode_prefixes(p, w, buck, remains - a_size); + r_size = bgp_encode_prefixes(bi, w, buck, remains - a_size); w += r_size; break; } @@ -337,6 +338,7 @@ static byte * bgp_create_update(struct bgp_conn *conn, byte *buf) { struct bgp_proto *p = conn->bgp; + struct bgp_bucket_info *bi = &p->unicast_buckets; struct bgp_bucket *buck; int size, second, rem_stored; int remains = BGP_MAX_PACKET_LENGTH - BGP_HEADER_LENGTH - 4; @@ -349,14 +351,14 @@ bgp_create_update(struct bgp_conn *conn, byte *buf) put_u16(buf, 0); w = buf+4; - if ((buck = p->withdraw_bucket) && !EMPTY_LIST(buck->prefixes)) + if ((buck = bi->withdraw_bucket) && !EMPTY_LIST(buck->prefixes)) { DBG("Withdrawn routes:\n"); tmp = bgp_attach_attr_wa(&ea, bgp_linpool, BA_MP_UNREACH_NLRI, remains-8); *tmp++ = 0; *tmp++ = BGP_AF_IPV6; - *tmp++ = 1; - ea->attrs[0].u.ptr->length = 3 + bgp_encode_prefixes(p, tmp, buck, remains-11); + *tmp++ = BGP_SAF_UNICAST; + ea->attrs[0].u.ptr->length = 3 + bgp_encode_prefixes(bi, tmp, buck, remains-11); size = bgp_encode_attrs(p, w, ea, remains); ASSERT(size >= 0); w += size; @@ -365,13 +367,13 @@ bgp_create_update(struct bgp_conn *conn, byte *buf) if (remains >= 3072) { - while ((buck = (struct bgp_bucket *) HEAD(p->bucket_queue))->send_node.next) + while ((buck = (struct bgp_bucket *) HEAD(bi->bucket_queue))->send_node.next) { if (EMPTY_LIST(buck->prefixes)) { DBG("Deleting empty bucket %p\n", buck); rem_node(&buck->send_node); - bgp_free_bucket(p, buck); + bgp_free_bucket(bi, buck); continue; } @@ -383,9 +385,9 @@ bgp_create_update(struct bgp_conn *conn, byte *buf) if (size < 0) { log(L_ERR "%s: Attribute list too long, skipping corresponding routes", p->p.name); - bgp_flush_prefixes(p, buck); + bgp_flush_prefixes(p, bi, buck); rem_node(&buck->send_node); - bgp_free_bucket(p, buck); + bgp_free_bucket(bi, buck); continue; } w += size; @@ -433,9 +435,9 @@ bgp_create_update(struct bgp_conn *conn, byte *buf) log(L_ERR "%s: Missing link-local next hop address, skipping corresponding routes", p->p.name); w = w_stored; remains = rem_stored; - bgp_flush_prefixes(p, buck); + bgp_flush_prefixes(p, bi, buck); rem_node(&buck->send_node); - bgp_free_bucket(p, buck); + bgp_free_bucket(bi, buck); continue; case MLL_IGNORE: break; @@ -467,7 +469,7 @@ bgp_create_update(struct bgp_conn *conn, byte *buf) } *tmp++ = 0; /* No SNPA information */ - tmp += bgp_encode_prefixes(p, tmp, buck, remains - (8+3+32+1)); + tmp += bgp_encode_prefixes(bi, tmp, buck, remains - (8+3+32+1)); ea->attrs[0].u.ptr->length = tmp - tstart; size = bgp_encode_attrs(p, w, ea, remains); ASSERT(size >= 0); @@ -804,6 +806,7 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, int len) conn->keepalive_time = p->cf->keepalive_time ? : conn->hold_time / 3; p->remote_id = id; p->as4_session = conn->want_as4_support && conn->peer_as4_support; + p->multicast_session = conn->want_multicast_support && conn->peer_multicast_support; DBG("BGP: Hold timer set to %d, keepalive to %d, AS to %d, ID to %x, AS4 session to %d\n", conn->hold_time, conn->keepalive_time, p->remote_as, p->remote_id, p->as4_session); @@ -930,7 +933,7 @@ bgp_do_rx_update(struct bgp_conn *conn, } \ else \ af = 0; \ - if (af == BGP_AF_IPV6) + if (af == BGP_AF) static void bgp_do_rx_update(struct bgp_conn *conn, @@ -947,6 +950,7 @@ bgp_do_rx_update(struct bgp_conn *conn, ip_addr prefix; net *n; int err = 0, pxlen; + int multicast; p->mp_reach_len = 0; p->mp_unreach_len = 0; @@ -956,17 +960,30 @@ bgp_do_rx_update(struct bgp_conn *conn, DO_NLRI(mp_unreach) { + multicast = (sub == BGP_SAF_MULTICAST); + + if (multicast) + ASSERT(p->multicast_session); + while (len) { DECODE_PREFIX(x, len); - DBG("Withdraw %I/%d\n", prefix, pxlen); - if (n = net_find(p->p.table, prefix, pxlen)) + DBG("Withdraw %I/%d%s\n", prefix, pxlen, multicast ? " MC" : ""); + if (n = net_find_cast(p->p.table, prefix, pxlen, multicast ? RTC_MULTICAST : RTC_UNICAST)) rte_update(p->p.table, n, &p->p, &p->p, NULL); } } DO_NLRI(mp_reach) { + multicast = (sub == BGP_SAF_MULTICAST); + + if (multicast) + { + ASSERT(p->multicast_session); + a0->cast = RTC_MULTICAST; + } + /* Create fake NEXT_HOP attribute */ if (len < 1 || (*x != 16 && *x != 32) || len < *x + 2) goto bad; @@ -991,13 +1008,14 @@ bgp_do_rx_update(struct bgp_conn *conn, if (bgp_set_next_hop(p, a0)) { a = rta_lookup(a0); + while (len) { rte *e; DECODE_PREFIX(x, len); - DBG("Add %I/%d\n", prefix, pxlen); + DBG("Add %I/%d%s\n", prefix, pxlen, multicast ? " MC" : ""); e = rte_get_temp(rta_clone(a)); - n = net_get(p->p.table, prefix, pxlen); + n = net_get_cast(p->p.table, prefix, pxlen, multicast ? RTC_MULTICAST : RTC_UNICAST); e->net = n; e->pflags = 0; rte_update(p->p.table, n, &p->p, &p->p, e); |