diff options
-rw-r--r-- | conf/conf.c | 5 | ||||
-rw-r--r-- | doc/bird.conf.example | 3 | ||||
-rw-r--r-- | doc/bird.sgml | 57 | ||||
-rw-r--r-- | filter/config.Y | 11 | ||||
-rw-r--r-- | filter/filter.c | 26 | ||||
-rw-r--r-- | filter/filter.h | 1 | ||||
-rw-r--r-- | lib/socket.h | 2 | ||||
-rw-r--r-- | nest/a-path.c | 223 | ||||
-rw-r--r-- | nest/a-set.c | 8 | ||||
-rw-r--r-- | nest/attrs.h | 25 | ||||
-rw-r--r-- | proto/bgp/attrs.c | 653 | ||||
-rw-r--r-- | proto/bgp/bgp.c | 35 | ||||
-rw-r--r-- | proto/bgp/bgp.h | 26 | ||||
-rw-r--r-- | proto/bgp/config.Y | 20 | ||||
-rw-r--r-- | proto/bgp/packets.c | 158 | ||||
-rw-r--r-- | sysdep/linux/netlink/netlink.c | 9 | ||||
-rw-r--r-- | sysdep/linux/sysio.h | 21 | ||||
-rw-r--r-- | sysdep/unix/io.c | 75 | ||||
-rw-r--r-- | sysdep/unix/krt-set.c | 2 | ||||
-rw-r--r-- | sysdep/unix/krt.c | 2 |
20 files changed, 1114 insertions, 248 deletions
diff --git a/conf/conf.c b/conf/conf.c index a744dca..fefcac5 100644 --- a/conf/conf.c +++ b/conf/conf.c @@ -266,7 +266,7 @@ config_commit(struct config *c) } if (old_config) /* Reconfiguration already in progress */ { - if (shutting_down) + if (shutting_down == 2) { log(L_INFO "New configuration discarded due to shutdown"); config_free(c); @@ -314,8 +314,9 @@ order_shutdown(void) init_list(&c->protos); init_list(&c->tables); c->shutdown = 1; - config_commit(c); shutting_down = 1; + config_commit(c); + shutting_down = 2; } /** diff --git a/doc/bird.conf.example b/doc/bird.conf.example index 05259d5..22221d4 100644 --- a/doc/bird.conf.example +++ b/doc/bird.conf.example @@ -179,6 +179,9 @@ protocol static { # default bgp_med 0; # MED value we use for comparison when none is defined # default bgp_local_pref 0; # The same for local preference # source address 62.168.0.14; # What local address we use for the TCP connection +# password "secret" # Password used for MD5 authentication +# rr client; # I am a route reflector and the neighor is my client +# rr cluster id 1.0.0.1 # Use this value for cluster id instead of my router id # export where source=RTS_STATIC; # export filter { # if source = RTS_STATIC then { diff --git a/doc/bird.sgml b/doc/bird.sgml index 87113fa..8fa55f8 100644 --- a/doc/bird.sgml +++ b/doc/bird.sgml @@ -655,13 +655,19 @@ routing table it wishes to export along with complete path information route) in order to avoid routing loops. <p>BIRD supports all requirements of the BGP4 standard as defined in -RFC 1771<htmlurl url="ftp://ftp.rfc-editor.org/in-notes/rfc1771.txt"> -including several enhancements from the -latest draft<htmlurl url="ftp://ftp.rfc-editor.org/internet-drafts/draft-ietf-idr-bgp4-09.txt">. -It also supports the community attributes as per -RFC 1997<htmlurl url="ftp://ftp.rfc-editor.org/in-notes/rfc1997.txt">, -capability negotiation defined in -RFC 2842<htmlurl url="ftp://ftp.rfc-editor.org/in-notes/rfc2842.txt">. +RFC 4271<htmlurl url="ftp://ftp.rfc-editor.org/in-notes/rfc4271.txt"> +It also supports the community attributes +(RFC 1997<htmlurl url="ftp://ftp.rfc-editor.org/in-notes/rfc1997.txt">), +capability negotiation +(RFC 3392<htmlurl url="ftp://ftp.rfc-editor.org/in-notes/rfc3392.txt">), +MD5 password authentication +(RFC 2385<htmlurl url="ftp://ftp.rfc-editor.org/in-notes/rfc2385.txt">), +route reflectors +(RFC 4456<htmlurl url="ftp://ftp.rfc-editor.org/in-notes/rfc4456.txt">), +and 4B AS numbers +(RFC 4893<htmlurl url="ftp://ftp.rfc-editor.org/in-notes/rfc4893.txt">). + + For IPv6, it uses the standard multiprotocol extensions defined in RFC 2283<htmlurl url="ftp://ftp.rfc-editor.org/in-notes/rfc2283.txt"> including changes described in the @@ -721,6 +727,27 @@ for each neighbor using the following configuration parameters: for next hop calculation. Default: the address of the local end of the interface our neighbor is connected to. + <tag>password <m/string/</tag> Use this password for MD5 authentication + of BGP sessions. Default: no authentication. + + <tag>rr client</tag> Be a route reflector and treat neighbor as + route reflection client. Default: disabled. + + <tag>rr cluster id <m/IPv4 address/</tag> Route reflectors use cluster id + to avoid route reflection loops. When there is one route reflector in a cluster + it usually uses its router id as a cluster id, but when there are more route + reflectors in a cluster, these need to be configured (using this option) to + use a common cluster id. Clients in a cluster need not known their cluster + id and this option is not allowed to them Default: a same as router id. + + <tag>enable as4 <m/switch/</tag> BGP protocol was designed to use 2B AS numbers + and was extended later to allow 4B AS number. BIRD supports 4B AS extension, + but by disabling this option it can be persuaded not to advertise it and + to maintain old-style sessions with its neighbors. This might be useful for + circumventing bugs in neighbor's implementation of 4B AS extension. + Even when disabled (off), BIRD behaves internally as AS4-aware BGP router. + Default: on. + <tag>disable after error <m/switch/</tag> When an error is encountered (either locally or by the other side), disable the instance automatically and wait for an administrator to fix the problem manually. Default: off. @@ -757,7 +784,7 @@ for each neighbor using the following configuration parameters: <tag>default bgp_med <m/number/</tag> Value of the Multiple Exit Discriminator to be used during route selection when the MED attribute - is missing. Default: infinite. + is missing. Default: 0. <tag>default bgp_local_pref <m/number/</tag> Value of the Local Preference to be used during route selection when the Local Preference attribute @@ -779,10 +806,16 @@ with `<tt/O/') are optional. selection among multiple BGP routes (see the selection rules above). It's used as an additional metric which is propagated through the whole local AS. - <tag>int <cf/bgp_med/ [IO]</tag> The Multiple Exit Discriminator of the route - is an optional attribute which is often used within the local AS to - reflect interior distances to various boundary routers. See the route selection - rules above for exact semantics. + <tag>int <cf/bgp_med/ [O]</tag> The Multiple Exit Discriminator of the route + is an optional attribute which is used on on external (inter-AS) links to + convey to an adjacent AS the optimal entry point into the local AS. + The received attribute may be also propagated over internal BGP links + (and this is default behavior). The attribute value is zeroed when a route + is exported from a routing table to a BGP instance to ensure that the attribute + received from a neighboring AS is not propagated to other neighboring ASes. + A new value might be set in the export filter of a BGP instance. + See RFC 4451<htmlurl url="ftp://ftp.rfc-editor.org/in-notes/rfc4451.txt"> + for further discussion of BGP MED attribute. <tag>enum <cf/bgp_origin/</tag> Origin of the route: either <cf/ORIGIN_IGP/ if the route has originated in an interior routing protocol or diff --git a/filter/config.Y b/filter/config.Y index d4bf44c..fdfb2e7 100644 --- a/filter/config.Y +++ b/filter/config.Y @@ -39,7 +39,6 @@ CF_KEYWORDS(FUNCTION, PRINT, PRINTN, UNSET, RETURN, %type <v> set_atom fprefix fprefix_s fipa %type <s> decls declsn one_decl function_params %type <h> bgp_path -%type <i> bgp_one CF_GRAMMAR @@ -273,14 +272,12 @@ switch_body: /* EMPTY */ { $$ = NULL; } /* CONST '(' expr ')' { $$ = f_new_inst(); $$->code = 'c'; $$->aux = T_INT; $$->a2.i = $3; } */ -bgp_one: - NUM { $$ = $1; } - | '?' { $$ = PM_ANY; } - ; bgp_path: - bgp_one { $$ = cfg_alloc(sizeof(struct f_path_mask)); $$->next = NULL; $$->val = $1; } - | bgp_one bgp_path { $$ = cfg_alloc(sizeof(struct f_path_mask)); $$->next = $2; $$->val = $1; } + NUM { $$ = cfg_alloc(sizeof(struct f_path_mask)); $$->next = NULL; $$->val = $1; $$->any = 0; } + | '?' { $$ = cfg_alloc(sizeof(struct f_path_mask)); $$->next = NULL; $$->val = 0; $$->any = 1; } + | NUM bgp_path { $$ = cfg_alloc(sizeof(struct f_path_mask)); $$->next = $2; $$->val = $1; $$->any = 0; } + | '?' bgp_path { $$ = cfg_alloc(sizeof(struct f_path_mask)); $$->next = $2; $$->val = 0; $$->any = 1; } ; constant: diff --git a/filter/filter.c b/filter/filter.c index 9cde3d9..7893d9a 100644 --- a/filter/filter.c +++ b/filter/filter.c @@ -69,6 +69,30 @@ pm_path_compare(struct f_path_mask *m1, struct f_path_mask *m2) } } +static void +pm_format(struct f_path_mask *p, byte *buf, unsigned int size) +{ + byte *end = buf + size - 16; + + while (p) + { + if (buf > end) + { + strcpy(buf, " ..."); + return; + } + + if (p->any) + buf += bsprintf(buf, "? "); + else + buf += bsprintf(buf, "%u ", p->val); + + p = p->next; + } + + *buf = 0; +} + /** * val_compare - compare two values * @v1: first value @@ -224,7 +248,7 @@ val_print(struct f_val v) case T_ENUM: PRINTF( "(enum %x)%d", v.type, v.val.i ); break; case T_PATH: as_path_format(v.val.ad, buf2, 1020); PRINTF( "(path %s)", buf2 ); break; case T_CLIST: int_set_format(v.val.ad, buf2, 1020); PRINTF( "(clist %s)", buf2 ); break; - case T_PATH_MASK: debug( "(pathmask " ); { struct f_path_mask *p = v.val.path_mask; while (p) { debug("%d ", p->val); p=p->next; } debug(")" ); } break; + case T_PATH_MASK: pm_format(v.val.path_mask, buf2, 1020); PRINTF( "(pathmask %s)", buf2 ); break; default: PRINTF( "[unknown type %x]", v.type ); #undef PRINTF } diff --git a/filter/filter.h b/filter/filter.h index 04a2623..f71e54d 100644 --- a/filter/filter.h +++ b/filter/filter.h @@ -11,6 +11,7 @@ #include "lib/resource.h" #include "lib/ip.h" +#include "nest/route.h" #include "nest/attrs.h" struct f_inst { /* Instruction */ diff --git a/lib/socket.h b/lib/socket.h index ab932b3..4aa521d 100644 --- a/lib/socket.h +++ b/lib/socket.h @@ -39,6 +39,7 @@ typedef struct birdsock { int fd; /* System-dependent data */ node n; void *rbuf_alloc, *tbuf_alloc; + char *password; /* Password for MD5 authentication */ } sock; sock *sk_new(pool *); /* Allocate new socket */ @@ -47,6 +48,7 @@ int sk_send(sock *, unsigned len); /* Send data, <0=err, >0=ok, 0=sleep */ int sk_send_to(sock *, unsigned len, ip_addr to, unsigned port); /* sk_send to given destination */ void sk_reallocate(sock *); /* Free and allocate tbuf & rbuf */ void sk_dump_all(void); +int sk_set_md5_auth(sock *s, ip_addr a, char *passwd); /* Add or remove security associations for given passive socket */ static inline int sk_send_buffer_empty(sock *sk) diff --git a/nest/a-path.c b/nest/a-path.c index 1b08f80..5e3ecfd 100644 --- a/nest/a-path.c +++ b/nest/a-path.c @@ -14,38 +14,139 @@ #include "lib/unaligned.h" #include "lib/string.h" + +/* Global AS4 support, shared by all BGP instances. + * This specifies whether BA_AS_PATH attributes contain 2 or 4 B per ASN + */ + +int bgp_as4_support = 1; + +static void +put_as(byte *data, u32 as) +{ + if (bgp_as4_support) + put_u32(data, as); + else if (as <= 0xFFFF) + put_u16(data, as); + else + bug("put_as: Try to put 32bit AS to 16bit AS Path"); +} + +static inline u32 +get_as(byte *data) +{ + return bgp_as4_support ? get_u32(data) : get_u16(data); +} + struct adata * -as_path_prepend(struct linpool *pool, struct adata *olda, int as) +as_path_prepend(struct linpool *pool, struct adata *olda, u32 as) { + int bs = bgp_as4_support ? 4 : 2; struct adata *newa; - if (olda->length && olda->data[0] == AS_PATH_SEQUENCE && - olda->data[1] < 255) /* Starting with sequence => just prepend the AS number */ + if (olda->length && olda->data[0] == AS_PATH_SEQUENCE && olda->data[1] < 255) + /* Starting with sequence => just prepend the AS number */ { - newa = lp_alloc(pool, sizeof(struct adata) + olda->length + 2); - newa->length = olda->length + 2; - newa->data[0] = 2; + int nl = olda->length + bs; + newa = lp_alloc(pool, sizeof(struct adata) + nl); + newa->length = nl; + newa->data[0] = AS_PATH_SEQUENCE; newa->data[1] = olda->data[1] + 1; - memcpy(newa->data+4, olda->data+2, olda->length-2); + memcpy(newa->data + bs + 2, olda->data + 2, olda->length - 2); } - else /* Create new path segment */ + else /* Create new path segment */ { - newa = lp_alloc(pool, sizeof(struct adata) + olda->length + 4); - newa->length = olda->length + 4; - newa->data[0] = 2; + int nl = olda->length + bs + 2; + newa = lp_alloc(pool, sizeof(struct adata) + nl); + newa->length = nl; + newa->data[0] = AS_PATH_SEQUENCE; newa->data[1] = 1; - memcpy(newa->data+4, olda->data, olda->length); + memcpy(newa->data + bs + 2, olda->data, olda->length); } - put_u16(newa->data+2, as); + put_as(newa->data + 2, as); return newa; } +int +as_path_convert_to_old(struct adata *path, byte *dst, int *new_used) +{ + byte *src = path->data; + byte *src_end = src + path->length; + byte *dst_start = dst; + u32 as; + int i, n; + *new_used = 0; + + while (src < src_end) + { + n = src[1]; + *dst++ = *src++; + *dst++ = *src++; + + for(i=0; i<n; i++) + { + as = get_u32(src); + if (as > 0xFFFF) + { + as = AS_TRANS; + *new_used = 1; + } + put_u16(dst, as); + src += 4; + dst += 2; + } + } + + return dst - dst_start; +} + +int +as_path_convert_to_new(struct adata *path, byte *dst, int req_as) +{ + byte *src = path->data; + byte *src_end = src + path->length; + byte *dst_start = dst; + u32 as; + int i, t, n; + + + while ((src < src_end) && (req_as > 0)) + { + t = *src++; + n = *src++; + + if (t == AS_PATH_SEQUENCE) + { + if (n > req_as) + n = req_as; + + req_as -= n; + } + else // t == AS_PATH_SET + req_as--; + + *dst++ = t; + *dst++ = n; + + for(i=0; i<n; i++) + { + as = get_u16(src); + put_u32(dst, as); + src += 2; + dst += 4; + } + } + + return dst - dst_start; +} + void as_path_format(struct adata *path, byte *buf, unsigned int size) { + int bs = bgp_as4_support ? 4 : 2; byte *p = path->data; byte *e = p + path->length; - byte *end = buf + size - 8; + byte *end = buf + size - 16; int sp = 1; int l, isset; @@ -69,8 +170,8 @@ as_path_format(struct adata *path, byte *buf, unsigned int size) { if (!sp) *buf++ = ' '; - buf += bsprintf(buf, "%d", get_u16(p)); - p += 2; + buf += bsprintf(buf, "%u", get_as(p)); + p += bs; sp = 0; } if (isset) @@ -86,6 +187,7 @@ as_path_format(struct adata *path, byte *buf, unsigned int size) int as_path_getlen(struct adata *path) { + int bs = bgp_as4_support ? 4 : 2; int res = 0; u8 *p = path->data; u8 *q = p+path->length; @@ -95,8 +197,8 @@ as_path_getlen(struct adata *path) { switch (*p++) { - case AS_PATH_SET: len = *p++; res++; p += 2*len; break; - case AS_PATH_SEQUENCE: len = *p++; res+=len; p += 2*len; break; + case AS_PATH_SET: len = *p++; res++; p += bs * len; break; + case AS_PATH_SEQUENCE: len = *p++; res += len; p += bs * len; break; default: bug("as_path_getlen: Invalid path segment"); } } @@ -104,9 +206,11 @@ as_path_getlen(struct adata *path) } int -as_path_get_first(struct adata *path) +as_path_get_first(struct adata *path, u32 *orig_as) { - int res = -1; + int bs = bgp_as4_support ? 4 : 2; + int found = 0; + u32 res = 0; u8 *p = path->data; u8 *q = p+path->length; int len; @@ -117,36 +221,84 @@ as_path_get_first(struct adata *path) { case AS_PATH_SET: if (len = *p++) - res = get_u16(p); - p += 2*len; + { + found = 1; + res = get_as(p); + p += bs * len; + } break; case AS_PATH_SEQUENCE: if (len = *p++) - res = get_u16(p+2*(len-1)); - p += 2*len; + { + found = 1; + res = get_as(p + bs * (len - 1)); + p += bs * len; + } break; default: bug("as_path_get_first: Invalid path segment"); } } - return res; + + *orig_as = res; + return found; +} + +int +as_path_get_last(struct adata *path, u32 *last_as) +{ + u8 *p = path->data; + + if ((path->length == 0) || (p[0] != AS_PATH_SEQUENCE) || (p[1] == 0)) + return 0; + else + { + *last_as = get_as(p+2); + return 1; + } } +int +as_path_is_member(struct adata *path, u32 as) +{ + int bs = bgp_as4_support ? 4 : 2; + u8 *p = path->data; + u8 *q = p+path->length; + int i, n; + + while (p<q) + { + n = p[1]; + p += 2; + for(i=0; i<n; i++) + { + if (get_as(p) == as) + return 1; + p += bs; + } + } + return 0; +} + + + #define MASK_PLUS do { mask = mask->next; if (!mask) return next == q; \ - asterisk = (mask->val == PM_ANY); \ + asterisk = mask->any; \ if (asterisk) { mask = mask->next; if (!mask) { return 1; } } \ } while(0) int as_path_match(struct adata *path, struct f_path_mask *mask) { + int bs = bgp_as4_support ? 4 : 2; int i; int asterisk = 0; u8 *p = path->data; u8 *q = p+path->length; int len; u8 *next; + u32 as; - asterisk = (mask->val == PM_ANY); + asterisk = mask->any; if (asterisk) { mask = mask->next; if (!mask) return 1; } @@ -156,20 +308,21 @@ as_path_match(struct adata *path, struct f_path_mask *mask) len = *p++; { u8 *p_save = p; - next = p_save + 2*len; + next = p_save + bs * len; retry: p = p_save; for (i=0; i<len; i++) { - if (asterisk && (get_u16(p) == mask->val)) { + as = get_as(p); + if (asterisk && (as == mask->val)) { MASK_PLUS; goto retry; } - if (!asterisk && (get_u16(p) == mask->val)) { + if (!asterisk && (as == mask->val)) { p = next; MASK_PLUS; goto okay; } - p+=2; + p += bs; } if (!asterisk) return 0; @@ -180,15 +333,15 @@ as_path_match(struct adata *path, struct f_path_mask *mask) case AS_PATH_SEQUENCE: len = *p++; for (i=0; i<len; i++) { - next = p+2; - if (asterisk && (get_u16(p) == mask->val)) + as = get_as(p); + if (asterisk && (as == mask->val)) MASK_PLUS; else if (!asterisk) { - if (get_u16(p) != mask->val) + if (as != mask->val) return 0; MASK_PLUS; } - p+=2; + p += bs; } break; diff --git a/nest/a-set.c b/nest/a-set.c index 4440714..69c090b 100644 --- a/nest/a-set.c +++ b/nest/a-set.c @@ -40,10 +40,12 @@ int_set_format(struct adata *set, byte *buf, unsigned int size) struct adata * int_set_add(struct linpool *pool, struct adata *list, u32 val) { - struct adata *res = lp_alloc(pool, list->length + sizeof(struct adata) + 4); - res->length = list->length+4; + int len = list ? list->length : 0; + struct adata *res = lp_alloc(pool, len + sizeof(struct adata) + 4); + res->length = len + 4; * (u32 *) res->data = val; - memcpy((char *) res->data + 4, list->data, list->length); + if (list) + memcpy((char *) res->data + 4, list->data, list->length); return res; } diff --git a/nest/attrs.h b/nest/attrs.h index abd6b9e..fee2c2c 100644 --- a/nest/attrs.h +++ b/nest/attrs.h @@ -14,16 +14,30 @@ #define AS_PATH_SET 1 /* Types of path segments */ #define AS_PATH_SEQUENCE 2 -struct adata *as_path_prepend(struct linpool *pool, struct adata *olda, int as); +#define AS_PATH_MAXLEN 10000 + +#define AS_TRANS 23456 +/* AS_TRANS is used when we need to store 32bit ASN larger than 0xFFFF + * to 16bit slot (like in 16bit AS_PATH). See RFC 4893 for details + */ + +struct adata *as_path_prepend(struct linpool *pool, struct adata *olda, u32 as); +int as_path_convert_to_old(struct adata *path, byte *dst, int *new_used); +int as_path_convert_to_new(struct adata *path, byte *dst, int req_as); void as_path_format(struct adata *path, byte *buf, unsigned int size); int as_path_getlen(struct adata *path); -int as_path_get_first(struct adata *path); +int as_path_get_first(struct adata *path, u32 *orig_as); +int as_path_get_last(struct adata *path, u32 *last_as); +int as_path_is_member(struct adata *path, u32 as); + struct f_path_mask { struct f_path_mask *next; - int val; + u32 val; + int any; }; -#define PM_ANY -1 + +// #define PM_ANY -1 int as_path_match(struct adata *path, struct f_path_mask *mask); @@ -34,4 +48,7 @@ struct adata *int_set_add(struct linpool *pool, struct adata *list, u32 val); int int_set_contains(struct adata *list, u32 val); struct adata *int_set_del(struct linpool *pool, struct adata *list, u32 val); +static inline int int_set_get_size(struct adata *list) +{ return list->length / 4; } + #endif diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c index 30699f8..2210cbe 100644 --- a/proto/bgp/attrs.c +++ b/proto/bgp/attrs.c @@ -55,22 +55,38 @@ bgp_format_origin(eattr *a, byte *buf) } static int -bgp_check_path(struct bgp_proto *p UNUSED, byte *a, int len) +bgp_check_path(byte *a, int len, int bs, int errcode) { while (len) { DBG("Path segment %02x %02x\n", a[0], a[1]); if (len < 2 || - a[0] != AS_PATH_SET && a[0] != AS_PATH_SEQUENCE || - 2*a[1] + 2 > len) - return 11; - len -= 2*a[1] + 2; - a += 2*a[1] + 2; + (a[0] != AS_PATH_SET && a[0] != AS_PATH_SEQUENCE) || + bs * a[1] + 2 > len) + return errcode; + len -= bs * a[1] + 2; + a += bs * a[1] + 2; } return 0; } static int +bgp_check_as_path(struct bgp_proto *p, byte *a, int len) +{ + return bgp_check_path(a, len, p->as4_session ? 4 : 2, 11); +} + +static int +bgp_check_as4_path(struct bgp_proto *p, byte *a, int len) +{ + if (bgp_as4_support && (! p->as4_session)) + return bgp_check_path(a, len, 4, 9); + else + return 0; +} + + +static int bgp_check_next_hop(struct bgp_proto *p UNUSED, byte *a, int len) { #ifdef IPV6 @@ -88,6 +104,20 @@ bgp_check_next_hop(struct bgp_proto *p UNUSED, byte *a, int len) } static int +bgp_check_aggregator(struct bgp_proto *p, UNUSED byte *a, int len) +{ + int exp_len = p->as4_session ? 8 : 6; + + return (len == exp_len) ? 0 : 5; +} + +static int +bgp_check_cluster_list(struct bgp_proto *p UNUSED, UNUSED byte *a, int len) +{ + return ((len % 4) == 0) ? 0 : 5; +} + +static int bgp_check_reach_nlri(struct bgp_proto *p UNUSED, byte *a UNUSED, int len UNUSED) { #ifdef IPV6 @@ -113,21 +143,23 @@ static struct attr_desc bgp_attr_table[] = { { "origin", 1, BAF_TRANSITIVE, EAF_TYPE_INT, 1, /* BA_ORIGIN */ bgp_check_origin, bgp_format_origin }, { "as_path", -1, BAF_TRANSITIVE, EAF_TYPE_AS_PATH, 1, /* BA_AS_PATH */ - bgp_check_path, NULL }, + bgp_check_as_path, NULL }, { "next_hop", 4, BAF_TRANSITIVE, EAF_TYPE_IP_ADDRESS, 1, /* BA_NEXT_HOP */ bgp_check_next_hop, NULL }, - { "med", 4, BAF_OPTIONAL, EAF_TYPE_INT, 0, /* BA_MULTI_EXIT_DISC */ + { "med", 4, BAF_OPTIONAL, EAF_TYPE_INT, 1, /* BA_MULTI_EXIT_DISC */ NULL, NULL }, { "local_pref", 4, BAF_TRANSITIVE, EAF_TYPE_INT, 0, /* BA_LOCAL_PREF */ NULL, NULL }, { "atomic_aggr", 0, BAF_TRANSITIVE, EAF_TYPE_OPAQUE, 1, /* BA_ATOMIC_AGGR */ NULL, NULL }, - { "aggregator", 6, BAF_OPTIONAL | BAF_TRANSITIVE, EAF_TYPE_OPAQUE, 1, /* BA_AGGREGATOR */ - NULL, NULL }, + { "aggregator", -1, BAF_OPTIONAL | BAF_TRANSITIVE, EAF_TYPE_OPAQUE, 1, /* BA_AGGREGATOR */ + bgp_check_aggregator, NULL }, { "community", -1, BAF_OPTIONAL | BAF_TRANSITIVE, EAF_TYPE_INT_SET, 1, /* BA_COMMUNITY */ NULL, NULL }, - { NULL, }, /* BA_ORIGINATOR_ID */ - { NULL, }, /* BA_CLUSTER_LIST */ + { "originator_id", 4, BAF_OPTIONAL, EAF_TYPE_INT, 0, /* BA_ORIGINATOR_ID */ + NULL, NULL }, + { "cluster_list", -1, BAF_OPTIONAL, EAF_TYPE_INT_SET, 0, /* BA_CLUSTER_LIST */ + bgp_check_cluster_list, NULL }, { NULL, }, /* BA_DPA */ { NULL, }, /* BA_ADVERTISER */ { NULL, }, /* BA_RCID_PATH */ @@ -135,43 +167,152 @@ static struct attr_desc bgp_attr_table[] = { bgp_check_reach_nlri, NULL }, { "mp_unreach_nlri", -1, BAF_OPTIONAL, EAF_TYPE_OPAQUE, 1, /* BA_MP_UNREACH_NLRI */ bgp_check_unreach_nlri, NULL }, + { NULL, }, /* BA_EXTENDED_COMM */ + { "as4_path", -1, BAF_OPTIONAL | BAF_TRANSITIVE, EAF_TYPE_OPAQUE, 1, /* BA_AS4_PATH */ + bgp_check_as4_path, NULL }, + { "as4_aggregator", 8, BAF_OPTIONAL | BAF_TRANSITIVE, EAF_TYPE_OPAQUE, 1, /* BA_AS4_PATH */ + NULL, NULL } }; +/* BA_AS4_PATH is type EAF_TYPE_OPAQUE and not type EAF_TYPE_AS_PATH because + * EAF_TYPE_AS_PATH is supposed to have different format (2 or 4 B for each ASN) + * depending on bgp_as4_support variable. + */ + #define ATTR_KNOWN(code) ((code) < ARRAY_SIZE(bgp_attr_table) && bgp_attr_table[code].name) -static byte * -bgp_set_attr(eattr *e, struct linpool *pool, unsigned attr, unsigned val) +static inline struct adata * +bgp_alloc_adata(struct linpool *pool, unsigned len) +{ + struct adata *ad = lp_alloc(pool, sizeof(struct adata) + len); + ad->length = len; + return ad; +} + +static void +bgp_set_attr(eattr *e, unsigned attr, uintptr_t val) { ASSERT(ATTR_KNOWN(attr)); e->id = EA_CODE(EAP_BGP, attr); e->type = bgp_attr_table[attr].type; e->flags = bgp_attr_table[attr].expected_flags; if (e->type & EAF_EMBEDDED) - { - e->u.data = val; - return NULL; - } + e->u.data = val; else - { - e->u.ptr = lp_alloc(pool, sizeof(struct adata) + val); - e->u.ptr->length = val; - return e->u.ptr->data; - } + e->u.ptr = (struct adata *) val; } -byte * -bgp_attach_attr(ea_list **to, struct linpool *pool, unsigned attr, unsigned val) +static byte * +bgp_set_attr_wa(eattr *e, struct linpool *pool, unsigned attr, unsigned len) +{ + struct adata *ad = bgp_alloc_adata(pool, len); + bgp_set_attr(e, attr, (uintptr_t) ad); + return ad->data; +} + +void +bgp_attach_attr(ea_list **to, struct linpool *pool, unsigned attr, uintptr_t val) { ea_list *a = lp_alloc(pool, sizeof(ea_list) + sizeof(eattr)); a->next = *to; *to = a; a->flags = EALF_SORTED; a->count = 1; - return bgp_set_attr(a->attrs, pool, attr, val); + bgp_set_attr(a->attrs, attr, val); } +byte * +bgp_attach_attr_wa(ea_list **to, struct linpool *pool, unsigned attr, unsigned len) +{ + struct adata *ad = bgp_alloc_adata(pool, len); + bgp_attach_attr(to, pool, attr, (uintptr_t) ad); + return ad->data; +} + +static int +bgp_encode_attr_hdr(byte *dst, unsigned int flags, unsigned code, int len) +{ + int wlen; + + DBG("\tAttribute %02x (%d bytes, flags %02x)\n", code, len, flags); + + if (len < 256) + { + *dst++ = flags; + *dst++ = code; + *dst++ = len; + wlen = 3; + } + else + { + *dst++ = flags | BAF_EXT_LEN; + *dst++ = code; + put_u16(dst, len); + wlen = 4; + } + + return wlen; +} + +static void +aggregator_convert_to_old(struct adata *aggr, byte *dst, int *new_used) +{ + byte *src = aggr->data; + *new_used = 0; + + u32 as = get_u32(src); + if (as > 0xFFFF) + { + as = AS_TRANS; + *new_used = 1; + } + put_u16(dst, as); + + /* Copy IPv4 address */ + memcpy(dst + 2, src + 4, 4); +} + +static void +aggregator_convert_to_new(struct adata *aggr, byte *dst) +{ + byte *src = aggr->data; + + u32 as = get_u16(src); + put_u32(dst, as); + + /* Copy IPv4 address */ + memcpy(dst + 4, src + 2, 4); +} + +static int +bgp_get_attr_len(eattr *a) +{ + int len; + if (ATTR_KNOWN(EA_ID(a->id))) + { + int code = EA_ID(a->id); + struct attr_desc *desc = &bgp_attr_table[code]; + len = desc->expected_length; + if (len < 0) + { + ASSERT(!(a->type & EAF_EMBEDDED)); + len = a->u.ptr->length; + } + } + else + { + ASSERT((a->type & EAF_TYPE_MASK) == EAF_TYPE_OPAQUE); + len = a->u.ptr->length; + } + + return len; +} + +#define ADVANCE(w, r, l) do { r -= l; w += l; } while (0) + /** * bgp_encode_attrs - encode BGP attributes + * @p: BGP instance * @w: buffer * @attrs: a list of extended attributes * @remains: remaining space in the buffer @@ -182,11 +323,11 @@ bgp_attach_attr(ea_list **to, struct linpool *pool, unsigned attr, unsigned val) * Result: Length of the attribute block generated. */ unsigned int -bgp_encode_attrs(byte *w, ea_list *attrs, int remains) +bgp_encode_attrs(struct bgp_proto *p, byte *w, ea_list *attrs, int remains) { unsigned int i, code, flags; byte *start = w; - int len; + int len, rv; for(i=0; i<attrs->count; i++) { @@ -198,43 +339,90 @@ bgp_encode_attrs(byte *w, ea_list *attrs, int remains) if (code == BA_NEXT_HOP) continue; #endif - flags = a->flags & (BAF_OPTIONAL | BAF_TRANSITIVE | BAF_PARTIAL); - if (ATTR_KNOWN(code)) - { - struct attr_desc *desc = &bgp_attr_table[code]; - len = desc->expected_length; - if (len < 0) - { - ASSERT(!(a->type & EAF_EMBEDDED)); - len = a->u.ptr->length; - } - } - else + + /* When AS4-aware BGP speaker is talking to non-AS4-aware BGP speaker, + * we have to convert our 4B AS_PATH to 2B AS_PATH and send our AS_PATH + * as optional AS4_PATH attribute. + */ + if ((code == BA_AS_PATH) && bgp_as4_support && (! p->as4_session)) { - ASSERT((a->type & EAF_TYPE_MASK) == EAF_TYPE_OPAQUE); len = a->u.ptr->length; + + if (remains < (len + 4)) + goto err_no_buffer; + + /* Using temporary buffer because don't know a length of created attr + * and therefore a length of a header. Perhaps i should better always + * use BAF_EXT_LEN. */ + + byte buf[len]; + int new_used; + int nl = as_path_convert_to_old(a->u.ptr, buf, &new_used); + + rv = bgp_encode_attr_hdr(w, BAF_TRANSITIVE, BA_AS_PATH, nl); + ADVANCE(w, remains, rv); + memcpy(w, buf, nl); + ADVANCE(w, remains, nl); + + if (! new_used) + continue; + + if (remains < (len + 4)) + goto err_no_buffer; + + /* We should discard AS_CONFED_SEQUENCE or AS_CONFED_SET path segments + * here but we don't support confederations and such paths we already + * discarded in bgp_check_as_path(). + */ + + rv = bgp_encode_attr_hdr(w, BAF_OPTIONAL | BAF_TRANSITIVE, BA_AS4_PATH, len); + ADVANCE(w, remains, rv); + memcpy(w, a->u.ptr->data, len); + ADVANCE(w, remains, len); + + continue; } - DBG("\tAttribute %02x (type %02x, %d bytes, flags %02x)\n", code, a->type, len, flags); - if (remains < len + 4) - { - log(L_ERR "BGP: attribute list too long, ignoring the remaining attributes"); - break; - } - if (len < 256) - { - *w++ = flags; - *w++ = code; - *w++ = len; - remains -= 3; - } - else + + /* The same issue with AGGREGATOR attribute */ + if ((code == BA_AGGREGATOR) && bgp_as4_support && (! p->as4_session)) { - *w++ = flags | BAF_EXT_LEN; - *w++ = code; - put_u16(w, len); - w += 2; - remains -= 4; + int new_used; + + len = 6; + if (remains < (len + 3)) + goto err_no_buffer; + + rv = bgp_encode_attr_hdr(w, BAF_OPTIONAL | BAF_TRANSITIVE, BA_AGGREGATOR, len); + ADVANCE(w, remains, rv); + aggregator_convert_to_old(a->u.ptr, w, &new_used); + ADVANCE(w, remains, len); + + if (! new_used) + continue; + + len = 8; + if (remains < (len + 3)) + goto err_no_buffer; + + rv = bgp_encode_attr_hdr(w, BAF_OPTIONAL | BAF_TRANSITIVE, BA_AS4_AGGREGATOR, len); + ADVANCE(w, remains, rv); + memcpy(w, a->u.ptr->data, len); + ADVANCE(w, remains, len); + + continue; } + + /* Standard path continues here ... */ + + flags = a->flags & (BAF_OPTIONAL | BAF_TRANSITIVE | BAF_PARTIAL); + len = bgp_get_attr_len(a); + + if (remains < len + 4) + goto err_no_buffer; + + rv = bgp_encode_attr_hdr(w, flags, code, len); + ADVANCE(w, remains, rv); + switch (a->type & EAF_TYPE_MASK) { case EAF_TYPE_INT: @@ -266,10 +454,13 @@ bgp_encode_attrs(byte *w, ea_list *attrs, int remains) default: bug("bgp_encode_attrs: unknown attribute type %02x", a->type); } - remains -= len; - w += len; + ADVANCE(w, remains, len); } return w - start; + + err_no_buffer: + log(L_ERR "BGP: attribute list too long, ignoring the remaining attributes"); + return w - start; } static void @@ -547,6 +738,7 @@ bgp_rt_notify(struct proto *P, net *n, rte *new, rte *old UNUSED, ea_list *attrs bgp_schedule_packet(p->conn, PKT_UPDATE); } + static int bgp_create_attrs(struct bgp_proto *p, rte *e, ea_list **attrs, struct linpool *pool) { @@ -559,20 +751,24 @@ bgp_create_attrs(struct bgp_proto *p, rte *e, ea_list **attrs, struct linpool *p ea->flags = EALF_SORTED; ea->count = 4; - bgp_set_attr(ea->attrs, pool, BA_ORIGIN, + bgp_set_attr(ea->attrs, BA_ORIGIN, ((rta->source == RTS_OSPF_EXT1) || (rta->source == RTS_OSPF_EXT2)) ? ORIGIN_INCOMPLETE : ORIGIN_IGP); if (p->is_internal) - bgp_set_attr(ea->attrs+1, pool, BA_AS_PATH, 0); + bgp_set_attr_wa(ea->attrs+1, pool, BA_AS_PATH, 0); else { - z = bgp_set_attr(ea->attrs+1, pool, BA_AS_PATH, 4); + z = bgp_set_attr_wa(ea->attrs+1, pool, BA_AS_PATH, bgp_as4_support ? 6 : 4); z[0] = AS_PATH_SEQUENCE; z[1] = 1; /* 1 AS */ - put_u16(z+2, p->local_as); + + if (bgp_as4_support) + put_u32(z+2, p->local_as); + else + put_u16(z+2, p->local_as); } - z = bgp_set_attr(ea->attrs+2, pool, BA_NEXT_HOP, sizeof(ip_addr)); + z = bgp_set_attr_wa(ea->attrs+2, pool, BA_NEXT_HOP, sizeof(ip_addr)); if (p->cf->next_hop_self || !p->is_internal || rta->dest != RTD_ROUTER) @@ -585,34 +781,65 @@ bgp_create_attrs(struct bgp_proto *p, rte *e, ea_list **attrs, struct linpool *p else *(ip_addr *)z = e->attrs->gw; - bgp_set_attr(ea->attrs+3, pool, BA_LOCAL_PREF, 0); + bgp_set_attr(ea->attrs+3, BA_LOCAL_PREF, 0); return 0; /* Leave decision to the filters */ } -static ea_list * -bgp_path_prepend(struct linpool *pool, eattr *a, ea_list *old, int as) + +static inline int +bgp_as_path_loopy(struct bgp_proto *p, rta *a) { - struct ea_list *e = lp_alloc(pool, sizeof(ea_list) + sizeof(eattr)); - struct adata *olda = a->u.ptr; + eattr *e = ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH)); + return (e && as_path_is_member(e->u.ptr, p->local_as)); +} - e->next = old; - e->flags = EALF_SORTED; - e->count = 1; - e->attrs[0].id = EA_CODE(EAP_BGP, BA_AS_PATH); - e->attrs[0].flags = BAF_TRANSITIVE; - e->attrs[0].type = EAF_TYPE_AS_PATH; - e->attrs[0].u.ptr = as_path_prepend(pool, olda, as); - return e; +static inline int +bgp_originator_id_loopy(struct bgp_proto *p, rta *a) +{ + eattr *e = ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_ORIGINATOR_ID)); + return (e && (e->u.data == p->local_id)); +} + +static inline int +bgp_cluster_list_loopy(struct bgp_proto *p, rta *a) +{ + eattr *e = ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_CLUSTER_LIST)); + return (e && p->rr_client && int_set_contains(e->u.ptr, p->rr_cluster_id)); +} + + +static inline void +bgp_path_prepend(rte *e, ea_list **attrs, struct linpool *pool, u32 as) +{ + eattr *a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH)); + bgp_attach_attr(attrs, pool, BA_AS_PATH, (uintptr_t) as_path_prepend(pool, a->u.ptr, as)); +} + +static inline void +bgp_cluster_list_prepend(rte *e, ea_list **attrs, struct linpool *pool, u32 cid) +{ + eattr *a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_CLUSTER_LIST)); + bgp_attach_attr(attrs, pool, BA_CLUSTER_LIST, (uintptr_t) int_set_add(pool, a ? a->u.ptr : NULL, cid)); } static int -bgp_update_attrs(struct bgp_proto *p, rte *e, ea_list **attrs, struct linpool *pool) +bgp_update_attrs(struct bgp_proto *p, rte *e, ea_list **attrs, struct linpool *pool, int rr) { eattr *a; - if (!p->is_internal && (a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH)))) - *attrs = bgp_path_prepend(pool, a, *attrs, p->local_as); + if (!p->is_internal) + { + bgp_path_prepend(e, attrs, pool, p->local_as); + + /* The MULTI_EXIT_DISC attribute received from a neighboring AS MUST NOT be + * propagated to other neighboring ASes. + * Perhaps it would be better to undefine it. + */ + a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC)); + if (a) + bgp_attach_attr(attrs, pool, BA_MULTI_EXIT_DISC, 0); + } a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_NEXT_HOP)); if (a && (p->is_internal || (!p->is_internal && e->attrs->iface == p->neigh->iface))) @@ -622,7 +849,24 @@ bgp_update_attrs(struct bgp_proto *p, rte *e, ea_list **attrs, struct linpool *p else { /* Need to create new one */ - *(ip_addr *) bgp_attach_attr(attrs, pool, BA_NEXT_HOP, sizeof(ip_addr)) = p->local_addr; + bgp_attach_attr_ip(attrs, pool, BA_NEXT_HOP, p->local_addr); + } + + if (rr) + { + /* Handling route reflection, RFC 4456 */ + struct bgp_proto *src = (struct bgp_proto *) e->attrs->proto; + + a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGINATOR_ID)); + if (!a) + bgp_attach_attr(attrs, pool, BA_ORIGINATOR_ID, src->remote_id); + + /* We attach proper cluster ID according to whether the route is entering or leaving the cluster */ + bgp_cluster_list_prepend(e, attrs, pool, src->rr_client ? src->rr_cluster_id : p->rr_cluster_id); + + /* Two RR clients with different cluster ID, hmmm */ + if (src->rr_client && p->rr_client && (src->rr_cluster_id != p->rr_cluster_id)) + bgp_cluster_list_prepend(e, attrs, pool, p->rr_cluster_id); } return 0; /* Leave decision to the filters */ @@ -639,14 +883,39 @@ bgp_import_control(struct proto *P, rte **new, ea_list **attrs, struct linpool * return -1; if (new_bgp) { + /* We should check here for cluster list loop, because the receiving BGP instance + might have different cluster ID */ + if (bgp_cluster_list_loopy(p, e->attrs)) + return -1; + if (p->local_as == new_bgp->local_as && p->is_internal && new_bgp->is_internal) - return -1; /* Don't redistribute internal routes with IBGP */ - return bgp_update_attrs(p, e, attrs, pool); + { + /* Redistribution of internal routes with IBGP */ + if (p->rr_client || new_bgp->rr_client) + /* Route reflection, RFC 4456 */ + return bgp_update_attrs(p, e, attrs, pool, 1); + else + return -1; + } + else + return bgp_update_attrs(p, e, attrs, pool, 0); } else return bgp_create_attrs(p, e, attrs, pool); } +static inline u32 +bgp_get_neighbor(rte *r) +{ + eattr *e = ea_find(r->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH)); + u32 as; + + if (e && as_path_get_last(e->u.ptr, &as)) + return as; + else + return ((struct bgp_proto *) r->attrs->proto)->remote_as; +} + int bgp_rte_better(rte *new, rte *old) { @@ -665,20 +934,20 @@ bgp_rte_better(rte *new, rte *old) if (n < o) return 0; - /* Use AS path lengths */ + /* RFC 4271 9.1.2.2. a) Use AS path lengths */ if (new_bgp->cf->compare_path_lengths || old_bgp->cf->compare_path_lengths) { x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH)); y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH)); - n = x ? as_path_getlen(x->u.ptr) : 100000; - o = y ? as_path_getlen(y->u.ptr) : 100000; + n = x ? as_path_getlen(x->u.ptr) : AS_PATH_MAXLEN; + o = y ? as_path_getlen(y->u.ptr) : AS_PATH_MAXLEN; if (n < o) return 1; if (n > o) return 0; } - /* Use origins */ + /* RFC 4271 9.1.2.2. b) Use origins */ x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGIN)); y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGIN)); n = x ? x->u.data : ORIGIN_INCOMPLETE; @@ -688,47 +957,163 @@ bgp_rte_better(rte *new, rte *old) if (n > o) return 0; - /* Compare MED's */ - x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC)); - y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC)); - n = x ? x->u.data : new_bgp->cf->default_med; - o = y ? y->u.data : old_bgp->cf->default_med; + /* RFC 4271 9.1.2.2. c) Compare MED's */ + + if (bgp_get_neighbor(new) == bgp_get_neighbor(old)) + { + x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC)); + y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC)); + n = x ? x->u.data : new_bgp->cf->default_med; + o = y ? y->u.data : old_bgp->cf->default_med; + if (n < o) + return 1; + if (n > o) + return 0; + } + + /* RFC 4271 9.1.2.2. d) Prefer external peers */ + if (new_bgp->is_internal > old_bgp->is_internal) + return 0; + if (new_bgp->is_internal < old_bgp->is_internal) + return 1; + + /* Skipping RFC 4271 9.1.2.2. e) */ + /* We don't have interior distances */ + + /* RFC 4456 9. b) Compare cluster list lengths */ + x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_CLUSTER_LIST)); + y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_CLUSTER_LIST)); + n = x ? int_set_get_size(x->u.ptr) : 0; + o = y ? int_set_get_size(y->u.ptr) : 0; if (n < o) return 1; if (n > o) return 0; - /* A tie breaking procedure according to RFC 1771, section 9.1.2.1 */ - /* We don't have interior distances */ - /* We prefer external peers */ - if (new_bgp->is_internal > old_bgp->is_internal) - return 0; - if (new_bgp->is_internal < old_bgp->is_internal) + /* RFC 4271 9.1.2.2. f) Compare BGP identifiers */ + /* RFC 4456 9. a) Use ORIGINATOR_ID instead of local neighor ID */ + x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGINATOR_ID)); + y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGINATOR_ID)); + n = x ? x->u.data : new_bgp->remote_id; + o = y ? y->u.data : old_bgp->remote_id; + if (n < o) return 1; - /* Finally we compare BGP identifiers */ - return (new_bgp->remote_id < old_bgp->remote_id); + if (n > o) + return 0; + + + /* RFC 4271 9.1.2.2. g) Compare peer IP adresses */ + return (ipa_compare(new_bgp->cf->remote_ip, old_bgp->cf->remote_ip) < 0); } -static int -bgp_path_loopy(struct bgp_proto *p, eattr *a) +static struct adata * +bgp_aggregator_convert_to_new(struct adata *old, struct linpool *pool) +{ + struct adata *newa = lp_alloc(pool, sizeof(struct adata) + 8); + newa->length = 8; + aggregator_convert_to_new(old, newa->data); + return newa; +} + + +/* Take last req_as ASNs from path old2 (in 2B format), convert to 4B format + * and append path old4 (in 4B format). + */ +static struct adata * +bgp_merge_as_paths(struct adata *old2, struct adata *old4, int req_as, struct linpool *pool) +{ + byte buf[old2->length * 2]; + + int ol = as_path_convert_to_new(old2, buf, req_as); + int nl = ol + (old4 ? old4->length : 0); + + struct adata *newa = lp_alloc(pool, sizeof(struct adata) + nl); + newa->length = nl; + memcpy(newa->data, buf, ol); + if (old4) memcpy(newa->data + ol, old4->data, old4->length); + + return newa; +} + + +/* Reconstruct 4B AS_PATH and AGGREGATOR according to RFC 4893 4.2.3 */ +static void +bgp_reconstruct_4b_atts(struct bgp_proto *p, rta *a, struct linpool *pool) +{ + eattr *p2 =ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH)); + eattr *p4 =ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_AS4_PATH)); + eattr *a2 =ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_AGGREGATOR)); + eattr *a4 =ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_AS4_AGGREGATOR)); + + if (a2) + { + u32 a2_as = get_u16(a2->u.ptr->data); + + if (a4) + { + if (a2_as != AS_TRANS) + { + /* Routes were aggregated by old router and therefore AS4_PATH + * and AS4_AGGREGATOR is invalid + * + * Convert AS_PATH and AGGREGATOR to 4B format and finish. + */ + + a2->u.ptr = bgp_aggregator_convert_to_new(a2->u.ptr, pool); + p2->u.ptr = bgp_merge_as_paths(p2->u.ptr, NULL, AS_PATH_MAXLEN, pool); + + return; + } + else + { + /* Common case, use AS4_AGGREGATOR attribute */ + a2->u.ptr = a4->u.ptr; + } + } + else + { + /* Common case, use old AGGREGATOR attribute */ + a2->u.ptr = bgp_aggregator_convert_to_new(a2->u.ptr, pool); + + if (a2_as == AS_TRANS) + log(L_WARN "BGP: AGGREGATOR attribute contain AS_TRANS, but AS4_AGGREGATOR is missing"); + } + } + else + if (a4) + log(L_WARN "BGP: AS4_AGGREGATOR attribute received, but AGGREGATOR attribute is missing"); + + int p2_len = as_path_getlen(p2->u.ptr); + int p4_len = p4 ? as_path_getlen(p4->u.ptr) : AS_PATH_MAXLEN; + + if (p2_len < p4_len) + p2->u.ptr = bgp_merge_as_paths(p2->u.ptr, NULL, AS_PATH_MAXLEN, pool); + else + p2->u.ptr = bgp_merge_as_paths(p2->u.ptr, p4->u.ptr, p2_len - p4_len, pool); + +} + +static void +bgp_remove_as4_attrs(struct bgp_proto *p, rta *a) { - byte *path = a->u.ptr->data; - int len = a->u.ptr->length; - int i, n; + unsigned id1 = EA_CODE(EAP_BGP, BA_AS4_PATH); + unsigned id2 = EA_CODE(EAP_BGP, BA_AS4_AGGREGATOR); + ea_list **el = &(a->eattrs); - while (len > 0) + /* We know that ea_lists constructed in bgp_decode_attrs have one attribute per ea_list struct */ + while (*el != NULL) { - n = path[1]; - len -= 2 + 2*n; - path += 2; - for(i=0; i<n; i++) + unsigned fid = (*el)->attrs[0].id; + + if ((fid == id1) || (fid == id2)) { - if (get_u16(path) == p->local_as) - return 1; - path += 2; + *el = (*el)->next; + if (p->as4_session) + log(L_WARN "BGP: Unexpected AS4_* attributes received"); } + else + el = &((*el)->next); } - return 0; } /** @@ -883,20 +1268,34 @@ bgp_decode_attrs(struct bgp_conn *conn, byte *attr, unsigned int len, struct lin } } } + + /* When receiving attributes from non-AS4-aware BGP speaker, + * we have to reconstruct 4B AS_PATH and AGGREGATOR attributes + */ + if (bgp_as4_support && (! bgp->as4_session)) + bgp_reconstruct_4b_atts(bgp, a, pool); + + if (bgp_as4_support) + bgp_remove_as4_attrs(bgp, a); /* If the AS path attribute contains our AS, reject the routes */ - e = ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH)); - if (e && bgp_path_loopy(bgp, e)) - { - DBG("BGP: Path loop!\n"); - return NULL; - } + if (bgp_as_path_loopy(bgp, a)) + goto loop; + + /* Two checks for IBGP loops caused by route reflection, RFC 4456 */ + if (bgp_originator_id_loopy(bgp, a) || + bgp_cluster_list_loopy(bgp, a)) + goto loop; /* If there's no local preference, define one */ - if (!(seen[0] && (1 << BA_LOCAL_PREF))) + if (!(seen[0] & (1 << BA_LOCAL_PREF))) bgp_attach_attr(&a->eattrs, pool, BA_LOCAL_PREF, 0); return a; +loop: + DBG("BGP: Path loop!\n"); + return NULL; + malformed: bgp_error(conn, 3, 1, NULL, 0); return NULL; @@ -945,11 +1344,11 @@ bgp_get_route_info(rte *e, byte *buf, ea_list *attrs) { eattr *p = ea_find(attrs, EA_CODE(EAP_BGP, BA_AS_PATH)); eattr *o = ea_find(attrs, EA_CODE(EAP_BGP, BA_ORIGIN)); - int origas; + u32 origas; buf += bsprintf(buf, " (%d) [", e->pref); - if (p && (origas = as_path_get_first(p->u.ptr)) >= 0) - buf += bsprintf(buf, "AS%d", origas); + if (p && as_path_get_first(p->u.ptr, &origas)) + buf += bsprintf(buf, "AS%u", origas); if (o) buf += bsprintf(buf, "%c", "ie?"[o->u.data]); strcpy(buf, "]"); diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c index cedd223..0d580be 100644 --- a/proto/bgp/bgp.c +++ b/proto/bgp/bgp.c @@ -76,11 +76,16 @@ static void bgp_connect(struct bgp_proto *p); static void bgp_initiate(struct bgp_proto *p); static void bgp_setup_listen_sk(void); + static void -bgp_close(struct bgp_proto *p UNUSED) +bgp_close(struct bgp_proto *p) { ASSERT(bgp_counter); bgp_counter--; + + if (p->cf->password) + sk_set_md5_auth(bgp_listen_sk, p->cf->remote_ip, NULL); + if (!bgp_counter) { rfree(bgp_listen_sk); @@ -329,6 +334,7 @@ bgp_connect(struct bgp_proto *p) /* Enter Connect state and start establishing c bgp_setup_conn(p, conn); bgp_setup_sk(p, conn, s); s->tx_hook = bgp_connected; + s->password = p->cf->password; conn->state = BS_CONNECT; if (sk_open(s)) { @@ -479,6 +485,13 @@ bgp_start_locked(struct object_lock *lock) p->local_id = cf->c.global->router_id; p->next_hop = cf->multihop ? cf->multihop_via : cf->remote_ip; p->neigh = neigh_find(&p->p, &p->next_hop, NEF_STICKY); + + if (cf->rr_client) + { + p->rr_cluster_id = cf->rr_cluster_id ? cf->rr_cluster_id : p->local_id; + p->rr_client = cf->rr_client; + } + if (!p->neigh) { log(L_ERR "%s: Invalid next hop %I", p->p.name, p->next_hop); @@ -505,6 +518,7 @@ bgp_start(struct proto *P) bgp_counter++; bgp_setup_listen_sk(); + if (!bgp_linpool) bgp_linpool = lp_new(&root_pool, 4080); @@ -522,6 +536,17 @@ bgp_start(struct proto *P) lock->hook = bgp_start_locked; lock->data = p; olock_acquire(lock); + + /* We should create security association after we get a lock not to + * break existing connections. + */ + if (p->cf->password) + { + int rv = sk_set_md5_auth(bgp_listen_sk, p->cf->remote_ip, p->cf->password); + if (rv < 0) + return PS_STOP; + } + return PS_START; } @@ -611,6 +636,14 @@ bgp_check(struct bgp_config *c) cf_error("Local AS number must be set"); if (!c->remote_as) cf_error("Neighbor must be configured"); + if (!bgp_as4_support && c->enable_as4) + cf_error("AS4 support disabled globbaly"); + if (!c->enable_as4 && (c->local_as > 0xFFFF)) + cf_error("Local AS number out of range"); + if (!c->enable_as4 && (c->remote_as > 0xFFFF)) + cf_error("Neighbor AS number out of range"); + if ((c->local_as != c->remote_as) && (c->rr_client)) + cf_error("Only internal neighbor can be RR client"); } static void diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h index 6519db8..1d67e33 100644 --- a/proto/bgp/bgp.h +++ b/proto/bgp/bgp.h @@ -16,7 +16,7 @@ struct eattr; struct bgp_config { struct proto_config c; - unsigned int local_as, remote_as; + u32 local_as, remote_as; ip_addr remote_ip; int multihop; /* Number of hops if multihop */ ip_addr multihop_via; /* Multihop: address to route to */ @@ -25,6 +25,9 @@ struct bgp_config { int compare_path_lengths; /* Use path lengths when selecting best route */ u32 default_local_pref; /* Default value for LOCAL_PREF attribute */ u32 default_med; /* Default value for MULTI_EXIT_DISC attribute */ + int enable_as4; /* Enable local support for 4B AS numbers [RFC4893] */ + u32 rr_cluster_id; /* Route reflector cluster ID, if different from local ID */ + int rr_client; /* Whether neighbor is RR client of me */ unsigned connect_retry_time; unsigned hold_time, initial_hold_time; unsigned keepalive_time; @@ -33,6 +36,7 @@ struct bgp_config { unsigned error_delay_time_min; /* Time to wait after an error is detected */ unsigned error_delay_time_max; unsigned disable_after_error; /* Disable the protocol when error is detected */ + char *password; /* Password used for MD5 authentication */ }; struct bgp_conn { @@ -47,16 +51,21 @@ struct bgp_conn { byte *notify_data; int error_flag; /* Error state, ignore all input */ int primary; /* This connection is primary */ + u32 advertised_as; /* Temporary value for AS number received */ unsigned hold_time, keepalive_time; /* Times calculated from my and neighbor's requirements */ }; struct bgp_proto { struct proto p; struct bgp_config *cf; /* Shortcut to BGP configuration */ - unsigned local_as, remote_as; + u32 local_as, remote_as; int is_internal; /* Internal BGP connection (local_as == remote_as) */ + int as4_support; /* Peer supports 4B AS numbers [RFC4893] */ + int as4_session; /* Session uses 4B AS numbers in AS_PATH (both sides support it) */ u32 local_id; /* BGP identifier of this router */ u32 remote_id; /* BGP identifier of the neighbor */ + u32 rr_cluster_id; /* Route reflector cluster ID */ + int rr_client; /* Whether neighbor is RR client of me */ struct bgp_conn *conn; /* Connection we have established */ struct bgp_conn outgoing_conn; /* Outgoing connection we're working with */ struct bgp_conn incoming_conn; /* Incoming connection we have neither accepted nor rejected yet */ @@ -100,6 +109,9 @@ struct bgp_bucket { extern struct linpool *bgp_linpool; +extern int bgp_as4_support; + + void bgp_start_timer(struct timer *t, int value); void bgp_check(struct bgp_config *c); void bgp_error(struct bgp_conn *c, unsigned code, unsigned subcode, byte *data, int len); @@ -115,17 +127,21 @@ void bgp_close_conn(struct bgp_conn *c); /* attrs.c */ -byte *bgp_attach_attr(struct ea_list **to, struct linpool *, unsigned attr, unsigned val); +void bgp_attach_attr(struct ea_list **to, struct linpool *pool, unsigned attr, uintptr_t val); +byte *bgp_attach_attr_wa(struct ea_list **to, struct linpool *pool, unsigned attr, unsigned len); struct rta *bgp_decode_attrs(struct bgp_conn *conn, byte *a, unsigned int len, struct linpool *pool, int mandatory); int bgp_get_attr(struct eattr *e, byte *buf); int bgp_rte_better(struct rte *, struct rte *); void bgp_rt_notify(struct proto *, struct network *, struct rte *, struct rte *, struct ea_list *); int bgp_import_control(struct proto *, struct rte **, struct ea_list **, struct linpool *); void bgp_attr_init(struct bgp_proto *); -unsigned int bgp_encode_attrs(byte *w, struct ea_list *attrs, int remains); +unsigned int bgp_encode_attrs(struct bgp_proto *p, byte *w, ea_list *attrs, int remains); void bgp_free_bucket(struct bgp_proto *p, struct bgp_bucket *buck); void bgp_get_route_info(struct rte *, byte *buf, struct ea_list *attrs); +inline static void bgp_attach_attr_ip(struct ea_list **to, struct linpool *pool, unsigned attr, ip_addr a) +{ *(ip_addr *) bgp_attach_attr_wa(to, pool, attr, sizeof(ip_addr)) = a; } + /* packets.c */ void bgp_schedule_packet(struct bgp_conn *conn, int type); @@ -165,6 +181,8 @@ void bgp_log_error(struct bgp_proto *p, char *msg, unsigned code, unsigned subco #define BA_MP_REACH_NLRI 0x0e /* [RFC2283] */ #define BA_MP_UNREACH_NLRI 0x0f #define BA_EXTENDED_COMM 0x10 /* draft-ramachandra-bgp-ext-communities */ +#define BA_AS4_PATH 0x11 /* [RFC4893] */ +#define BA_AS4_AGGREGATOR 0x12 /* BGP states */ diff --git a/proto/bgp/config.Y b/proto/bgp/config.Y index 52ad731..8524b2d 100644 --- a/proto/bgp/config.Y +++ b/proto/bgp/config.Y @@ -18,9 +18,10 @@ CF_DECLS CF_KEYWORDS(BGP, LOCAL, NEIGHBOR, AS, HOLD, TIME, CONNECT, RETRY, KEEPALIVE, MULTIHOP, STARTUP, VIA, NEXT, HOP, SELF, DEFAULT, PATH, METRIC, - ERROR, START, DELAY, FORGET, WAIT, DISABLE, AFTER, + ERROR, START, DELAY, FORGET, WAIT, ENABLE, DISABLE, AFTER, BGP_PATH, BGP_LOCAL_PREF, BGP_MED, BGP_ORIGIN, BGP_NEXT_HOP, - BGP_ATOMIC_AGGR, BGP_AGGREGATOR, BGP_COMMUNITY, SOURCE, ADDRESS) + BGP_ATOMIC_AGGR, BGP_AGGREGATOR, BGP_COMMUNITY, SOURCE, ADDRESS, + PASSWORD, RR, CLIENT, CLUSTER, ID, AS4) CF_GRAMMAR @@ -32,27 +33,28 @@ bgp_proto_start: proto_start BGP { BGP_CFG->hold_time = 240; BGP_CFG->connect_retry_time = 120; BGP_CFG->initial_hold_time = 240; - BGP_CFG->default_med = ~0; /* RFC 1771 doesn't specify this, draft-09 says ~0 */ + BGP_CFG->default_med = 0; BGP_CFG->compare_path_lengths = 1; BGP_CFG->start_delay_time = 5; BGP_CFG->error_amnesia_time = 300; BGP_CFG->error_delay_time_min = 60; BGP_CFG->error_delay_time_max = 300; + BGP_CFG->enable_as4 = bgp_as4_support; } ; bgp_proto: bgp_proto_start proto_name '{' | bgp_proto proto_item ';' - | bgp_proto LOCAL AS expr ';' { - if ($4 < 0 || $4 > 65535) cf_error("AS number out of range"); - BGP_CFG->local_as = $4; - } + | bgp_proto LOCAL AS expr ';' { BGP_CFG->local_as = $4; } | bgp_proto NEIGHBOR ipa AS expr ';' { - if ($5 < 0 || $5 > 65535) cf_error("AS number out of range"); + if (ipa_nonzero(BGP_CFG->remote_ip)) cf_error("Only one neighbor per BGP instance is allowed"); + BGP_CFG->remote_ip = $3; BGP_CFG->remote_as = $5; } + | bgp_proto RR CLUSTER ID expr ';' { BGP_CFG->rr_cluster_id = $5; } + | bgp_proto RR CLIENT ';' { BGP_CFG->rr_client = 1; } | bgp_proto HOLD TIME expr ';' { BGP_CFG->hold_time = $4; } | bgp_proto STARTUP HOLD TIME expr ';' { BGP_CFG->initial_hold_time = $5; } | bgp_proto CONNECT RETRY TIME expr ';' { BGP_CFG->connect_retry_time = $5; } @@ -67,6 +69,8 @@ bgp_proto: | bgp_proto ERROR FORGET TIME expr ';' { BGP_CFG->error_amnesia_time = $5; } | bgp_proto ERROR WAIT TIME expr ',' expr ';' { BGP_CFG->error_delay_time_min = $5; BGP_CFG->error_delay_time_max = $7; } | bgp_proto DISABLE AFTER ERROR bool ';' { BGP_CFG->disable_after_error = $5; } + | bgp_proto ENABLE AS4 bool ';' { BGP_CFG->enable_as4 = $4; } + | bgp_proto PASSWORD TEXT ';' { BGP_CFG->password = $3; } ; CF_ADDTO(dynamic_attr, BGP_PATH diff --git a/proto/bgp/packets.c b/proto/bgp/packets.c index 2e6f0b6..c18c6e4 100644 --- a/proto/bgp/packets.c +++ b/proto/bgp/packets.c @@ -12,6 +12,7 @@ #include "nest/iface.h" #include "nest/protocol.h" #include "nest/route.h" +#include "nest/attrs.h" #include "conf/conf.h" #include "lib/unaligned.h" #include "lib/socket.h" @@ -30,33 +31,64 @@ bgp_create_notification(struct bgp_conn *conn, byte *buf) return buf + 2 + conn->notify_size; } +#ifdef IPV6 +static byte * +bgp_put_cap_ipv6(struct bgp_conn *conn UNUSED, byte *buf) +{ + *buf++ = 1; /* Capability 1: Multiprotocol extensions */ + *buf++ = 4; /* Capability data length */ + *buf++ = 0; /* We support AF IPv6 */ + *buf++ = BGP_AF_IPV6; + *buf++ = 0; /* RFU */ + *buf++ = 1; /* and SAFI 1 */ + return buf; +} +#endif + +static byte * +bgp_put_cap_as4(struct bgp_conn *conn, byte *buf) +{ + *buf++ = 65; /* Capability 65: Support for 4-octet AS number */ + *buf++ = 4; /* Capability data length */ + put_u32(buf, conn->bgp->local_as); + return buf + 4; +} + static byte * bgp_create_open(struct bgp_conn *conn, byte *buf) { struct bgp_proto *p = conn->bgp; + byte *cap; + int cap_len; BGP_TRACE(D_PACKETS, "Sending OPEN(ver=%d,as=%d,hold=%d,id=%08x)", BGP_VERSION, p->local_as, p->cf->hold_time, p->local_id); buf[0] = BGP_VERSION; - put_u16(buf+1, p->local_as); + put_u16(buf+1, (p->local_as < 0xFFFF) ? p->local_as : AS_TRANS); put_u16(buf+3, p->cf->hold_time); put_u32(buf+5, p->local_id); -#ifndef IPV6 - buf[9] = 0; /* No optional parameters */ - return buf+10; -#else - buf += 9; - *buf++ = 8; /* Optional params len */ - *buf++ = 2; /* Option: Capability list */ - *buf++ = 6; /* Option length */ - *buf++ = 1; /* Capability 1: Multiprotocol extensions */ - *buf++ = 4; /* Capability data length */ - *buf++ = 0; /* We support AF IPv6 */ - *buf++ = BGP_AF_IPV6; - *buf++ = 0; /* RFU */ - *buf++ = 1; /* and SAFI 1 */ - return buf; + /* Skipped 3 B for length field and Capabilities parameter header */ + cap = buf + 12; + +#ifdef IPV6 + cap = bgp_put_cap_ipv6(conn, cap); #endif + if (p->cf->enable_as4) + cap = bgp_put_cap_as4(conn, cap); + + cap_len = cap - buf - 12; + if (cap_len > 0) + { + buf[9] = cap_len + 2; /* Optional params len */ + buf[10] = 2; /* Option: Capability list */ + buf[11] = cap_len; /* Option length */ + return cap; + } + else + { + buf[9] = 0; /* No optional parameters */ + return buf + 10; + } } static unsigned int @@ -118,7 +150,7 @@ bgp_create_update(struct bgp_conn *conn, byte *buf) continue; } DBG("Processing bucket %p\n", buck); - a_size = bgp_encode_attrs(w+2, buck->eattrs, 1024); + a_size = bgp_encode_attrs(p, w+2, buck->eattrs, 1024); put_u16(w, a_size); w += a_size + 2; r_size = bgp_encode_prefixes(p, w, buck, remains - a_size); @@ -161,12 +193,12 @@ bgp_create_update(struct bgp_conn *conn, byte *buf) if ((buck = p->withdraw_bucket) && !EMPTY_LIST(buck->prefixes)) { DBG("Withdrawn routes:\n"); - tmp = bgp_attach_attr(&ea, bgp_linpool, BA_MP_UNREACH_NLRI, remains-8); + tmp = bgp_attach_attr_wa(&ea, bgp_linpool, BA_MP_UNREACH_NLRI, remains-8); *tmp++ = 0; *tmp++ = BGP_AF_IPV6; *tmp++ = 1; ea->attrs[0].u.ptr->length = bgp_encode_prefixes(p, tmp, buck, remains-11); - size = bgp_encode_attrs(w, ea, remains); + size = bgp_encode_attrs(p, w, ea, remains); w += size; remains -= size; } @@ -183,10 +215,10 @@ bgp_create_update(struct bgp_conn *conn, byte *buf) continue; } DBG("Processing bucket %p\n", buck); - size = bgp_encode_attrs(w, buck->eattrs, 1024); + size = bgp_encode_attrs(p, w, buck->eattrs, 1024); w += size; remains -= size; - tstart = tmp = bgp_attach_attr(&ea, bgp_linpool, BA_MP_REACH_NLRI, remains-8); + tstart = tmp = bgp_attach_attr_wa(&ea, bgp_linpool, BA_MP_REACH_NLRI, remains-8); *tmp++ = 0; *tmp++ = BGP_AF_IPV6; *tmp++ = 1; @@ -230,7 +262,7 @@ bgp_create_update(struct bgp_conn *conn, byte *buf) *tmp++ = 0; /* No SNPA information */ tmp += bgp_encode_prefixes(p, tmp, buck, remains - (8+3+32+1)); ea->attrs[0].u.ptr->length = tmp - tstart; - w += bgp_encode_attrs(w, ea, remains); + w += bgp_encode_attrs(p, w, ea, remains); break; } } @@ -353,9 +385,50 @@ bgp_tx(sock *sk) ; } +/* Capatibility negotiation as per RFC 2842 */ + +void +bgp_parse_capabilities(struct bgp_conn *conn, byte *opt, int len) +{ + struct bgp_proto *p = conn->bgp; + int cl; + u32 as; + + while (len > 0) + { + if (len < 2 || len < 2 + opt[1]) + goto err; + + cl = opt[1]; + + switch (opt[0]) + { + case 65: + if (cl != 4) + goto err; + p->as4_support = 1; + p->as4_session = p->cf->enable_as4; + if (p->as4_session) + conn->advertised_as = get_u32(opt + 2); + break; + + /* We can safely ignore all other capabilities */ + } + len -= 2 + cl; + opt += 2 + cl; + } + return; + + err: + bgp_error(conn, 2, 0, NULL, 0); + return; +} + static int bgp_parse_options(struct bgp_conn *conn, byte *opt, int len) { + int ol; + while (len > 0) { if (len < 2 || len < 2 + opt[1]) @@ -369,12 +442,14 @@ bgp_parse_options(struct bgp_conn *conn, byte *opt, int len) DBG("\n"); } #endif + + ol = opt[1]; switch (opt[0]) { case 2: - /* Capatibility negotiation as per RFC 2842 */ - /* We can safely ignore all capabilities announced */ + bgp_parse_capabilities(conn, opt + 2, ol); break; + default: /* * BGP specs don't tell us to send which option @@ -382,11 +457,11 @@ bgp_parse_options(struct bgp_conn *conn, byte *opt, int len) * to do so. Also, capability negotiation with * Cisco routers doesn't work without that. */ - bgp_error(conn, 2, 4, opt, opt[1]); + bgp_error(conn, 2, 4, opt, ol); return 0; } - len -= 2 + opt[1]; - opt += 2 + opt[1]; + len -= 2 + ol; + opt += 2 + ol; } return 0; } @@ -397,7 +472,7 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, int len) struct bgp_conn *other; struct bgp_proto *p = conn->bgp; struct bgp_config *cf = p->cf; - unsigned as, hold; + unsigned hold; u32 id; /* Check state */ @@ -409,20 +484,27 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, int len) { bgp_error(conn, 1, 2, pkt+16, 2); return; } if (pkt[19] != BGP_VERSION) { bgp_error(conn, 2, 1, pkt+19, 1); return; } /* RFC 1771 says 16 bits, draft-09 tells to use 8 */ - as = get_u16(pkt+20); + conn->advertised_as = get_u16(pkt+20); hold = get_u16(pkt+22); id = get_u32(pkt+24); - BGP_TRACE(D_PACKETS, "Got OPEN(as=%d,hold=%d,id=%08x)", as, hold, id); - if (cf->remote_as && as != p->remote_as) - { bgp_error(conn, 2, 2, pkt+20, -2); return; } - if (hold > 0 && hold < 3) - { bgp_error(conn, 2, 6, pkt+22, 2); return; } - p->remote_id = id; + BGP_TRACE(D_PACKETS, "Got OPEN(as=%d,hold=%d,id=%08x)", conn->advertised_as, hold, id); + + p->remote_id = id; // ??? if (bgp_parse_options(conn, pkt+29, pkt[28])) return; + + if (hold > 0 && hold < 3) + { bgp_error(conn, 2, 6, pkt+22, 2); return; } + if (!id || id == 0xffffffff || id == p->local_id) { bgp_error(conn, 2, 3, pkt+24, -4); return; } + + if (conn->advertised_as != p->remote_as) + { + bgp_error(conn, 2, 2, (byte *) &(conn->advertised_as), -4); return; + } + /* Check the other connection */ other = (conn == &p->outgoing_conn) ? &p->incoming_conn : &p->outgoing_conn; switch (other->state) @@ -463,7 +545,7 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, int len) else conn->hold_time = p->cf->hold_time; conn->keepalive_time = p->cf->keepalive_time ? : conn->hold_time / 3; - p->remote_as = as; + // p->remote_as = conn->advertised_as; p->remote_id = id; DBG("BGP: Hold timer set to %d, keepalive to %d, AS to %d, ID to %x\n", conn->hold_time, conn->keepalive_time, p->remote_as, p->remote_id); @@ -621,7 +703,7 @@ bgp_do_rx_update(struct bgp_conn *conn, /* Create fake NEXT_HOP attribute */ if (len < 1 || (*x != 16 && *x != 32) || len < *x + 2) goto bad; - memcpy(bgp_attach_attr(&a0->eattrs, bgp_linpool, BA_NEXT_HOP, 16), x+1, 16); + bgp_attach_attr_ip(&a0->eattrs, bgp_linpool, BA_NEXT_HOP, x[1]); len -= *x + 2; x += *x + 1; @@ -720,7 +802,7 @@ static struct { { 2, 4, "Unsupported optional parameter" }, { 2, 5, "Authentication failure" }, { 2, 6, "Unacceptable hold time" }, - { 2, 7, "Required capability missing" }, /* capability negotiation draft */ + { 2, 7, "Required capability missing" }, /* [RFC3392] */ { 3, 0, "Invalid UPDATE message" }, { 3, 1, "Malformed attribute list" }, { 3, 2, "Unrecognized well-known attribute" }, diff --git a/sysdep/linux/netlink/netlink.c b/sysdep/linux/netlink/netlink.c index 4784195..38d00af 100644 --- a/sysdep/linux/netlink/netlink.c +++ b/sysdep/linux/netlink/netlink.c @@ -499,6 +499,8 @@ nl_send_route(struct krt_proto *p, rte *e, int new) nl_add_attr_ipa(&r.h, sizeof(r), RTA_GATEWAY, a->gw); break; case RTD_DEVICE: + if (!a->iface) + return; r.r.rtm_type = RTN_UNICAST; nl_add_attr_u32(&r.h, sizeof(r), RTA_OIF, a->iface->index); break; @@ -532,11 +534,8 @@ krt_set_notify(struct krt_proto *p, net *n UNUSED, rte *new, rte *old) else { if (old) - { - if (!old->attrs->iface || (old->attrs->iface->flags & IF_UP)) - nl_send_route(p, old, 0); - /* else the kernel has already flushed it */ - } + nl_send_route(p, old, 0); + if (new) nl_send_route(p, new, 1); } diff --git a/sysdep/linux/sysio.h b/sysdep/linux/sysio.h index 3a29cdc..b0aff71 100644 --- a/sysdep/linux/sysio.h +++ b/sysdep/linux/sysio.h @@ -139,3 +139,24 @@ static inline char *sysio_mcast_join(sock *s) #endif #endif + +#include <linux/socket.h> +#include <linux/tcp.h> + +/* For the case that we have older kernel headers */ +/* Copied from Linux kernel file include/linux/tcp.h */ + +#ifndef TCP_MD5SIG + +#define TCP_MD5SIG 14 +#define TCP_MD5SIG_MAXKEYLEN 80 + +struct tcp_md5sig { + struct __kernel_sockaddr_storage tcpm_addr; /* address associated */ + __u16 __tcpm_pad1; /* zero */ + __u16 tcpm_keylen; /* key length */ + __u32 __tcpm_pad2; /* zero */ + __u8 tcpm_key[TCP_MD5SIG_MAXKEYLEN]; /* key (binary) */ +}; + +#endif diff --git a/sysdep/unix/io.c b/sysdep/unix/io.c index fa471f6..8cec2cd 100644 --- a/sysdep/unix/io.c +++ b/sysdep/unix/io.c @@ -546,6 +546,7 @@ sk_new(pool *p) s->err_hook = NULL; s->fd = -1; s->rbuf_alloc = s->tbuf_alloc = NULL; + s->password = NULL; return s; } @@ -642,6 +643,71 @@ bad: return err; } + +/* FIXME: check portability */ + +static int +sk_set_md5_auth_int(sock *s, sockaddr *sa, char *passwd) +{ + struct tcp_md5sig md5; + + memset(&md5, 0, sizeof(md5)); + memcpy(&md5.tcpm_addr, (struct sockaddr *) sa, sizeof(*sa)); + + if (passwd) + { + int len = strlen(passwd); + + if (len > TCP_MD5SIG_MAXKEYLEN) + { + log(L_ERR "MD5 password too long"); + return -1; + } + + md5.tcpm_keylen = len; + memcpy(&md5.tcpm_key, passwd, len); + } + + int rv = setsockopt(s->fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5)); + + if (rv < 0) + { + if (errno == ENOPROTOOPT) + log(L_ERR "Kernel does not support TCP MD5 signatures"); + else + log(L_ERR "sk_set_md5_auth_int: setsockopt: %m"); + } + + return rv; +} + +/** + * sk_set_md5_auth - add / remove MD5 security association for given socket. + * @s: socket + * @a: IP address of the other side + * @passwd: password used for MD5 authentication + * + * In TCP MD5 handling code in kernel, there is a set of pairs + * (address, password) used to choose password according to + * address of the other side. This function is useful for + * listening socket, for active sockets it is enough to set + * s->password field. + * + * When called with passwd != NULL, the new pair is added, + * When called with passwd == NULL, the existing pair is removed. + * + * Result: 0 for success, -1 for an error. + */ + +int +sk_set_md5_auth(sock *s, ip_addr a, char *passwd) +{ + sockaddr sa; + fill_in_sockaddr(&sa, a, 0); + return sk_set_md5_auth_int(s, &sa, passwd); +} + + static void sk_tcp_connected(sock *s) { @@ -805,6 +871,14 @@ sk_open(sock *s) ERR("bind"); } fill_in_sockaddr(&sa, s->daddr, s->dport); + + if (s->password) + { + int rv = sk_set_md5_auth_int(s, &sa, s->password); + if (rv < 0) + goto bad_no_log; + } + switch (type) { case SK_TCP_ACTIVE: @@ -846,6 +920,7 @@ sk_open(sock *s) bad: log(L_ERR "sk_open: %s: %m", err); +bad_no_log: close(fd); s->fd = -1; return -1; diff --git a/sysdep/unix/krt-set.c b/sysdep/unix/krt-set.c index bd56448..23cbe5c 100644 --- a/sysdep/unix/krt-set.c +++ b/sysdep/unix/krt-set.c @@ -61,6 +61,8 @@ krt_ioctl(int ioc, rte *e, char *name) re.rt_flags |= RTF_GATEWAY; break; case RTD_DEVICE: + if (!a->iface) + return; re.rt_dev = a->iface->name; break; #ifdef RTF_REJECT diff --git a/sysdep/unix/krt.c b/sysdep/unix/krt.c index a6d1727..5269eb7 100644 --- a/sysdep/unix/krt.c +++ b/sysdep/unix/krt.c @@ -684,7 +684,7 @@ krt_notify(struct proto *P, net *net, rte *new, rte *old, struct ea_list *attrs { struct krt_proto *p = (struct krt_proto *) P; - if (shutting_down && KRT_CF->persist) + if (shutting_down) return; if (new && (!krt_capable(new) || new->attrs->source == RTS_INHERIT)) new = NULL; |