summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorOndrej Zajicek <santiago@crfreenet.org>2010-07-12 17:39:39 +0200
committerOndrej Zajicek <santiago@crfreenet.org>2010-07-12 17:39:39 +0200
commit9be9a264137cdd881f339c37d1a1918527924254 (patch)
tree0b815a7ac09fb59a3e72aad6ce713c7a77ed0662
parentcfe34a316e35a209fcd814ccf3523c262e8d4b0a (diff)
downloadbird-9be9a264137cdd881f339c37d1a1918527924254.tar
bird-9be9a264137cdd881f339c37d1a1918527924254.zip
Implements proper multihop BGP.
Also does some incompatible changes to config file syntax, like removing 'via IP' from multihop option.
-rw-r--r--doc/bird.sgml46
-rw-r--r--proto/bgp/attrs.c14
-rw-r--r--proto/bgp/bgp.c50
-rw-r--r--proto/bgp/bgp.h9
-rw-r--r--proto/bgp/config.Y4
-rw-r--r--proto/bgp/packets.c4
-rw-r--r--sysdep/unix/io.c5
7 files changed, 80 insertions, 52 deletions
diff --git a/doc/bird.sgml b/doc/bird.sgml
index a6fa4df..82ff630 100644
--- a/doc/bird.sgml
+++ b/doc/bird.sgml
@@ -272,7 +272,7 @@ protocol rip {
listen to IPv6 connections only. This is needed if you want to
run both bird and bird6 on the same port.
- <tag>timeformat route|protocol|base|log "<m/format1/" [<m/limit> "<m/format2/"]</tag>
+ <tag>timeformat route|protocol|base|log "<m/format1/" [<m/limit/ "<m/format2/"]</tag>
This option allows to specify a format of date/time used by
BIRD. The first argument specifies for which purpose such
format is used. <cf/route/ is a format used in 'show route'
@@ -280,8 +280,8 @@ protocol rip {
command output, <cf/base/ is used for other commands and
<cf/log/ is used in a log file.
- "<m/format1/" is a format string using <i/strftime(3)/
- notation (see <i/man strftime/ for details). <m/limit> and
+ "<m/format1/" is a format string using <it/strftime(3)/
+ notation (see <it/man strftime/ for details). <m/limit/ and
"<m/format2/" allow to specify the second format string for
times in past deeper than <m/limit/ seconds. There are two
shorthands: <cf/iso long/ is a ISO 8601 date/time format
@@ -976,10 +976,13 @@ This allows to set routing policy and all the other parameters differently
for each neighbor using the following configuration parameters:
<descrip>
- <tag>local as <m/number/</tag> Define which AS we are part of. (Note that
- contrary to other IP routers, BIRD is able to act as a router located
- in multiple AS'es simultaneously, but in such cases you need to tweak
- the BGP paths manually in the filters to get consistent behavior.)
+ <tag>local [<m/ip/] as <m/number/</tag> Define which AS we
+ are part of. (Note that contrary to other IP routers, BIRD is
+ able to act as a router located in multiple AS'es
+ simultaneously, but in such cases you need to tweak the BGP
+ paths manually in the filters to get consistent behavior.)
+ Optional <cf/ip/ argument specifies a source address,
+ equivalent to the <cf/source address/ option (see below).
This parameter is mandatory.
<tag>neighbor <m/ip/ as <m/number/</tag> Define neighboring router
@@ -988,16 +991,27 @@ for each neighbor using the following configuration parameters:
of your router's interfaces. In case the neighbor is in the same AS
as we are, we automatically switch to iBGP. This parameter is mandatory.
- <tag>multihop <m/number/ via <m/ip/</tag> Configure multihop BGP to a
- neighbor which is connected at most <m/number/ hops far and to which
- we should route via our direct neighbor with address <m/ip/.
+ <tag>multihop [<m/number/]</tag> Configure multihop BGP
+ session to a neighbor that isn't directly connected.
+ More precisely, this option should be used if the configured
+ neighbor IP address does not match any local network
+ subnet. Such an IP address has to be reachable through the system
+ routing table. For multihop BGP it is recommended to
+ explicitly configure <cf/source address/ to keep it
+ stable. The optional <cf/number/ argument can be used to limit TTL
+ (the number of hops).
Default: switched off.
+ <tag>source address <m/ip/</tag> Define local address we
+ should use for next hop calculation and as a source address
+ for the BGP session. Default: the address of the local
+ end of the interface our neighbor is connected to.
+
<tag>next hop self</tag> Avoid calculation of the Next Hop
- attribute and always advertise our own source address (see
- below) as a next hop. This needs to be used only occasionally
- to circumvent misconfigurations of other routers.
- Default: disabled.
+ attribute and always advertise our own source address as a
+ next hop. This needs to be used only occasionally to
+ circumvent misconfigurations of other routers. Default:
+ disabled.
<tag>missing lladdr self|drop|ignore</tag>Next Hop attribute
in BGP-IPv6 sometimes contains just the global IPv6 address,
@@ -1015,10 +1029,6 @@ for each neighbor using the following configuration parameters:
that case default is <cf/drop/, because route servers usually
do not forward packets themselves.
- <tag>source address <m/ip/</tag> Define local address we should use
- for next hop calculation. Default: the address of the local end
- of the interface our neighbor is connected to.
-
<tag>password <m/string/</tag> Use this password for MD5 authentication
of BGP sessions. Default: no authentication. Password has to be set by
external utility (e.g. setkey(8)) on BSD systems.
diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c
index 8743358..039828f 100644
--- a/proto/bgp/attrs.c
+++ b/proto/bgp/attrs.c
@@ -752,7 +752,7 @@ bgp_get_bucket(struct bgp_proto *p, net *n, ea_list *attrs, int originate)
/* Check if next hop is valid */
a = ea_find(new, EA_CODE(EAP_BGP, BA_NEXT_HOP));
- if (!a || ipa_equal(p->next_hop, *(ip_addr *)a->u.ptr->data))
+ if (!a || ipa_equal(p->cf->remote_ip, *(ip_addr *)a->u.ptr->data))
{
log(L_ERR "%s: Invalid NEXT_HOP attribute in route %I/%d", p->p.name, n->n.prefix, n->n.pxlen);
return NULL;
@@ -808,7 +808,6 @@ bgp_rt_notify(struct proto *P, rtable *tbl UNUSED, net *n, rte *new, rte *old UN
bgp_schedule_packet(p->conn, PKT_UPDATE);
}
-
static int
bgp_create_attrs(struct bgp_proto *p, rte *e, ea_list **attrs, struct linpool *pool)
{
@@ -834,12 +833,14 @@ bgp_create_attrs(struct bgp_proto *p, rte *e, ea_list **attrs, struct linpool *p
put_u32(z+2, p->local_as);
}
+ /* iBGP -> use gw, eBGP multi-hop -> use source_addr,
+ eBGP single-hop -> use gw if on the same iface */
z = bgp_set_attr_wa(ea->attrs+2, pool, BA_NEXT_HOP, NEXT_HOP_LENGTH);
if (p->cf->next_hop_self ||
rta->dest != RTD_ROUTER ||
- ipa_equal(e->attrs->gw, IPA_NONE) ||
+ ipa_equal(rta->gw, IPA_NONE) ||
ipa_has_link_scope(rta->gw) ||
- (!p->is_internal && (rta->iface != p->neigh->iface)))
+ (!p->is_internal && (!p->neigh || (rta->iface != p->neigh->iface))))
set_next_hop(z, p->source_addr);
else
set_next_hop(z, rta->gw);
@@ -904,8 +905,11 @@ bgp_update_attrs(struct bgp_proto *p, rte *e, ea_list **attrs, struct linpool *p
bgp_attach_attr(attrs, pool, BA_MULTI_EXIT_DISC, 0);
}
+ /* iBGP -> keep next_hop, eBGP multi-hop -> use source_addr,
+ eBGP single-hop -> keep next_hop if on the same iface */
a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_NEXT_HOP));
- if (a && !p->cf->next_hop_self && (p->is_internal || (!p->is_internal && e->attrs->iface == p->neigh->iface)))
+ if (a && !p->cf->next_hop_self &&
+ (p->is_internal || (p->neigh && (e->attrs->iface == p->neigh->iface))))
{
/* Leave the original next hop attribute, will check later where does it point */
}
diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c
index b36c4a3..3aa8845 100644
--- a/proto/bgp/bgp.c
+++ b/proto/bgp/bgp.c
@@ -145,6 +145,10 @@ bgp_startup_timeout(timer *t)
static void
bgp_initiate(struct bgp_proto *p)
{
+ int rv = bgp_open(p);
+ if (rv < 0)
+ return;
+
if (p->startup_delay)
{
BGP_TRACE(D_EVENTS, "Startup delayed by %d seconds", p->startup_delay);
@@ -347,6 +351,10 @@ bgp_conn_enter_established_state(struct bgp_conn *conn)
BGP_TRACE(D_EVENTS, "BGP session established");
DBG("BGP: UP!!!\n");
+ /* For multi-hop BGP sessions */
+ if (ipa_zero(p->source_addr))
+ p->source_addr = conn->sk->saddr;
+
p->conn = conn;
p->last_error_class = 0;
p->last_error_code = 0;
@@ -669,10 +677,11 @@ bgp_setup_listen_sk(ip_addr addr, unsigned port, u32 flags)
static void
bgp_start_neighbor(struct bgp_proto *p)
{
- p->local_addr = p->neigh->iface->addr->ip;
- p->source_addr = ipa_nonzero(p->cf->source_addr) ? p->cf->source_addr : p->local_addr;
+ /* Called only for single-hop BGP sessions */
+
+ if (ipa_zero(p->source_addr))
+ p->source_addr = p->neigh->iface->addr->ip;
- DBG("BGP: local=%I remote=%I\n", p->source_addr, p->next_hop);
#ifdef IPV6
{
struct ifa *a;
@@ -691,10 +700,6 @@ bgp_start_neighbor(struct bgp_proto *p)
}
#endif
- int rv = bgp_open(p);
- if (rv < 0)
- return;
-
bgp_initiate(p);
}
@@ -742,25 +747,22 @@ bgp_start_locked(struct object_lock *lock)
if (p->p.proto_state != PS_START)
{
DBG("BGP: Got lock in different state %d\n", p->p.proto_state);
- return;
+ return;
}
DBG("BGP: Got lock\n");
- p->local_id = proto_get_router_id(&cf->c);
- p->next_hop = cf->multihop ? cf->multihop_via : cf->remote_ip;
- p->neigh = neigh_find(&p->p, &p->next_hop, NEF_STICKY);
- if (cf->rr_client)
+ if (cf->multihop)
{
- p->rr_cluster_id = cf->rr_cluster_id ? cf->rr_cluster_id : p->local_id;
- p->rr_client = cf->rr_client;
+ /* Multi-hop sessions do not use neighbor entries */
+ bgp_initiate(p);
+ return;
}
- p->rs_client = cf->rs_client;
-
- if (!p->neigh)
+ p->neigh = neigh_find(&p->p, &cf->remote_ip, NEF_STICKY);
+ if (!p->neigh || (p->neigh->scope == SCOPE_HOST))
{
- log(L_ERR "%s: Invalid next hop %I", p->p.name, p->next_hop);
+ log(L_ERR "%s: Invalid remote address %I", p->p.name, cf->remote_ip);
/* As we do not start yet, we can just disable protocol */
p->p.disabled = 1;
bgp_store_error(p, NULL, BE_MISC, BEM_INVALID_NEXT_HOP);
@@ -771,7 +773,7 @@ bgp_start_locked(struct object_lock *lock)
if (p->neigh->iface)
bgp_start_neighbor(p);
else
- BGP_TRACE(D_EVENTS, "Waiting for %I to become my neighbor", p->next_hop);
+ BGP_TRACE(D_EVENTS, "Waiting for %I to become my neighbor", cf->remote_ip);
}
static int
@@ -796,6 +798,9 @@ bgp_start(struct proto *P)
p->startup_timer->hook = bgp_startup_timeout;
p->startup_timer->data = p;
+ p->remote_id = 0;
+ p->source_addr = p->cf->source_addr;
+
/*
* Before attempting to create the connection, we need to lock the
* port, so that we are sure we're the only instance attempting to talk
@@ -869,7 +874,13 @@ bgp_init(struct proto_config *C)
p->local_as = c->local_as;
p->remote_as = c->remote_as;
p->is_internal = (c->local_as == c->remote_as);
+ p->local_id = proto_get_router_id(C);
+ p->rs_client = c->rs_client;
+ p->rr_client = c->rr_client;
+ if (p->rr_client)
+ p->rr_cluster_id = c->rr_cluster_id ? c->rr_cluster_id : p->local_id;
p->igp_table = get_igp_table(c);
+
return P;
}
@@ -1046,7 +1057,6 @@ bgp_show_proto_info(struct proto *P)
cli_msg(-1006, " Neighbor AS: %u", p->remote_as);
cli_msg(-1006, " Neighbor ID: %R", p->remote_id);
cli_msg(-1006, " Neighbor address: %I", p->cf->remote_ip);
- cli_msg(-1006, " Nexthop address: %I", p->next_hop);
cli_msg(-1006, " Source address: %I", p->source_addr);
cli_msg(-1006, " Neighbor caps: %s%s",
c->peer_refresh_support ? " refresh" : "",
diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h
index 76844af..160aa3d 100644
--- a/proto/bgp/bgp.h
+++ b/proto/bgp/bgp.h
@@ -20,7 +20,6 @@ struct bgp_config {
u32 local_as, remote_as;
ip_addr remote_ip;
int multihop; /* Number of hops if multihop */
- ip_addr multihop_via; /* Multihop: address to route to */
ip_addr source_addr; /* Source address to use */
int next_hop_self; /* Always set next hop to local IP address */
int missing_lladdr; /* What we will do when we don' know link-local addr, see MLL_* */
@@ -89,10 +88,8 @@ struct bgp_proto {
struct bgp_conn outgoing_conn; /* Outgoing connection we're working with */
struct bgp_conn incoming_conn; /* Incoming connection we have neither accepted nor rejected yet */
struct object_lock *lock; /* Lock for neighbor connection */
- ip_addr next_hop; /* Either the peer or multihop_via */
- struct neighbor *neigh; /* Neighbor entry corresponding to next_hop */
- ip_addr local_addr; /* Address of the local end of the link to next_hop */
- ip_addr source_addr; /* Address used as advertised next hop, usually local_addr */
+ struct neighbor *neigh; /* Neighbor entry corresponding to remote ip, NULL if multihop */
+ ip_addr source_addr; /* Local address used as an advertised next hop */
rtable *igp_table; /* Table used for recursive next hop lookups */
struct event *event; /* Event for respawning and shutting process */
struct timer *startup_timer; /* Timer used to delay protocol startup due to previous errors (startup_delay) */
@@ -109,7 +106,7 @@ struct bgp_proto {
#ifdef IPV6
byte *mp_reach_start, *mp_unreach_start; /* Multiprotocol BGP attribute notes */
unsigned mp_reach_len, mp_unreach_len;
- ip_addr local_link; /* Link-level version of local_addr */
+ ip_addr local_link; /* Link-level version of source_addr */
#endif
};
diff --git a/proto/bgp/config.Y b/proto/bgp/config.Y
index a46431c..e591f89 100644
--- a/proto/bgp/config.Y
+++ b/proto/bgp/config.Y
@@ -54,6 +54,7 @@ bgp_proto:
bgp_proto_start proto_name '{'
| bgp_proto proto_item ';'
| bgp_proto LOCAL AS expr ';' { BGP_CFG->local_as = $4; }
+ | bgp_proto LOCAL ipa AS expr ';' { BGP_CFG->source_addr = $3; BGP_CFG->local_as = $5; }
| bgp_proto NEIGHBOR ipa AS expr ';' {
if (ipa_nonzero(BGP_CFG->remote_ip)) cf_error("Only one neighbor per BGP instance is allowed");
@@ -67,7 +68,8 @@ bgp_proto:
| bgp_proto STARTUP HOLD TIME expr ';' { BGP_CFG->initial_hold_time = $5; }
| bgp_proto CONNECT RETRY TIME expr ';' { BGP_CFG->connect_retry_time = $5; }
| bgp_proto KEEPALIVE TIME expr ';' { BGP_CFG->keepalive_time = $4; }
- | bgp_proto MULTIHOP expr VIA ipa ';' { BGP_CFG->multihop = $3; BGP_CFG->multihop_via = $5; }
+ | bgp_proto MULTIHOP ';' { BGP_CFG->multihop = 64; }
+ | bgp_proto MULTIHOP expr ';' { BGP_CFG->multihop = $3; }
| bgp_proto NEXT HOP SELF ';' { BGP_CFG->next_hop_self = 1; }
| bgp_proto MISSING LLADDR SELF ';' { BGP_CFG->missing_lladdr = MLL_SELF; }
| bgp_proto MISSING LLADDR DROP ';' { BGP_CFG->missing_lladdr = MLL_DROP; }
diff --git a/proto/bgp/packets.c b/proto/bgp/packets.c
index 514e878..ee2c1b0 100644
--- a/proto/bgp/packets.c
+++ b/proto/bgp/packets.c
@@ -58,7 +58,7 @@ mrt_put_bgp4_hdr(byte *buf, struct bgp_conn *conn, int as4)
buf+=4;
}
- put_u16(buf+0, p->neigh->iface->index);
+ put_u16(buf+0, p->neigh ? p->neigh->iface->index : 0);
put_u16(buf+2, BGP_AF);
buf+=4;
buf = ipa_put_addr(buf, conn->sk ? conn->sk->daddr : IPA_NONE);
@@ -402,7 +402,7 @@ bgp_create_update(struct bgp_conn *conn, byte *buf)
*/
n = neigh_find(&p->p, &ip, 0);
- if (n && n->iface == p->neigh->iface)
+ if (n && p->neigh && n->iface == p->neigh->iface)
{
if (second && ipa_nonzero(ipp[1]))
ip_ll = ipp[1];
diff --git a/sysdep/unix/io.c b/sysdep/unix/io.c
index 690c8fc..c1837f0 100644
--- a/sysdep/unix/io.c
+++ b/sysdep/unix/io.c
@@ -1004,6 +1004,11 @@ sk_leave_group(sock *s, ip_addr maddr)
static void
sk_tcp_connected(sock *s)
{
+ sockaddr lsa;
+ int lsa_len = sizeof(lsa);
+ if (getsockname(s->fd, (struct sockaddr *) &lsa, &lsa_len) == 0)
+ get_sockaddr(&lsa, &s->saddr, &s->sport, 1);
+
s->type = SK_TCP;
sk_alloc_bufs(s);
s->tx_hook(s);