diff options
author | Ondrej Zajicek <santiago@crfreenet.org> | 2008-12-19 01:34:39 +0100 |
---|---|---|
committer | Ondrej Zajicek <santiago@crfreenet.org> | 2008-12-19 01:34:39 +0100 |
commit | 11b32d911715cbfb3ce4c87685b1388e4b0de1c4 (patch) | |
tree | 48b86083df70a669f0abeaaeab7f89d94d952edb | |
parent | b933281ed5efb9ad9375c3ea41ee2412b9f89c15 (diff) | |
download | bird-11b32d911715cbfb3ce4c87685b1388e4b0de1c4.tar bird-11b32d911715cbfb3ce4c87685b1388e4b0de1c4.zip |
Major changes to BGP
Fixes two race conditions that caused BIRD to crash, several unhandled
cases during BGP initialization, and some other bugs. Also changes the
handling of the startup delay to be more useful and implements
reporting of the last error in the 'show protocols' command.
-rw-r--r-- | proto/bgp/attrs.c | 2 | ||||
-rw-r--r-- | proto/bgp/bgp.c | 496 | ||||
-rw-r--r-- | proto/bgp/bgp.h | 53 | ||||
-rw-r--r-- | proto/bgp/packets.c | 109 |
4 files changed, 464 insertions, 196 deletions
diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c index c13f905..d3716ea 100644 --- a/proto/bgp/attrs.c +++ b/proto/bgp/attrs.c @@ -365,6 +365,7 @@ bgp_encode_attrs(struct bgp_proto *p, byte *w, ea_list *attrs, int remains) int new_used; int nl = as_path_convert_to_old(a->u.ptr, buf, &new_used); + DBG("BGP: Encoding old AS_PATH\n"); rv = bgp_encode_attr_hdr(w, BAF_TRANSITIVE, BA_AS_PATH, nl); ADVANCE(w, remains, rv); memcpy(w, buf, nl); @@ -381,6 +382,7 @@ bgp_encode_attrs(struct bgp_proto *p, byte *w, ea_list *attrs, int remains) * discarded in bgp_check_as_path(). */ + DBG("BGP: Encoding AS4_PATH\n"); rv = bgp_encode_attr_hdr(w, BAF_OPTIONAL | BAF_TRANSITIVE, BA_AS4_PATH, len); ADVANCE(w, remains, rv); memcpy(w, a->u.ptr->data, len); diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c index 29d2e09..46b2890 100644 --- a/proto/bgp/bgp.c +++ b/proto/bgp/bgp.c @@ -53,7 +53,7 @@ * Unknown transitive attributes are attached to the route as %EAF_TYPE_OPAQUE byte streams. */ -#undef LOCAL_DEBUG +#define LOCAL_DEBUG #include "nest/bird.h" #include "nest/iface.h" @@ -70,20 +70,69 @@ struct linpool *bgp_linpool; /* Global temporary pool */ static sock *bgp_listen_sk; /* Global listening socket */ static int bgp_counter; /* Number of protocol instances using the listening socket */ -static char *bgp_state_names[] = { "Idle", "Connect", "Active", "OpenSent", "OpenConfirm", "Established" }; +static void bgp_close(struct bgp_proto *p, int apply_md5); static void bgp_connect(struct bgp_proto *p); +static void bgp_active(struct bgp_proto *p, int delay); static void bgp_initiate(struct bgp_proto *p); -static void bgp_setup_listen_sk(void); +static void bgp_stop(struct bgp_proto *p); +static sock *bgp_setup_listen_sk(void); +/** + * bgp_open - open a BGP instance + * @p: BGP instance + * + * This function allocates and configures shared BGP resources. + * Should be called as the last step during initialization + * (when lock is acquired and neighbor is ready). 
+ * When error, state changed to PS_DOWN, -1 is returned and caller + * should return immediately. + */ +static int +bgp_open(struct bgp_proto *p) +{ + bgp_counter++; + + if (!bgp_listen_sk) + bgp_listen_sk = bgp_setup_listen_sk(); + + if (!bgp_linpool) + bgp_linpool = lp_new(&root_pool, 4080); + + if (p->cf->password) + { + int rv = sk_set_md5_auth(bgp_listen_sk, p->cf->remote_ip, p->cf->password); + if (rv < 0) + { + bgp_close(p, 0); + p->p.disabled = 1; + bgp_store_error(p, NULL, BE_MISC, BEM_INVALID_MD5); + proto_notify_state(&p->p, PS_DOWN); + return -1; + } + } + + p->start_state = BSS_CONNECT; + return 0; +} + +/** + * bgp_close - close a BGP instance + * @p: BGP instance + * @apply_md5: 0 to disable unsetting MD5 auth + * + * This function frees and deconfigures shared BGP resources. + * @apply_md5 is set to 0 when bgp_close is called as a cleanup + * from failed bgp_open(). + */ static void -bgp_close(struct bgp_proto *p) +bgp_close(struct bgp_proto *p, int apply_md5) { ASSERT(bgp_counter); bgp_counter--; - if (p->cf->password) + if (p->cf->password && apply_md5) sk_set_md5_auth(bgp_listen_sk, p->cf->remote_ip, NULL); if (!bgp_counter) @@ -123,18 +172,11 @@ bgp_start_timer(timer *t, int value) * * This function takes a connection described by the &bgp_conn structure, * closes its socket and frees all resources associated with it. - * - * If the connection is being closed due to a protocol error, adjust - * the connection restart timer as well according to the error recovery - * policy set in the configuration. - * - * If the connection was marked as primary, it shuts down the protocol as well. 
*/ void bgp_close_conn(struct bgp_conn *conn) { struct bgp_proto *p = conn->bgp; - struct bgp_config *cf = p->cf; DBG("BGP: Closing connection\n"); conn->packets_to_send = 0; @@ -146,54 +188,184 @@ bgp_close_conn(struct bgp_conn *conn) conn->hold_timer = NULL; rfree(conn->sk); conn->sk = NULL; - conn->state = BS_IDLE; - if (conn->error_flag > 1) + rfree(conn->tx_ev); + conn->tx_ev = NULL; +} + + +/** + * bgp_update_startup_delay - update a startup delay + * @p: BGP instance + * @conn: related BGP connection + * @code: BGP error code + * @subcode: BGP error subcode + * + * This function updates a startup delay that is used to postpone next BGP connect. + * It also handles disable_after_error and might stop BGP instance when error + * happened and disable_after_error is on. + * + * It should be called when BGP protocol error happened. + */ +void +bgp_update_startup_delay(struct bgp_proto *p, struct bgp_conn *conn, unsigned code, unsigned subcode) +{ + struct bgp_config *cf = p->cf; + + /* Don't handle cease messages as errors */ + if (code == 6 && !subcode) { - if (cf->disable_after_error) - p->p.disabled = 1; - if (p->last_connect && (bird_clock_t)(p->last_connect + cf->error_amnesia_time) < now) - p->startup_delay = 0; - if (!p->startup_delay) - p->startup_delay = cf->error_delay_time_min; - else - { - p->startup_delay *= 2; - if (p->startup_delay > cf->error_delay_time_max) - p->startup_delay = cf->error_delay_time_max; - } + p->startup_delay = 0; + return; + } + + /* During start, we only consider errors on outgoing connection, because + otherwise delay timer for outgoing connection is already running and + we could increase delay time two times (or more) per one attempt to + connect. 
+ */ + if ((p->p.proto_state == PS_START) && (conn != &p->outgoing_conn)) + return; + + DBG("BGP: Updating startup delay %d %d\n", code, subcode); + + p->last_proto_error = now; + + if (cf->disable_after_error) + { + p->startup_delay = 0; + p->p.disabled = 1; + if (p->p.proto_state == PS_START) + bgp_stop(p); + + return; } - if (conn->primary) + + if (p->last_proto_error && ((now - p->last_proto_error) >= cf->error_amnesia_time)) + p->startup_delay = 0; + + if (!p->startup_delay) + p->startup_delay = cf->error_delay_time_min; + else { - bgp_close(p); - p->conn = NULL; - proto_notify_state(&p->p, PS_DOWN); + p->startup_delay *= 2; + if (p->startup_delay > cf->error_delay_time_max) + p->startup_delay = cf->error_delay_time_max; } - else if (conn->error_flag > 1) - bgp_initiate(p); } -static int -bgp_graceful_close_conn(struct bgp_conn *c) +static void +bgp_graceful_close_conn(struct bgp_conn *conn) { - switch (c->state) + switch (conn->state) { case BS_IDLE: - return 0; + case BS_CLOSE: + return; case BS_CONNECT: case BS_ACTIVE: - bgp_close_conn(c); - return 1; + bgp_conn_enter_idle_state(conn); + return; case BS_OPENSENT: case BS_OPENCONFIRM: case BS_ESTABLISHED: - bgp_error(c, 6, 0, NULL, 0); - return 1; + bgp_error(conn, 6, 0, NULL, 0); + return; default: - bug("bgp_graceful_close_conn: Unknown state %d", c->state); + bug("bgp_graceful_close_conn: Unknown state %d", conn->state); } } static void +bgp_down(struct bgp_proto *p) +{ + if (p->start_state > BSS_PREPARE) + bgp_close(p, 1); + + DBG("BGP: DOWN\n"); + proto_notify_state(&p->p, PS_DOWN); +} + +static void +bgp_decision(void *vp) +{ + struct bgp_proto *p = vp; + + DBG("BGP: Decision start\n"); + if ((p->p.proto_state == PS_START) + && (p->outgoing_conn.state == BS_IDLE)) + bgp_initiate(p); + + if ((p->p.proto_state == PS_STOP) + && (p->outgoing_conn.state == BS_IDLE) + && (p->incoming_conn.state == BS_IDLE)) + bgp_down(p); +} + +static void +bgp_stop(struct bgp_proto *p) +{ + proto_notify_state(&p->p, 
PS_STOP); + bgp_graceful_close_conn(&p->outgoing_conn); + bgp_graceful_close_conn(&p->incoming_conn); + ev_schedule(p->event); +} + +void +bgp_conn_enter_established_state(struct bgp_conn *conn) +{ + struct bgp_proto *p = conn->bgp; + + BGP_TRACE(D_EVENTS, "BGP session established"); + DBG("BGP: UP!!!\n"); + + p->conn = conn; + p->last_error_class = 0; + p->last_error_code = 0; + bgp_attr_init(conn->bgp); + conn->state = BS_ESTABLISHED; + proto_notify_state(&p->p, PS_UP); +} + +static void +bgp_conn_leave_established_state(struct bgp_proto *p) +{ + BGP_TRACE(D_EVENTS, "BGP session closed"); + p->conn = NULL; + + if (p->p.proto_state == PS_UP) + bgp_stop(p); +} + +void +bgp_conn_enter_close_state(struct bgp_conn *conn) +{ + struct bgp_proto *p = conn->bgp; + int os = conn->state; + + conn->state = BS_CLOSE; + tm_stop(conn->hold_timer); + tm_stop(conn->keepalive_timer); + conn->sk->rx_hook = NULL; + + if (os == BS_ESTABLISHED) + bgp_conn_leave_established_state(p); +} + +void +bgp_conn_enter_idle_state(struct bgp_conn *conn) +{ + struct bgp_proto *p = conn->bgp; + int os = conn->state; + + bgp_close_conn(conn); + conn->state = BS_IDLE; + ev_schedule(p->event); + + if (os == BS_ESTABLISHED) + bgp_conn_leave_established_state(p); +} + +static void bgp_send_open(struct bgp_conn *conn) { DBG("BGP: Sending open\n"); @@ -222,8 +394,13 @@ bgp_connect_timeout(timer *t) struct bgp_proto *p = conn->bgp; DBG("BGP: connect_timeout\n"); - bgp_close_conn(conn); - bgp_connect(p); + if (p->p.proto_state == PS_START) + { + bgp_close_conn(conn); + bgp_connect(p); + } + else + bgp_conn_enter_idle_state(conn); } static void @@ -232,26 +409,14 @@ bgp_sock_err(sock *sk, int err) struct bgp_conn *conn = sk->data; struct bgp_proto *p = conn->bgp; + bgp_store_error(p, conn, BE_SOCKET, err); + if (err) BGP_TRACE(D_EVENTS, "Connection lost (%M)", err); else BGP_TRACE(D_EVENTS, "Connection closed"); - switch (conn->state) - { - case BS_CONNECT: - case BS_OPENSENT: - rfree(conn->sk); - conn->sk 
= NULL; - conn->state = BS_ACTIVE; - bgp_start_timer(conn->connect_retry_timer, p->cf->connect_retry_time); - break; - case BS_OPENCONFIRM: - case BS_ESTABLISHED: - bgp_close_conn(conn); - break; - default: - bug("bgp_sock_err called in invalid state %d", conn->state); - } + + bgp_conn_enter_idle_state(conn); } static void @@ -280,8 +445,6 @@ bgp_setup_conn(struct bgp_proto *p, struct bgp_conn *conn) conn->sk = NULL; conn->bgp = p; conn->packets_to_send = 0; - conn->error_flag = 0; - conn->primary = 0; t = conn->connect_retry_timer = tm_new(p->p.pool); t->hook = bgp_connect_timeout; @@ -292,6 +455,9 @@ bgp_setup_conn(struct bgp_proto *p, struct bgp_conn *conn) t = conn->keepalive_timer = tm_new(p->p.pool); t->hook = bgp_keepalive_timeout; t->data = conn; + conn->tx_ev = ev_new(p->p.pool); + conn->tx_ev->hook = bgp_kick_tx; + conn->tx_ev->data = conn; } static void @@ -302,6 +468,17 @@ bgp_setup_sk(struct bgp_proto *p, struct bgp_conn *conn, sock *s) conn->sk = s; } +static void +bgp_active(struct bgp_proto *p, int delay) +{ + struct bgp_conn *conn = &p->outgoing_conn; + + BGP_TRACE(D_EVENTS, "Connect delayed by %d seconds", delay); + bgp_setup_conn(p, conn); + conn->state = BS_ACTIVE; + bgp_start_timer(conn->connect_retry_timer, delay); +} + /** * bgp_connect - initiate an outgoing connection * @p: BGP instance @@ -317,7 +494,6 @@ bgp_connect(struct bgp_proto *p) /* Enter Connect state and start establishing c struct bgp_conn *conn = &p->outgoing_conn; DBG("BGP: Connecting\n"); - p->last_connect = now; s = sk_new(p->p.pool); s->type = SK_TCP_ACTIVE; if (ipa_nonzero(p->cf->source_addr)) @@ -348,17 +524,10 @@ bgp_connect(struct bgp_proto *p) /* Enter Connect state and start establishing c static void bgp_initiate(struct bgp_proto *p) { - unsigned delay; + unsigned delay = MAX(p->startup_delay, p->cf->start_delay_time); - delay = p->cf->start_delay_time; - if (p->startup_delay > delay) - delay = p->startup_delay; if (delay) - { - BGP_TRACE(D_EVENTS, "Connect delayed 
by %d seconds", delay); - bgp_setup_conn(p, &p->outgoing_conn); - bgp_start_timer(p->outgoing_conn.connect_retry_timer, delay); - } + bgp_active(p, delay); else bgp_connect(p); } @@ -389,7 +558,7 @@ bgp_incoming_connection(sock *sk, int dummy UNUSED) if (ipa_equal(p->cf->remote_ip, sk->daddr)) { match = 1; - if ((p->p.proto_state == PS_START || p->p.proto_state == PS_UP) && p->neigh && p->neigh->iface) + if ((p->p.proto_state == PS_START || p->p.proto_state == PS_UP) && (p->start_state > BSS_PREPARE)) { BGP_TRACE(D_EVENTS, "Incoming connection from %I port %d", sk->daddr, sk->dport); if (p->incoming_conn.sk) @@ -411,27 +580,25 @@ bgp_incoming_connection(sock *sk, int dummy UNUSED) return 0; } -static void +static sock * bgp_setup_listen_sk(void) { - if (!bgp_listen_sk) + sock *s = sk_new(&root_pool); + DBG("BGP: Creating incoming socket\n"); + s->type = SK_TCP_PASSIVE; + s->sport = BGP_PORT; + s->tos = IP_PREC_INTERNET_CONTROL; + s->rbsize = BGP_RX_BUFFER_SIZE; + s->tbsize = BGP_TX_BUFFER_SIZE; + s->rx_hook = bgp_incoming_connection; + if (sk_open(s)) { - sock *s = sk_new(&root_pool); - DBG("BGP: Creating incoming socket\n"); - s->type = SK_TCP_PASSIVE; - s->sport = BGP_PORT; - s->tos = IP_PREC_INTERNET_CONTROL; - s->rbsize = BGP_RX_BUFFER_SIZE; - s->tbsize = BGP_TX_BUFFER_SIZE; - s->rx_hook = bgp_incoming_connection; - if (sk_open(s)) - { - log(L_ERR "Unable to open incoming BGP socket"); - rfree(s); - } - else - bgp_listen_sk = s; + log(L_ERR "Unable to open incoming BGP socket"); + rfree(s); + return NULL; } + else + return s; } static void @@ -452,6 +619,11 @@ bgp_start_neighbor(struct bgp_proto *p) DBG("BGP: Selected link-level address %I\n", p->local_link); } #endif + + int rv = bgp_open(p); + if (rv < 0) + return; + bgp_initiate(p); } @@ -462,16 +634,20 @@ bgp_neigh_notify(neighbor *n) if (n->iface) { - BGP_TRACE(D_EVENTS, "Neighbor found"); - bgp_start_neighbor(p); + if ((p->p.proto_state == PS_START) && (p->start_state == BSS_PREPARE)) + { + 
BGP_TRACE(D_EVENTS, "Neighbor found"); + bgp_start_neighbor(p); + } } else { - BGP_TRACE(D_EVENTS, "Neighbor lost"); - /* Send cease packets, but don't wait for them to be delivered */ - bgp_graceful_close_conn(&p->outgoing_conn); - bgp_graceful_close_conn(&p->incoming_conn); - proto_notify_state(&p->p, PS_DOWN); + if ((p->p.proto_state == PS_START) || (p->p.proto_state == PS_UP)) + { + BGP_TRACE(D_EVENTS, "Neighbor lost"); + bgp_store_error(p, NULL, BE_MISC, BEM_NEIGHBOR_LOST); + bgp_stop(p); + } } } @@ -481,6 +657,12 @@ bgp_start_locked(struct object_lock *lock) struct bgp_proto *p = lock->data; struct bgp_config *cf = p->cf; + if (p->p.proto_state != PS_START) + { + DBG("BGP: Got lock in different state %d\n", p->p.proto_state); + return; + } + DBG("BGP: Got lock\n"); p->local_id = cf->c.global->router_id; p->next_hop = cf->multihop ? cf->multihop_via : cf->remote_ip; @@ -497,10 +679,14 @@ bgp_start_locked(struct object_lock *lock) if (!p->neigh) { log(L_ERR "%s: Invalid next hop %I", p->p.name, p->next_hop); + /* As we do not start yet, we can just disable protocol */ p->p.disabled = 1; + bgp_store_error(p, NULL, BE_MISC, BEM_INVALID_NEXT_HOP); proto_notify_state(&p->p, PS_DOWN); + return; } - else if (p->neigh->iface) + + if (p->neigh->iface) bgp_start_neighbor(p); else BGP_TRACE(D_EVENTS, "Waiting for %I to become my neighbor", p->next_hop); @@ -513,16 +699,14 @@ bgp_start(struct proto *P) struct object_lock *lock; DBG("BGP: Startup.\n"); + p->start_state = BSS_PREPARE; p->outgoing_conn.state = BS_IDLE; p->incoming_conn.state = BS_IDLE; - p->startup_delay = 0; p->neigh = NULL; - bgp_counter++; - bgp_setup_listen_sk(); - - if (!bgp_linpool) - bgp_linpool = lp_new(&root_pool, 4080); + p->event = ev_new(p->p.pool); + p->event->hook = bgp_decision; + p->event->data = p; /* * Before attempting to create the connection, we need to lock the @@ -539,16 +723,6 @@ bgp_start(struct proto *P) lock->data = p; olock_acquire(lock); - /* We should create security association 
after we get a lock not to - * break existing connections. - */ - if (p->cf->password) - { - int rv = sk_set_md5_auth(bgp_listen_sk, p->cf->remote_ip, p->cf->password); - if (rv < 0) - return PS_STOP; - } - return PS_START; } @@ -558,31 +732,11 @@ bgp_shutdown(struct proto *P) struct bgp_proto *p = (struct bgp_proto *) P; BGP_TRACE(D_EVENTS, "Shutdown requested"); + bgp_store_error(p, NULL, BE_MAN_DOWN, 0); + p->startup_delay = 0; + bgp_stop(p); - /* - * We want to send the Cease notification message to all connections - * we have open, but we don't want to wait for all of them to complete. - * We are willing to handle the primary connection carefully, but for - * the others we just try to send the packet and if there is no buffer - * space free, we'll gracefully finish. - */ - - proto_notify_state(&p->p, PS_STOP); - if (!p->conn) - { - if (p->outgoing_conn.state != BS_IDLE) - p->outgoing_conn.primary = 1; /* Shuts protocol down after connection close */ - else if (p->incoming_conn.state != BS_IDLE) - p->incoming_conn.primary = 1; - } - if (bgp_graceful_close_conn(&p->outgoing_conn) || bgp_graceful_close_conn(&p->incoming_conn)) - return p->p.proto_state; - else - { - /* No connections open, shutdown automatically */ - bgp_close(p); - return PS_DOWN; - } + return p->p.proto_state; } static struct proto * @@ -618,19 +772,48 @@ bgp_init(struct proto_config *C) void bgp_error(struct bgp_conn *c, unsigned code, unsigned subcode, byte *data, int len) { - if (c->error_flag) + if (c->state == BS_CLOSE) return; + bgp_log_error(c->bgp, "Error", code, subcode, data, (len > 0) ? len : -len); - c->error_flag = 1 + (code != 6); + bgp_store_error(c->bgp, c, BE_BGP_TX, (code << 16) | subcode); + bgp_update_startup_delay(c->bgp, c, code, subcode); + bgp_conn_enter_close_state(c); + c->notify_code = code; c->notify_subcode = subcode; c->notify_data = data; c->notify_size = (len > 0) ? 
len : 0; - if (c->primary) - proto_notify_state(&c->bgp->p, PS_STOP); bgp_schedule_packet(c, PKT_NOTIFICATION); } +/** + * bgp_store_error - store last error for status report + * @p: BGP instance + * @c: connection + * @class: error class (BE_xxx constants) + * @code: error code (class specific) + * + * bgp_store_error() decides whether given error is interesting enough + * and store that error to last_error variables of @p + */ +void +bgp_store_error(struct bgp_proto *p, struct bgp_conn *c, u8 class, u32 code) +{ + /* During PS_UP, we ignore errors on secondary connection */ + if ((p->p.proto_state == PS_UP) && c && (c != p->conn)) + return; + + /* During PS_STOP, we ignore any errors, as we want to report + * the error that caused transition to PS_STOP + */ + if (p->p.proto_state == PS_STOP) + return; + + p->last_error_class = class; + p->last_error_code = code; +} + void bgp_check(struct bgp_config *c) { @@ -639,7 +822,7 @@ bgp_check(struct bgp_config *c) if (!c->remote_as) cf_error("Neighbor must be configured"); if (!bgp_as4_support && c->enable_as4) - cf_error("AS4 support disabled globbaly"); + cf_error("AS4 support disabled globally"); if (!c->enable_as4 && (c->local_as > 0xFFFF)) cf_error("Local AS number out of range"); if (!c->enable_as4 && (c->remote_as > 0xFFFF)) @@ -650,15 +833,40 @@ bgp_check(struct bgp_config *c) cf_error("Only external neighbor can be RS client"); } +static char *bgp_state_names[] = { "Idle", "Connect", "Active", "OpenSent", "OpenConfirm", "Established", "Close" }; +static char *bgp_err_classes[] = { "", "Error: ", "Socket: ", "Received: ", "BGP Error: ", "Automatic shutdown", ""}; +static char *bgp_misc_errors[] = { "", "Neighbor lost", "Invalid next hop", "Kernel MD5 auth failed" }; + + static void bgp_get_status(struct proto *P, byte *buf) { struct bgp_proto *p = (struct bgp_proto *) P; + const byte *err1 = bgp_err_classes[p->last_error_class]; + const byte *err2 = ""; + byte errbuf[32]; + + switch (p->last_error_class) + { + 
case BE_MISC: + err2 = bgp_misc_errors[p->last_error_code]; + break; + case BE_SOCKET: + err2 = (p->last_error_code == 0) ? "Connection closed" : strerror(p->last_error_code); + break; + case BE_BGP_RX: + case BE_BGP_TX: + err2 = bgp_error_dsc(errbuf, p->last_error_code >> 16, p->last_error_code & 0xFF); + break; + } + if (P->proto_state == PS_DOWN) - buf[0] = 0; + bsprintf(buf, "%s%s", err1, err2); else - strcpy(buf, bgp_state_names[MAX(p->incoming_conn.state, p->outgoing_conn.state)]); + bsprintf(buf, "%-14s%s%s", + bgp_state_names[MAX(p->incoming_conn.state, p->outgoing_conn.state)], + err1, err2); } static int diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h index aaa2c4a..5c180cc 100644 --- a/proto/bgp/bgp.h +++ b/proto/bgp/bgp.h @@ -47,12 +47,13 @@ struct bgp_conn { struct timer *connect_retry_timer; struct timer *hold_timer; struct timer *keepalive_timer; + struct event *tx_ev; int packets_to_send; /* Bitmap of packet types to be sent */ int notify_code, notify_subcode, notify_size; byte *notify_data; int error_flag; /* Error state, ignore all input */ - int primary; /* This connection is primary */ u32 advertised_as; /* Temporary value for AS number received */ + int as4_support; /* Peer supports 4B AS numbers [RFC4893] */ unsigned hold_time, keepalive_time; /* Times calculated from my and neighbor's requirements */ }; @@ -60,8 +61,8 @@ struct bgp_proto { struct proto p; struct bgp_config *cf; /* Shortcut to BGP configuration */ u32 local_as, remote_as; + int start_state; /* Substates that partitions BS_START */ int is_internal; /* Internal BGP connection (local_as == remote_as) */ - int as4_support; /* Peer supports 4B AS numbers [RFC4893] */ int as4_session; /* Session uses 4B AS numbers in AS_PATH (both sides support it) */ u32 local_id; /* BGP identifier of this router */ u32 remote_id; /* BGP identifier of the neighbor */ @@ -75,13 +76,17 @@ struct bgp_proto { ip_addr next_hop; /* Either the peer or multihop_via */ struct neighbor *neigh; /* Neighbor 
entry corresponding to next_hop */ ip_addr local_addr; /* Address of the local end of the link to next_hop */ + struct event *event; /* Event for respawning and shutting process */ struct bgp_bucket **bucket_hash; /* Hash table of attribute buckets */ unsigned int hash_size, hash_count, hash_limit; struct fib prefix_fib; /* Prefixes to be sent */ list bucket_queue; /* Queue of buckets to send */ struct bgp_bucket *withdraw_bucket; /* Withdrawn routes */ unsigned startup_delay; /* Time to delay protocol startup by due to errors */ - bird_clock_t last_connect; /* Time of last connect attempt */ + bird_clock_t last_proto_error; /* Time of last error that leads to protocol stop */ + u8 last_error_class; /* Error class of last error */ + u32 last_error_code; /* Error code of last error. BGP protocol errors + are encoded as (bgp_err_code << 16 | bgp_err_subcode) */ #ifdef IPV6 byte *mp_reach_start, *mp_unreach_start; /* Multiprotocol BGP attribute notes */ unsigned mp_reach_len, mp_unreach_len; @@ -118,6 +123,12 @@ void bgp_start_timer(struct timer *t, int value); void bgp_check(struct bgp_config *c); void bgp_error(struct bgp_conn *c, unsigned code, unsigned subcode, byte *data, int len); void bgp_close_conn(struct bgp_conn *c); +void bgp_update_startup_delay(struct bgp_proto *p, struct bgp_conn *conn, unsigned code, unsigned subcode); +void bgp_conn_enter_established_state(struct bgp_conn *conn); +void bgp_conn_enter_close_state(struct bgp_conn *conn); +void bgp_conn_enter_idle_state(struct bgp_conn *conn); +void bgp_store_error(struct bgp_proto *p, struct bgp_conn *c, u8 class, u32 code); + #ifdef LOCAL_DEBUG #define BGP_FORCE_DEBUG 1 @@ -147,8 +158,10 @@ inline static void bgp_attach_attr_ip(struct ea_list **to, struct linpool *pool, /* packets.c */ void bgp_schedule_packet(struct bgp_conn *conn, int type); +void bgp_kick_tx(void *vconn); void bgp_tx(struct birdsock *sk); int bgp_rx(struct birdsock *sk, int size); +const byte * bgp_error_dsc(byte *buff, unsigned 
code, unsigned subcode); void bgp_log_error(struct bgp_proto *p, char *msg, unsigned code, unsigned subcode, byte *data, unsigned len); /* Packet types */ @@ -186,7 +199,7 @@ void bgp_log_error(struct bgp_proto *p, char *msg, unsigned code, unsigned subco #define BA_AS4_PATH 0x11 /* [RFC4893] */ #define BA_AS4_AGGREGATOR 0x12 -/* BGP states */ +/* BGP connection states */ #define BS_IDLE 0 #define BS_CONNECT 1 /* Attempting to connect */ @@ -194,6 +207,38 @@ void bgp_log_error(struct bgp_proto *p, char *msg, unsigned code, unsigned subco #define BS_OPENSENT 3 #define BS_OPENCONFIRM 4 #define BS_ESTABLISHED 5 +#define BS_CLOSE 6 /* Used during transition to BS_IDLE */ + +/* BGP start states + * + * Used in PS_START for fine-grained specification of starting state. + * + * When BGP protocol is started by core, it goes to BSS_PREPARE. When BGP protocol + * done what is neccessary to start itself (like acquiring the lock), it goes to BSS_CONNECT. + * When some connection attempt failed because of option or capability error, it goes to + * BSS_CONNECT_NOCAP. + */ + +#define BSS_PREPARE 0 /* Used before ordinary BGP started, i. e. 
waiting for lock */ +#define BSS_CONNECT 1 /* Ordinary BGP connecting */ +#define BSS_CONNECT_NOCAP 2 /* Legacy BGP connecting (without capabilities) */ + +/* Error classes */ + +#define BE_NONE 0 +#define BE_MISC 1 /* Miscellaneous error */ +#define BE_SOCKET 2 /* Socket error */ +#define BE_BGP_RX 3 /* BGP protocol error notification received */ +#define BE_BGP_TX 4 /* BGP protocol error notification sent */ +#define BE_AUTO_DOWN 5 /* Automatic shutdown */ +#define BE_MAN_DOWN 6 /* Manual shutdown */ + +/* Misc error codes */ + +#define BEM_NEIGHBOR_LOST 1 +#define BEM_INVALID_NEXT_HOP 2 +#define BEM_INVALID_MD5 3 /* MD5 authentication kernel request failed (possibly not supported */ + /* Well-known communities */ diff --git a/proto/bgp/packets.c b/proto/bgp/packets.c index 00cdf03..4e42d90 100644 --- a/proto/bgp/packets.c +++ b/proto/bgp/packets.c @@ -17,6 +17,8 @@ #include "lib/unaligned.h" #include "lib/socket.h" +#include "nest/cli.h" + #include "bgp.h" static byte * @@ -318,7 +320,8 @@ bgp_fire_tx(struct bgp_conn *conn) if (s & (1 << PKT_SCHEDULE_CLOSE)) { - bgp_close_conn(conn); + /* We can finally close connection and enter idle state */ + bgp_conn_enter_idle_state(conn); return 0; } if (s & (1 << PKT_NOTIFICATION)) @@ -371,8 +374,17 @@ bgp_schedule_packet(struct bgp_conn *conn, int type) DBG("BGP: Scheduling packet type %d\n", type); conn->packets_to_send |= 1 << type; if (conn->sk && conn->sk->tpos == conn->sk->tbuf) - while (bgp_fire_tx(conn)) - ; + ev_schedule(conn->tx_ev); +} + +void +bgp_kick_tx(void *vconn) +{ + struct bgp_conn *conn = vconn; + + DBG("BGP: kicking TX\n"); + while (bgp_fire_tx(conn)) + ; } void @@ -406,9 +418,9 @@ bgp_parse_capabilities(struct bgp_conn *conn, byte *opt, int len) case 65: if (cl != 4) goto err; - p->as4_support = 1; - p->as4_session = p->cf->enable_as4; - if (p->as4_session) + conn->as4_support = 1; + + if (p->cf->enable_as4) conn->advertised_as = get_u32(opt + 2); break; @@ -477,7 +489,7 @@ bgp_rx_open(struct 
bgp_conn *conn, byte *pkt, int len) /* Check state */ if (conn->state != BS_OPENSENT) - { bgp_error(conn, 5, 0, NULL, 0); } + { bgp_error(conn, 5, 0, NULL, 0); return; } /* Check message contents */ if (len < 29 || len != 29 + pkt[28]) @@ -489,7 +501,7 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, int len) id = get_u32(pkt+24); BGP_TRACE(D_PACKETS, "Got OPEN(as=%d,hold=%d,id=%08x)", conn->advertised_as, hold, id); - p->remote_id = id; // ??? + conn->as4_support = 0; // Default value, possibly changed by capability. if (bgp_parse_options(conn, pkt+29, pkt[28])) return; @@ -499,7 +511,6 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, int len) if (!id || id == 0xffffffff || id == p->local_id) { bgp_error(conn, 2, 3, pkt+24, -4); return; } - if (conn->advertised_as != p->remote_as) { bgp_error(conn, 2, 2, (byte *) &(conn->advertised_as), -4); return; @@ -513,6 +524,7 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, int len) case BS_CONNECT: case BS_ACTIVE: case BS_OPENSENT: + case BS_CLOSE: break; case BS_OPENCONFIRM: if ((p->local_id < id) == (conn == &p->incoming_conn)) @@ -532,19 +544,13 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, int len) bug("bgp_rx_open: Unknown state"); } - /* Make this connection primary */ - conn->primary = 1; - p->conn = conn; - /* Update our local variables */ - if (hold < p->cf->hold_time) - conn->hold_time = hold; - else - conn->hold_time = p->cf->hold_time; + conn->hold_time = MIN(hold, p->cf->hold_time); conn->keepalive_time = p->cf->keepalive_time ? 
: conn->hold_time / 3; - // p->remote_as = conn->advertised_as; p->remote_id = id; - DBG("BGP: Hold timer set to %d, keepalive to %d, AS to %d, ID to %x\n", conn->hold_time, conn->keepalive_time, p->remote_as, p->remote_id); + p->as4_session = p->cf->enable_as4 && conn->as4_support; + + DBG("BGP: Hold timer set to %d, keepalive to %d, AS to %d, ID to %x, AS4 session to %d\n", conn->hold_time, conn->keepalive_time, p->remote_as, p->remote_id, p->as4_session); bgp_schedule_packet(conn, PKT_KEEPALIVE); bgp_start_timer(conn->hold_timer, conn->hold_time); @@ -817,24 +823,41 @@ static struct { { 6, 0, "Cease" } }; +/** + * bgp_error_dsc - return BGP error description + * @buff: temporary buffer + * @code: BGP error code + * @subcode: BGP error subcode + * + * bgp_error_dsc() returns error description for BGP errors + * which might be static string or given temporary buffer. + */ +const byte * +bgp_error_dsc(byte *buff, unsigned code, unsigned subcode) +{ + unsigned i; + for (i=0; i < ARRAY_SIZE(bgp_msg_table); i++) + if (bgp_msg_table[i].major == code && bgp_msg_table[i].minor == subcode) + { + return bgp_msg_table[i].msg; + } + + bsprintf(buff, "Unknown error %d.%d", code, subcode); + return buff; +} + void bgp_log_error(struct bgp_proto *p, char *msg, unsigned code, unsigned subcode, byte *data, unsigned len) { - byte *name, namebuf[16]; + const byte *name; + byte namebuf[32]; byte *t, argbuf[36]; unsigned i; if (code == 6 && !subcode) /* Don't report Cease messages */ return; - bsprintf(namebuf, "%d.%d", code, subcode); - name = namebuf; - for (i=0; i < ARRAY_SIZE(bgp_msg_table); i++) - if (bgp_msg_table[i].major == code && bgp_msg_table[i].minor == subcode) - { - name = bgp_msg_table[i].msg; - break; - } + name = bgp_error_dsc(namebuf, code, subcode); t = argbuf; if (len) { @@ -857,10 +880,13 @@ bgp_rx_notification(struct bgp_conn *conn, byte *pkt, int len) bgp_error(conn, 1, 2, pkt+16, 2); return; } - bgp_log_error(conn->bgp, "Received error notification", pkt[19], 
pkt[20], pkt+21, len-21); - conn->error_flag = 1; - if (conn->primary) - proto_notify_state(&conn->bgp->p, PS_STOP); + + unsigned code = pkt[19]; + unsigned subcode = pkt[20]; + bgp_log_error(conn->bgp, "Received error notification", code, subcode, pkt+21, len-21); + bgp_store_error(conn->bgp, conn, BE_BGP_RX, (code << 16) | subcode); + bgp_update_startup_delay(conn->bgp, conn, code, subcode); + bgp_conn_enter_close_state(conn); bgp_schedule_packet(conn, PKT_SCHEDULE_CLOSE); } @@ -874,10 +900,7 @@ bgp_rx_keepalive(struct bgp_conn *conn) switch (conn->state) { case BS_OPENCONFIRM: - DBG("BGP: UP!!!\n"); - conn->state = BS_ESTABLISHED; - bgp_attr_init(conn->bgp); - proto_notify_state(&conn->bgp->p, PS_UP); + bgp_conn_enter_established_state(conn); break; case BS_ESTABLISHED: break; @@ -930,18 +953,8 @@ bgp_rx(sock *sk, int size) DBG("BGP: RX hook: Got %d bytes\n", size); while (end >= pkt_start + BGP_HEADER_LENGTH) { - if (conn->error_flag) - { - /* - * We still need to remember the erroneous packet, so that - * we can generate error notifications properly. To avoid - * subsequent reads rewriting the buffer, we just reset the - * rx_hook. - */ - DBG("BGP: Error, dropping input\n"); - sk->rx_hook = NULL; - return 0; - } + if ((conn->state == BS_CLOSE) || (conn->sk != sk)) + return 0; for(i=0; i<16; i++) if (pkt_start[i] != 0xff) { |