BGP Process Analysis
Check how many threads the bgpd process has created:
root@ba95ecc17d59:/# top -Hp `pidof bgpd`
46 frr 20 0 315876 10752 4744 S 0.0 0.1 0:03.43 bgpd
47 frr 20 0 315876 10752 4744 S 0.0 0.1 0:01.41 bgpd_io
48 frr 20 0 315876 10752 4744 S 0.0 0.1 0:00.40 bgpd_ka
From the output above, bgpd has created two additional threads besides the main thread: bgpd_io, which is dedicated to packet input and output, and bgpd_ka, which handles keepalive events and periodically sends keepalive messages to peers. The main thread handles the core BGP logic.
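The per-thread names shown by top come from the OS-level thread name, which FRR assigns when each pthread starts (frr_pthread_set_name). On Linux this ultimately boils down to pthread_setname_np; the following standalone sketch (not FRR code) shows the mechanism, including the 15-character limit on thread names:

/* Minimal standalone sketch (not FRR code): how a spawned pthread gets
 * the short OS name that `top -H` displays. FRR wraps this in
 * frr_pthread_set_name(); on Linux the underlying call is
 * pthread_setname_np(), which allows at most 15 characters plus NUL. */
#define _GNU_SOURCE
#include <pthread.h>

static void *io_loop(void *arg)
{
	/* name the calling thread; this is what `top -H` shows */
	pthread_setname_np(pthread_self(), "bgpd_io");
	/* ... the event loop would run here ... */
	return NULL;
}

int main(void)
{
	pthread_t tid;
	pthread_create(&tid, NULL, io_loop, NULL);
	pthread_join(tid, NULL);
	return 0;
}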
The bgpd_io thread
Creation
// Initialize bgpd's pthreads
static void bgp_pthreads_init(void)
{
	assert(!bgp_pth_io);
	assert(!bgp_pth_ka);

	frr_pthread_init();

	struct frr_pthread_attr io = {
		.start = frr_pthread_attr_default.start,
		.stop = frr_pthread_attr_default.stop,
	};
	// keepalive thread
	struct frr_pthread_attr ka = {
		.start = bgp_keepalives_start,
		.stop = bgp_keepalives_stop,
	};
	bgp_pth_io = frr_pthread_new(&io, "BGP I/O thread", "bgpd_io");
	bgp_pth_ka = frr_pthread_new(&ka, "BGP Keepalives thread", "bgpd_ka");
}
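Note that bgp_pthreads_init only allocates the two frr_pthread objects; they are started later during bgpd startup. A rough paraphrase of that step, based on the FRR source (treat it as a sketch rather than an exact listing):

/* Start the I/O and keepalive pthreads and wait until they are running
 * (paraphrased from bgpd's bgp_pthreads_run()). */
void bgp_pthreads_run(void)
{
	frr_pthread_run(bgp_pth_io, NULL);
	frr_pthread_run(bgp_pth_ka, NULL);

	/* wait until the pthreads have actually started */
	frr_pthread_wait_running(bgp_pth_io);
	frr_pthread_wait_running(bgp_pth_ka);
}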
The handler for the bgpd_io thread is the default function:
static void *fpt_run(void *arg)
{
	struct frr_pthread *fpt = arg;
	fpt->master->owner = pthread_self();

	/* a dummy pipe with a permanent read event keeps the poll loop
	 * alive even when no real events have been registered yet */
	int sleeper[2];
	pipe(sleeper);
	thread_add_read(fpt->master, &fpt_dummy, NULL, sleeper[0], NULL);

	/* signals are handled only on the main pthread */
	fpt->master->handle_signals = false;

	frr_pthread_set_name(fpt);

	frr_pthread_notify_running(fpt);

	/* standard event loop: fetch the next ready task and run it */
	struct thread task;
	while (atomic_load_explicit(&fpt->running, memory_order_relaxed)) {
		pthread_testcancel();
		if (thread_fetch(fpt->master, &task)) {
			thread_call(&task);
		}
	}

	close(sleeper[1]);
	close(sleeper[0]);

	return NULL;
}
What this thread actually executes is determined by the events (struct thread tasks from the zebra thread library) added to fpt->master. As the name suggests, it is mainly used to receive and send packets.
Adding and removing events
// Add a write event (zebra thread) to bgp_pth_io's master
void bgp_writes_on(struct peer *peer)
{
	struct frr_pthread *fpt = bgp_pth_io;
	assert(fpt->running);

	assert(peer->status != Deleted);
	assert(peer->obuf);
	assert(peer->ibuf);
	assert(peer->ibuf_work);
	assert(!peer->t_connect_check_r);
	assert(!peer->t_connect_check_w);
	assert(peer->fd);

	thread_add_write(fpt->master, bgp_process_writes, peer, peer->fd,
			 &peer->t_write);

	SET_FLAG(peer->thread_flags, PEER_THREAD_WRITES_ON);
}

void bgp_writes_off(struct peer *peer)
{
	struct frr_pthread *fpt = bgp_pth_io;
	assert(fpt->running);

	thread_cancel_async(fpt->master, &peer->t_write, NULL);
	THREAD_OFF(peer->t_generate_updgrp_packets);

	UNSET_FLAG(peer->thread_flags, PEER_THREAD_WRITES_ON);
}

void bgp_reads_on(struct peer *peer)
{
	struct frr_pthread *fpt = bgp_pth_io;
	assert(fpt->running);

	assert(peer->status != Deleted);
	assert(peer->ibuf);
	assert(peer->fd);
	assert(peer->ibuf_work);
	assert(peer->obuf);
	assert(!peer->t_connect_check_r);
	assert(!peer->t_connect_check_w);
	assert(peer->fd);

	thread_add_read(fpt->master, bgp_process_reads, peer, peer->fd,
			&peer->t_read);

	SET_FLAG(peer->thread_flags, PEER_THREAD_READS_ON);
}

void bgp_reads_off(struct peer *peer)
{
	struct frr_pthread *fpt = bgp_pth_io;
	assert(fpt->running);

	thread_cancel_async(fpt->master, &peer->t_read, NULL);
	THREAD_OFF(peer->t_process_packet);

	UNSET_FLAG(peer->thread_flags, PEER_THREAD_READS_ON);
}
/* Thread internal functions ----------------------------------------------- */

/*
 * Called from I/O pthread when a file descriptor has become ready for writing.
 */
static int bgp_process_writes(struct thread *thread)
{
	static struct peer *peer;
	peer = THREAD_ARG(thread);
	uint16_t status;
	bool reschedule;
	bool fatal = false;

	if (peer->fd < 0)
		return -1;

	struct frr_pthread *fpt = bgp_pth_io;

	pthread_mutex_lock(&peer->io_mtx);
	{
		status = bgp_write(peer);
		reschedule = (stream_fifo_head(peer->obuf) != NULL);
	}
	pthread_mutex_unlock(&peer->io_mtx);

	/* no problem */
	if (CHECK_FLAG(status, BGP_IO_TRANS_ERR)) {
	}

	/* problem */
	if (CHECK_FLAG(status, BGP_IO_FATAL_ERR)) {
		reschedule = false;
		fatal = true;
	}

	if (reschedule) {
		thread_add_write(fpt->master, bgp_process_writes, peer,
				 peer->fd, &peer->t_write);
	} else if (!fatal) {
		BGP_TIMER_ON(peer->t_generate_updgrp_packets,
			     bgp_generate_updgrp_packets, 0);
	}

	return 0;
}
/*
 * Called from I/O pthread when a file descriptor has become ready for reading,
 * or has hung up.
 *
 * We read as much data as possible, process as many packets as we can and
 * place them on peer->ibuf for secondary processing by the main thread.
 */
static int bgp_process_reads(struct thread *thread)
{
	/* clang-format off */
	static struct peer *peer;	// peer to read from
	uint16_t status;		// bgp_read status code
	bool more = true;		// whether we got more data
	bool fatal = false;		// whether fatal error occurred
	bool added_pkt = false;		// whether we pushed onto ->ibuf
	/* clang-format on */

	peer = THREAD_ARG(thread);

	if (peer->fd < 0 || bm->terminating)
		return -1;

	struct frr_pthread *fpt = bgp_pth_io;

	pthread_mutex_lock(&peer->io_mtx);
	{
		status = bgp_read(peer);
	}
	pthread_mutex_unlock(&peer->io_mtx);

	/* error checking phase */
	if (CHECK_FLAG(status, BGP_IO_TRANS_ERR)) {
		/* no problem; just don't process packets */
		more = false;
	}

	if (CHECK_FLAG(status, BGP_IO_FATAL_ERR)) {
		/* problem; tear down session */
		more = false;
		fatal = true;
	}

	while (more) {
		/* static buffer for transferring packets */
		static unsigned char pktbuf[BGP_MAX_PACKET_SIZE];
		/* shorter alias to peer's input buffer */
		struct ringbuf *ibw = peer->ibuf_work;
		/* packet size as given by header */
		uint16_t pktsize = 0;

		/* check that we have enough data for a header */
		if (ringbuf_remain(ibw) < BGP_HEADER_SIZE)
			break;

		/* check that header is valid */
		if (!validate_header(peer)) {
			fatal = true;
			break;
		}

		/* header is valid; retrieve packet size */
		ringbuf_peek(ibw, BGP_MARKER_SIZE, &pktsize, sizeof(pktsize));

		pktsize = ntohs(pktsize);

		/* if this fails we are seriously screwed */
		assert(pktsize <= BGP_MAX_PACKET_SIZE);

		/*
		 * If we have that much data, chuck it into its own
		 * stream and append to input queue for processing.
		 */
		if (ringbuf_remain(ibw) >= pktsize) {
			struct stream *pkt = stream_new(pktsize);
			assert(ringbuf_get(ibw, pktbuf, pktsize) == pktsize);
			stream_put(pkt, pktbuf, pktsize);

			pthread_mutex_lock(&peer->io_mtx);
			{
				stream_fifo_push(peer->ibuf, pkt);
			}
			pthread_mutex_unlock(&peer->io_mtx);

			added_pkt = true;
		} else
			break;
	}

	assert(ringbuf_space(peer->ibuf_work) >= BGP_MAX_PACKET_SIZE);

	/* handle invalid header */
	if (fatal) {
		/* wipe buffer just in case someone screwed up */
		ringbuf_wipe(peer->ibuf_work);
	} else {
		thread_add_read(fpt->master, bgp_process_reads, peer, peer->fd,
				&peer->t_read);
		if (added_pkt)
			// schedule packet processing on the main thread
			thread_add_timer_msec(bm->master, bgp_process_packet,
					      peer, 0, &peer->t_process_packet);
	}

	return 0;
}
After the TCP connection with a neighbor has been established, the main thread calls bgp_reads_on to add a read event to fpt->master. When the I/O thread has read data, it stores the resulting packets in peer->ibuf and adds a packet-processing event to bm->master, which triggers the main thread to process them.
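The shape of that main-thread consumer is roughly the following. This is a simplified, hypothetical sketch rather than the real bgp_process_packet, which additionally limits how many packets it handles per invocation and reschedules itself if work remains:

/* Hypothetical sketch (illustrative name, not the real bgp_process_packet):
 * drain peer->ibuf under io_mtx and handle each message on the main thread. */
static int example_process_packets(struct thread *thread)
{
	struct peer *peer = THREAD_ARG(thread);
	struct stream *pkt;

	while (1) {
		pthread_mutex_lock(&peer->io_mtx);
		pkt = stream_fifo_pop(peer->ibuf);
		pthread_mutex_unlock(&peer->io_mtx);

		if (!pkt)
			break;

		/* ... dispatch on the BGP message type (OPEN/UPDATE/...) ... */
		stream_free(pkt);
	}

	return 0;
}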
When the keepalive thread or the main thread has a packet to send, it first builds the packet, then writes it into peer->obuf, and calls bgp_writes_on to add a write event to fpt->master, which triggers the I/O thread to send the packet.
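The producer side therefore follows this pattern. The helper name below is made up for illustration; in FRR the equivalent logic lives in bgp_packet.c:

/* Hypothetical sketch of the producer side: push a fully built packet
 * onto peer->obuf under io_mtx, then arm the write event so the I/O
 * thread picks it up. */
static void example_enqueue_packet(struct peer *peer, struct stream *s)
{
	pthread_mutex_lock(&peer->io_mtx);
	stream_fifo_push(peer->obuf, s);
	pthread_mutex_unlock(&peer->io_mtx);

	bgp_writes_on(peer);
}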
The main thread
The main thread is mainly responsible for processing packets handed over by the I/O thread, CLI commands, zebra events, and so on. Packet processing can be split into two halves, with bgp_process as the dividing line: the first half covers packet validation, parsing, information extraction and filtering, after which bgp_process pushes the relevant information onto the queue bm->process_main_queue, as shown below:
void bgp_process(struct bgp *bgp, struct bgp_node *rn, afi_t afi, safi_t safi)
{
#define ARBITRARY_PROCESS_QLEN 10000
	struct work_queue *wq = bm->process_main_queue;
	struct bgp_process_queue *pqnode;
	int pqnode_reuse = 0;

	/* already scheduled for processing? */
	if (CHECK_FLAG(rn->flags, BGP_NODE_PROCESS_SCHEDULED))
		return;

	if (wq == NULL)
		return;

	/* Add route nodes to an existing work queue item until reaching the
	   limit only if is from the same BGP view and it's not an EOIU marker
	 */
	if (work_queue_item_count(wq)) {
		struct work_queue_item *item = work_queue_last_item(wq);
		pqnode = item->data;

		if (CHECK_FLAG(pqnode->flags, BGP_PROCESS_QUEUE_EOIU_MARKER)
		    || pqnode->bgp != bgp
		    || pqnode->queued >= ARBITRARY_PROCESS_QLEN)
			pqnode = bgp_processq_alloc(bgp);
		else
			pqnode_reuse = 1;
	} else
		pqnode = bgp_processq_alloc(bgp);

	/* all unlocked in bgp_process_wq */
	bgp_table_lock(bgp_node_table(rn));

	SET_FLAG(rn->flags, BGP_NODE_PROCESS_SCHEDULED);
	bgp_lock_node(rn);

	/* can't be enqueued twice */
	assert(STAILQ_NEXT(rn, pq) == NULL);
	STAILQ_INSERT_TAIL(&pqnode->pqueue, rn, pq);
	pqnode->queued++;

	if (!pqnode_reuse)
		work_queue_add(wq, pqnode);

	return;
}
The second half dequeues items from this queue and processes them: best-path selection, withdrawal, advertisement, and so on. Let's look at how the queue is initialized:
// Work queue initialization
void bgp_process_queue_init(void)
{
	if (!bm->process_main_queue)
		bm->process_main_queue =
			work_queue_new(bm->master, "process_main_queue"); // attached to bm->master, so the queue is driven by the main thread

	// work function for the main queue
	bm->process_main_queue->spec.workfunc = &bgp_process_wq;
	bm->process_main_queue->spec.del_item_data = &bgp_processq_del;
	bm->process_main_queue->spec.max_retries = 0;
	bm->process_main_queue->spec.hold = 50;
	/* Use a higher yield value of 50ms for main queue processing */
	bm->process_main_queue->spec.yield = 50 * 1000L;
}
As shown above, the queue is executed in the context of the main thread, and its work function is bgp_process_wq.
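For reference, a work function registered via spec.workfunc has roughly the shape below. The body is a simplified, hypothetical placeholder, not the real bgp_process_wq; the real function in bgp_route.c runs best-path selection for each queued node and releases the locks taken in bgp_process:

/* Hypothetical sketch of a spec.workfunc consumer (not the real
 * bgp_process_wq): drain the route nodes queued by bgp_process() and
 * release the node/table references taken there. */
static wq_item_status example_process_wq(struct work_queue *wq, void *data)
{
	struct bgp_process_queue *pqnode = data;
	struct bgp_node *rn;

	while ((rn = STAILQ_FIRST(&pqnode->pqueue)) != NULL) {
		struct bgp_table *table = bgp_node_table(rn);

		STAILQ_REMOVE_HEAD(&pqnode->pqueue, pq);
		UNSET_FLAG(rn->flags, BGP_NODE_PROCESS_SCHEDULED);

		/* ... best-path selection / advertisement would happen here ... */

		bgp_unlock_node(rn);     /* pairs with bgp_lock_node() */
		bgp_table_unlock(table); /* pairs with bgp_table_lock() */
	}

	return WQ_SUCCESS;
}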
The keepalive thread
The keepalive thread periodically sends keepalive packets to neighbors to keep sessions alive. It only builds the packets and hands them to the I/O thread for transmission; keepalive packets received from peers are processed by the main thread.
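Putting the pieces together, sending a single keepalive amounts to roughly the following. This is a hypothetical sketch that reuses the example_enqueue_packet helper sketched in the I/O section above; the helper calls mirror what bgp_keepalive_send() in bgp_packet.c does internally, driven from the keepalive thread's loop (bgp_keepalives_start):

/* Hypothetical sketch (illustrative name): build the 19-byte KEEPALIVE
 * message (marker + length + type, no body) and hand it to the I/O
 * thread via peer->obuf. */
static void example_send_keepalive(struct peer *peer)
{
	struct stream *s = stream_new(BGP_MAX_PACKET_SIZE);

	bgp_packet_set_marker(s, BGP_MSG_KEEPALIVE); /* marker + type */
	bgp_packet_set_size(s);                      /* fill in the length field */

	example_enqueue_packet(peer, s);             /* obuf + bgp_writes_on() */
}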
