BGP Process Analysis

Check how many threads the bgpd process has created:

root@ba95ecc17d59:/# top -Hp `pidof bgpd`
   PID USER      PR  NI    VIRT    RES    SHR S %CPU %MEM     TIME+ COMMAND
    46 frr       20   0  315876  10752   4744 S  0.0  0.1   0:03.43 bgpd                        
    47 frr       20   0  315876  10752   4744 S  0.0  0.1   0:01.41 bgpd_io                     
    48 frr       20   0  315876  10752   4744 S  0.0  0.1   0:00.40 bgpd_ka                     

From the output above we can see that, besides the main thread, bgpd creates two additional pthreads: bgpd_io, which is dedicated to packet input and output, and bgpd_ka, which handles keepalive events and periodically sends keepalive messages to peers. The main thread handles the core BGP logic.

The bgpd_io thread

Creation

// Initialize the bgpd pthreads
static void bgp_pthreads_init(void)
{
	assert(!bgp_pth_io);
	assert(!bgp_pth_ka);

	frr_pthread_init();

	struct frr_pthread_attr io = {
		.start = frr_pthread_attr_default.start,
		.stop = frr_pthread_attr_default.stop,
	};

    // attributes for the keepalive pthread
	struct frr_pthread_attr ka = {
		.start = bgp_keepalives_start,
		.stop = bgp_keepalives_stop,
	};
	bgp_pth_io = frr_pthread_new(&io, "BGP I/O thread", "bgpd_io");
	bgp_pth_ka = frr_pthread_new(&ka, "BGP Keepalives thread", "bgpd_ka");
}
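
For completeness, these two pthreads are actually started a little later by bgp_pthreads_run(). The following is a simplified sketch based on the FRR source; the exact code may differ between versions:

void bgp_pthreads_run(void)
{
	frr_pthread_run(bgp_pth_io, NULL);   /* start the I/O pthread */
	frr_pthread_run(bgp_pth_ka, NULL);   /* start the keepalives pthread */

	/* Block until both pthreads report that they are up and running. */
	frr_pthread_wait_running(bgp_pth_io);
	frr_pthread_wait_running(bgp_pth_ka);
}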

The bgpd_io thread uses the default start function, fpt_run:

static void *fpt_run(void *arg)
{
	struct frr_pthread *fpt = arg;
	fpt->master->owner = pthread_self();

	/* dummy read task on a pipe that is never written to, so the event
	 * loop always has at least one thing to wait on */
	int sleeper[2];
	pipe(sleeper);
	thread_add_read(fpt->master, &fpt_dummy, NULL, sleeper[0], NULL);

	/* signals are handled by the main pthread only */
	fpt->master->handle_signals = false;

	frr_pthread_set_name(fpt);

	frr_pthread_notify_running(fpt);

	/* fetch-and-run loop: pull the next ready task off this pthread's
	 * event master and execute it */
	struct thread task;
	while (atomic_load_explicit(&fpt->running, memory_order_relaxed)) {
		pthread_testcancel();
		if (thread_fetch(fpt->master, &task)) {
			thread_call(&task);
		}
	}

	close(sleeper[1]);
	close(sleeper[0]);

	return NULL;
}

What it actually executes is determined by the tasks (struct thread events) added to fpt->master. As the name suggests, this thread is mainly used to send and receive packets.

Adding and removing events

// Add a write task (struct thread) to bgp_pth_io's event master
void bgp_writes_on(struct peer *peer)
{
	struct frr_pthread *fpt = bgp_pth_io;
	assert(fpt->running);

	assert(peer->status != Deleted);
	assert(peer->obuf);
	assert(peer->ibuf);
	assert(peer->ibuf_work);
	assert(!peer->t_connect_check_r);
	assert(!peer->t_connect_check_w);
	assert(peer->fd);

	thread_add_write(fpt->master, bgp_process_writes, peer, peer->fd,
			 &peer->t_write);
	SET_FLAG(peer->thread_flags, PEER_THREAD_WRITES_ON);
}

void bgp_writes_off(struct peer *peer)
{
	struct frr_pthread *fpt = bgp_pth_io;
	assert(fpt->running);

	thread_cancel_async(fpt->master, &peer->t_write, NULL);
	THREAD_OFF(peer->t_generate_updgrp_packets);

	UNSET_FLAG(peer->thread_flags, PEER_THREAD_WRITES_ON);
}

void bgp_reads_on(struct peer *peer)
{
	struct frr_pthread *fpt = bgp_pth_io;
	assert(fpt->running);

	assert(peer->status != Deleted);
	assert(peer->ibuf);
	assert(peer->fd);
	assert(peer->ibuf_work);
	assert(peer->obuf);
	assert(!peer->t_connect_check_r);
	assert(!peer->t_connect_check_w);
	assert(peer->fd);

	thread_add_read(fpt->master, bgp_process_reads, peer, peer->fd,
			&peer->t_read);

	SET_FLAG(peer->thread_flags, PEER_THREAD_READS_ON);
}

void bgp_reads_off(struct peer *peer)
{
	struct frr_pthread *fpt = bgp_pth_io;
	assert(fpt->running);

	thread_cancel_async(fpt->master, &peer->t_read, NULL);
	THREAD_OFF(peer->t_process_packet);

	UNSET_FLAG(peer->thread_flags, PEER_THREAD_READS_ON);
}

/* Thread internal functions ----------------------------------------------- */

/*
 * Called from I/O pthread when a file descriptor has become ready for writing.
 */
static int bgp_process_writes(struct thread *thread)
{
	static struct peer *peer;
	peer = THREAD_ARG(thread);
	uint16_t status;
	bool reschedule;
	bool fatal = false;

	if (peer->fd < 0)
		return -1;

	struct frr_pthread *fpt = bgp_pth_io;

	pthread_mutex_lock(&peer->io_mtx);
	{
		status = bgp_write(peer);
		reschedule = (stream_fifo_head(peer->obuf) != NULL);
	}
	pthread_mutex_unlock(&peer->io_mtx);

	/* no problem */
	if (CHECK_FLAG(status, BGP_IO_TRANS_ERR)) {
	}

	/* problem */
	if (CHECK_FLAG(status, BGP_IO_FATAL_ERR)) {
		reschedule = false;
		fatal = true;
	}

	if (reschedule) {
		thread_add_write(fpt->master, bgp_process_writes, peer,
				 peer->fd, &peer->t_write);
	} else if (!fatal) {
		BGP_TIMER_ON(peer->t_generate_updgrp_packets,
			     bgp_generate_updgrp_packets, 0);
	}

	return 0;
}

/*
 * Called from I/O pthread when a file descriptor has become ready for reading,
 * or has hung up.
 *
 * We read as much data as possible, process as many packets as we can and
 * place them on peer->ibuf for secondary processing by the main thread.
 */
static int bgp_process_reads(struct thread *thread)
{
	/* clang-format off */
	static struct peer *peer;	// peer to read from
	uint16_t status;		// bgp_read status code
	bool more = true;		// whether we got more data
	bool fatal = false;		// whether fatal error occurred
	bool added_pkt = false;		// whether we pushed onto ->ibuf
	/* clang-format on */

	peer = THREAD_ARG(thread);

	if (peer->fd < 0 || bm->terminating)
		return -1;

	struct frr_pthread *fpt = bgp_pth_io;

	pthread_mutex_lock(&peer->io_mtx);
	{
		status = bgp_read(peer);
	}
	pthread_mutex_unlock(&peer->io_mtx);

	/* error checking phase */
	if (CHECK_FLAG(status, BGP_IO_TRANS_ERR)) {
		/* no problem; just don't process packets */
		more = false;
	}

	if (CHECK_FLAG(status, BGP_IO_FATAL_ERR)) {
		/* problem; tear down session */
		more = false;
		fatal = true;
	}

	while (more) {
		/* static buffer for transferring packets */
		static unsigned char pktbuf[BGP_MAX_PACKET_SIZE];
		/* shorter alias to peer's input buffer */
		struct ringbuf *ibw = peer->ibuf_work;
		/* packet size as given by header */
		uint16_t pktsize = 0;

		/* check that we have enough data for a header */
		if (ringbuf_remain(ibw) < BGP_HEADER_SIZE)
			break;

		/* check that header is valid */
		if (!validate_header(peer)) {
			fatal = true;
			break;
		}

		/* header is valid; retrieve packet size */
		ringbuf_peek(ibw, BGP_MARKER_SIZE, &pktsize, sizeof(pktsize));

		pktsize = ntohs(pktsize);

		/* if this fails we are seriously screwed */
		assert(pktsize <= BGP_MAX_PACKET_SIZE);

		/*
		 * If we have that much data, chuck it into its own
		 * stream and append to input queue for processing.
		 */
		if (ringbuf_remain(ibw) >= pktsize) {
			struct stream *pkt = stream_new(pktsize);
			assert(ringbuf_get(ibw, pktbuf, pktsize) == pktsize);
			stream_put(pkt, pktbuf, pktsize);

			pthread_mutex_lock(&peer->io_mtx);
			{
				stream_fifo_push(peer->ibuf, pkt);
			}
			pthread_mutex_unlock(&peer->io_mtx);

			added_pkt = true;
		} else
			break;
	}

	assert(ringbuf_space(peer->ibuf_work) >= BGP_MAX_PACKET_SIZE);

	/* handle invalid header */
	if (fatal) {
		/* wipe buffer just in case someone screwed up */
		ringbuf_wipe(peer->ibuf_work);
	} else {
		thread_add_read(fpt->master, bgp_process_reads, peer, peer->fd,
				&peer->t_read);
		if (added_pkt)
            // schedule a task on the main thread to process the queued packets
			thread_add_timer_msec(bm->master, bgp_process_packet,
					      peer, 0, &peer->t_process_packet);
	}

	return 0;
}

After the TCP connection with a neighbor is established, the main thread calls bgp_reads_on() to add a read event to fpt->master. Once the I/O thread has read and framed the data, it stores the packets in peer->ibuf and schedules a packet-processing task on bm->master, which triggers the main thread to process them (sketched below).
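
The task scheduled on bm->master is bgp_process_packet. Below is a simplified sketch of what it does, based on my reading of bgp_packet.c; bgp_dispatch_packet is a hypothetical placeholder for the real switch on the message type that calls bgp_open_receive(), bgp_update_receive(), and friends, and the real function also limits how many packets it handles per invocation:

static int bgp_process_packet(struct thread *thread)
{
	struct peer *peer = THREAD_ARG(thread);
	struct stream *pkt;

	for (;;) {
		/* pop one packet off the FIFO filled by the I/O thread */
		pthread_mutex_lock(&peer->io_mtx);
		pkt = stream_fifo_pop(peer->ibuf);
		pthread_mutex_unlock(&peer->io_mtx);

		if (pkt == NULL)
			break;

		/* dispatch on the BGP message type and drive the FSM;
		 * hypothetical helper standing in for the real switch */
		bgp_dispatch_packet(peer, pkt);

		stream_free(pkt);
	}

	return 0;
}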

When the keepalive thread or the main thread has a packet to send, it first builds the packet, writes it into peer->obuf, and then calls bgp_writes_on() to add a write event to fpt->master, which triggers the I/O thread to transmit it.
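
As an example of that write path, this is roughly how a KEEPALIVE ends up in peer->obuf, simplified from bgp_keepalive_send() and bgp_packet_add() in bgp_packet.c (debug logging and return-value checks omitted; check your FRR version for the exact code):

void bgp_keepalive_send(struct peer *peer)
{
	struct stream *s = stream_new(BGP_MAX_PACKET_SIZE);

	/* build the 19-byte KEEPALIVE: marker + length + type */
	bgp_packet_set_marker(s, BGP_MSG_KEEPALIVE);
	bgp_packet_set_size(s);

	/* push the packet onto peer->obuf under peer->io_mtx */
	bgp_packet_add(peer, s);

	/* arm the write event so the I/O thread picks it up */
	bgp_writes_on(peer);
}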

The main thread

The main thread's job is mainly to process packets received from the I/O thread, CLI commands, zebra events, and so on. Packet processing can be divided into two halves, with bgp_process() as the dividing line: the first half validates and parses the packet, extracts path information, and applies filtering; it then calls bgp_process() to push the affected route node onto the queue bm->process_main_queue, as shown below:

void bgp_process(struct bgp *bgp, struct bgp_node *rn, afi_t afi, safi_t safi)
{
#define ARBITRARY_PROCESS_QLEN		10000
	struct work_queue *wq = bm->process_main_queue;
	struct bgp_process_queue *pqnode;
	int pqnode_reuse = 0;

	/* already scheduled for processing? */
	if (CHECK_FLAG(rn->flags, BGP_NODE_PROCESS_SCHEDULED))
		return;

	if (wq == NULL)
		return;

	/* Add route nodes to an existing work queue item until reaching the
	   limit only if is from the same BGP view and it's not an EOIU marker
	 */
	if (work_queue_item_count(wq)) {
		struct work_queue_item *item = work_queue_last_item(wq);
		pqnode = item->data;

		if (CHECK_FLAG(pqnode->flags, BGP_PROCESS_QUEUE_EOIU_MARKER)
		    || pqnode->bgp != bgp
		    || pqnode->queued >= ARBITRARY_PROCESS_QLEN)
			pqnode = bgp_processq_alloc(bgp);
		else
			pqnode_reuse = 1;
	} else
		pqnode = bgp_processq_alloc(bgp);
	/* all unlocked in bgp_process_wq */
	bgp_table_lock(bgp_node_table(rn));

	SET_FLAG(rn->flags, BGP_NODE_PROCESS_SCHEDULED);
	bgp_lock_node(rn);

	/* can't be enqueued twice */
	assert(STAILQ_NEXT(rn, pq) == NULL);
	STAILQ_INSERT_TAIL(&pqnode->pqueue, rn, pq);
	pqnode->queued++;

	if (!pqnode_reuse)
		work_queue_add(wq, pqnode);

	return;
}

The second half pulls items off this queue and processes them: best-path selection, withdrawal, advertisement to peers, and so on. Let's look at how the queue is initialized:

// work queue initialization
void bgp_process_queue_init(void)
{
	if (!bm->process_main_queue)
		bm->process_main_queue =
			work_queue_new(bm->master, "process_main_queue"); // the queue is driven by the main thread's master
    // work function for the main queue
	bm->process_main_queue->spec.workfunc = &bgp_process_wq;
	bm->process_main_queue->spec.del_item_data = &bgp_processq_del;
	bm->process_main_queue->spec.max_retries = 0;
	bm->process_main_queue->spec.hold = 50;
	/* Use a higher yield value of 50ms for main queue processing */
	bm->process_main_queue->spec.yield = 50 * 1000L;
}

As shown above, the queue is executed in the context of the main thread, and its work function is bgp_process_wq.
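
A simplified sketch of that work function, based on bgp_process_wq() in bgp_route.c (EOIU-marker handling and some per-node bookkeeping omitted):

static wq_item_status bgp_process_wq(struct work_queue *wq, void *data)
{
	struct bgp_process_queue *pqnode = data;
	struct bgp *bgp = pqnode->bgp;
	struct bgp_table *table;
	struct bgp_node *rn;

	/* drain the route nodes queued by bgp_process() */
	while (!STAILQ_EMPTY(&pqnode->pqueue)) {
		rn = STAILQ_FIRST(&pqnode->pqueue);
		STAILQ_REMOVE_HEAD(&pqnode->pqueue, pq);
		table = bgp_node_table(rn);

		/* best-path selection, announce/withdraw to peers, zebra update */
		bgp_process_main_one(bgp, rn, table->afi, table->safi);

		bgp_unlock_node(rn);
		bgp_table_unlock(table);
	}

	return WQ_SUCCESS;
}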

The keepalive thread

The keepalive thread is used to periodically send keepalive packets to neighbors to keep sessions alive. It is only responsible for building the packets, which it hands to the I/O thread for transmission. Received keepalive packets are processed by the main thread.
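
A rough sketch of the keepalive pthread's main loop, based on bgp_keepalives_start() in bgp_keepalives.c; the real code keeps the registered peers in a hash protected by peerhash_mtx and recomputes the next wakeup time on every pass:

	while (atomic_load_explicit(&fpt->running, memory_order_relaxed)) {
		/* sleep until the earliest keepalive timer is due, or until the
		 * main thread registers/unregisters a peer and signals the cond */
		pthread_cond_timedwait(peerhash_cond, peerhash_mtx, &next_update_ts);

		/* for every registered peer whose keepalive interval has elapsed,
		 * peer_process() calls bgp_keepalive_send(), which queues the packet
		 * on peer->obuf and calls bgp_writes_on() to wake the I/O thread */
		hash_iterate(peerhash, peer_process, &next_update);
	}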
