HTB算法分析

入队

htb_enqueue

/**
 * htb_enqueue - classify a packet and queue it on the matching leaf
 * @skb: packet to enqueue
 * @sch: the HTB qdisc
 * @to_free: passed through to the drop helpers and to the leaf qdisc
 *
 * Returns NET_XMIT_SUCCESS when the packet was queued; otherwise the
 * value produced by qdisc_drop(), the value left in ret by
 * htb_classify(), or the verdict of the leaf qdisc.
 */
static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch,
		       struct sk_buff **to_free)
{
	int uninitialized_var(ret);
	struct htb_sched *q = qdisc_priv(sch);	/* qdisc private data */
	struct htb_class *cl = htb_classify(skb, sch, &ret);

	if (cl == HTB_DIRECT) {	/* packet goes to the direct queue */
		/* enqueue to helper queue */
		if (q->direct_queue.qlen < q->direct_qlen) {	/* room left: append at the tail */
			htb_enqueue_tail(skb, sch, &q->direct_queue);
			q->direct_pkts++;
		} else {	/* direct queue is full: drop */
			return qdisc_drop(skb, sch, to_free);
		}
#ifdef CONFIG_NET_CLS_ACT
	} else if (!cl) {	/* no class was returned: drop the packet */
		if (ret & __NET_XMIT_BYPASS)
			qdisc_qstats_drop(sch);
		__qdisc_drop(skb, to_free);
		return ret;
#endif
	} else if ((ret = qdisc_enqueue(skb, cl->un.leaf.q,
					to_free)) != NET_XMIT_SUCCESS) {
		if (net_xmit_drop_count(ret)) {	/* count the drop on the qdisc and on the class */
			qdisc_qstats_drop(sch);
			cl->drops++;
		}
		return ret;
	} else {
		/* the leaf queue accepted the packet, so make sure the class is active */
		htb_activate(q, cl);
	}
	/* account the packet in this qdisc's backlog ... */
	qdisc_qstats_backlog_inc(sch, skb);
	sch->q.qlen++;	/* ... and in its packet count */
	return NET_XMIT_SUCCESS;
}

htb_activate

入队成功说明该叶子类有报文需要发送,因此需要激活该类。激活时根据类的模式逐级向上处理:如果类处于HTB_MAY_BORROW状态,就把它挂入父类对应优先级的供给(feed)红黑树中,并继续回溯到父类;如果类处于HTB_CAN_SEND状态,就把它挂入排队规程对应level的可发送(row)红黑树中。

/**
 * htb_activate - inserts leaf cl into appropriate active feeds
 *
 * Computes the priority bit of the leaf, builds the feed chain for it and
 * appends the leaf to the drop list of its priority. Calling it on a leaf
 * that is already active is a no-op.
 */
static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
{
	/* expects a leaf (level 0) that has a queue holding at least one packet */
	WARN_ON(cl->level || !cl->un.leaf.q || !cl->un.leaf.q->q.qlen);

	/* already active: nothing to do */
	if (cl->prio_activity)
		return;

	cl->prio_activity = 1 << cl->prio;
	/* link the leaf (and, where needed, its ancestors) into feeds/rows */
	htb_activate_prios(q, cl);
	list_add_tail(&cl->un.leaf.drop_list, &q->drops[cl->prio]);
}

/**
 * htb_activate_prios - creates active classe's feed chain
 *
 * The class is connected to ancestors and/or appropriate rows
 * for priorities it is participating on. cl->cmode must be new
 * (activated) mode. It does nothing if cl->prio_activity == 0.
 */
static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
{
	struct htb_class *p = cl->parent;	/* parent class (may be NULL) */
	long m, mask = cl->prio_activity;

	/* a class in MAY_BORROW mode is linked into its parent's feed trees */
	while (cl->cmode == HTB_MAY_BORROW && p && mask) {
		m = mask;	/* visit every priority bit still set in mask */
		while (m) {
			int prio = ffz(~m);	/* first zero bit of ~m, i.e. a set bit of m */
			m &= ~(1 << prio);	/* consume that bit */

			if (p->un.inner.clprio[prio].feed.rb_node)
				/* parent already has its feed in use so that
				 * reset bit in mask as parent is already ok;
				 * the bit is then not propagated further up
				 */
				mask &= ~(1 << prio);
			/* link cl into the parent's feed tree for this priority */
			htb_add_to_id_tree(&p->un.inner.clprio[prio].feed, cl, prio);
		}
		/* priorities that became active on the parent in this pass */
		p->prio_activity |= mask;
		/* climb one level */
		cl = p;
		p = cl->parent;

	}
	/* a class in CAN_SEND mode is linked into the qdisc rows instead */
	if (cl->cmode == HTB_CAN_SEND && mask)
		htb_add_class_to_row(q, cl, mask);
}
/**
 * htb_add_to_id_tree - adds class to the round robin list
 *
 * Inserts cl into the rb-tree at root, ordered by classid, using the
 * class's node[prio] as the linkage. The caller must make sure the class
 * is not already linked in such a tree for the given prio.
 */
static void htb_add_to_id_tree(struct rb_root *root,
			       struct htb_class *cl, int prio)
{
	struct rb_node *self = &cl->node[prio];
	struct rb_node *above = NULL;
	struct rb_node **slot = &root->rb_node;

	/* descend until an empty link is reached; that is the insertion point */
	while (*slot) {
		struct htb_class *other;

		above = *slot;
		other = rb_entry(above, struct htb_class, node[prio]);
		/* larger classid goes right, everything else goes left */
		slot = cl->common.classid > other->common.classid ?
			&above->rb_right : &above->rb_left;
	}

	/* hook the node in, then let the rb-tree rebalance */
	rb_link_node(self, above, slot);
	rb_insert_color(self, root);
}

出队

htb_dequeue

/**
 * htb_dequeue - select the next packet to transmit
 * @sch: the HTB qdisc
 *
 * Packets in the direct queue are served first. Otherwise each level is
 * visited from 0 upwards: due wait-queue events are processed, then the
 * priorities flagged in row_mask[level] are tried one by one through
 * htb_dequeue_tree(). If nothing can be sent, either the watchdog is armed
 * for the nearest event or the work item is scheduled.
 *
 * Returns the dequeued packet, or NULL when none is available.
 */
static struct sk_buff *htb_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct htb_sched *q = qdisc_priv(sch);
	int level;
	s64 next_event;
	unsigned long start_at;

	/* try to dequeue direct packets as high prio (!) to minimize cpu work */
	skb = __qdisc_dequeue_head(&q->direct_queue);
	if (skb != NULL) {
ok:
		qdisc_bstats_update(sch, skb);
		qdisc_qstats_backlog_dec(sch, skb);
		sch->q.qlen--;
		return skb;
	}

	if (!sch->q.qlen)
		goto fin;
	q->now = ktime_get_ns();	/* timestamp used for this whole pass */
	start_at = jiffies;		/* htb_do_events() stops 2 jiffies after this */

	next_event = q->now + 5LLU * NSEC_PER_SEC;
	/* Walk the levels from 0 upwards. For each level the row trees of the
	 * flagged priorities are handed to htb_dequeue_tree(), which resolves
	 * them down to a leaf.
	 */
	for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
		/* common case optimization - skip event handler quickly */
		int m;
		s64 event = q->near_ev_cache[level];

		/* the cached nearest event is due: process this level's wait queue */
		if (q->now >= event) {
			event = htb_do_events(q, level, start_at);
			if (!event)	/* wait queue is empty */
				event = q->now + NSEC_PER_SEC;
			q->near_ev_cache[level] = event;
		}

		if (next_event > event)	/* keep the earliest pending event */
			next_event = event;

		m = ~q->row_mask[level];	/* inverted mask: zero bits mark active priorities */
		/* try each flagged priority; the first one that yields a packet wins */
		while (m != (int)(-1)) {
			int prio = ffz(m);	/* first zero bit of m */

			m |= 1 << prio;	/* mark this priority as visited */
			/* try to get a packet for this level/priority */
			skb = htb_dequeue_tree(q, prio, level);
			if (likely(skb != NULL))
				goto ok;
		}
	}
	/* nothing could be sent in this pass */
	qdisc_qstats_overlimit(sch);
	if (likely(next_event > q->now))	/* nearest event lies in the future: arm the watchdog */
		qdisc_watchdog_schedule_ns(&q->watchdog, next_event);
	else
		/* next_event is not in the future (htb_do_events() returns
		 * q->now when it runs out of time): defer to the work item
		 */
		schedule_work(&q->work);
fin:
	return skb;
}

htb_dequeue_tree

/* dequeues packet at given priority and level; call only if
 * you are sure that there is active class at prio/level
 *
 * Returns the dequeued packet, or NULL when no leaf reachable from this
 * row yields one. (The article's walkthrough assumes level 1, prio 3; the
 * figure it refers to is not reproduced here.)
 */
static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, const int prio,
					const int level)
{
	struct sk_buff *skb = NULL;
	struct htb_class *cl, *start;
	struct htb_level *hlevel = &q->hlevel[level];
	struct htb_prio *hprio = &hlevel->hprio[prio];

	/* look initial class up in the row (first candidate leaf) */
	start = cl = htb_lookup_leaf(hprio, prio);

	do {
next:
		if (unlikely(!cl))
			return NULL;

		/* class can be empty - it is unlikely but can be true if leaf
		 * qdisc drops packets in enqueue routine or if someone used
		 * graft operation on the leaf since last dequeue;
		 * simply deactivate and skip such class
		 */
		if (unlikely(cl->un.leaf.q->q.qlen == 0)) {	/* leaf queue is empty */
			struct htb_class *next;
			htb_deactivate(q, cl);	/* take the class out of the feeds/rows */

			/* row/level might become empty: this prio is no longer flagged at this level */
			if ((q->row_mask[level] & (1 << prio)) == 0)
				return NULL;
			/* look up the next candidate leaf */
			next = htb_lookup_leaf(hprio, prio);

			if (cl == start)	/* fix start if we just deleted it */
				start = next;
			cl = next;
			goto next;
		}
		/* ask the leaf qdisc for a packet */
		skb = cl->un.leaf.q->dequeue(cl->un.leaf.q);
		if (likely(skb != NULL))	/* got one: leave the loop, the iterator stays on this leaf */
			break;

		qdisc_warn_nonwc("htb", cl->un.leaf.q);
		/* The leaf gave no packet. Advance the iterator of the tree that
		 * holds it: the parent's feed tree for this prio when level is
		 * non-zero, otherwise the level-0 row.
		 */
		htb_next_rb_node(level ? &cl->parent->un.inner.clprio[prio].ptr:
					 &q->hlevel[0].hprio[prio].ptr);
		/* continue the search from the new position */
		cl = htb_lookup_leaf(hprio, prio);

	} while (cl != start);	/* also stop once we are back at the starting leaf */

	if (likely(skb != NULL)) {	/* a packet was dequeued */
		bstats_update(&cl->bstats, skb);	/* per-class statistics */
		/* deficit accounting */
		cl->un.leaf.deficit[level] -= qdisc_pkt_len(skb);
		if (cl->un.leaf.deficit[level] < 0) {	/* deficit used up: refill by one quantum */
			cl->un.leaf.deficit[level] += cl->quantum;
			/* and move the iterator on, so the next node gets its turn */
			htb_next_rb_node(level ? &cl->parent->un.inner.clprio[prio].ptr :
						 &q->hlevel[0].hprio[prio].ptr);
		}
		/* this used to be after charge_class but this constelation
		 * gives us slightly better performance
		 * (that was the leaf's last packet, so deactivate the class)
		 */
		if (!cl->un.leaf.q->q.qlen)
			htb_deactivate(q, cl);
		/* charge the packet to the leaf and its ancestors */
		htb_charge_class(q, cl, level, skb);
	}
	return skb;
}

htb_lookup_leaf

/**
 * htb_lookup_leaf - returns next leaf class in DRR order
 * Find leaf where current feed pointers points to.
 *
 * The walk starts at hprio->row and descends through the feed trees of
 * inner classes for prio until a class with level 0 is found. Returns that
 * leaf, or NULL (after WARN_ON) if the walk fails.
 */
static struct htb_class *htb_lookup_leaf(struct htb_prio *hprio, const int prio)
{
	int i;
	struct {
		struct rb_node *root;
		struct rb_node **pptr;
		u32 *pid;
	} stk[TC_HTB_MAXDEPTH], *sp = stk;

	BUG_ON(!hprio->row.rb_node);
	sp->root = hprio->row.rb_node;	/* root of the row tree */
	sp->pptr = &hprio->ptr;	/* resume from the saved iterator position */
	sp->pid = &hprio->last_ptr_id;

	for (i = 0; i < 65535; i++) {	/* bounded walk: at most 65535 steps */
		if (!*sp->pptr && *sp->pid) {	/* iterator was cleared but a classid hint remains */
			/* ptr was invalidated but id is valid - try to recover
			 * the original or next ptr
			 */
			*sp->pptr = htb_id_find_next_upper(prio, sp->root, *sp->pid);
		}
		*sp->pid = 0;	/* ptr is valid now so that remove this hint as it
				 * can become out of date quickly
				 */
		if (!*sp->pptr) {/* we are at right end; rewind & go up
				  * (this tree is exhausted; reset it for its next round)
				  */
			*sp->pptr = sp->root;
			while ((*sp->pptr)->rb_left)	/* move to the leftmost node */
				*sp->pptr = (*sp->pptr)->rb_left;
			if (sp > stk) {	/* not in the entry tree: pop one stack frame */
				sp--;
				if (!*sp->pptr) {	/* unexpected: we descended through this pointer */
					WARN_ON(1);
					return NULL;
				}
				/* advance the upper tree to its next node */
				htb_next_rb_node(sp->pptr);
			}
		} else {	/* the iterator points at a node */
			struct htb_class *cl;
			struct htb_prio *clp;
			/* recover the class that embeds this rb node */
			cl = rb_entry(*sp->pptr, struct htb_class, node[prio]);
			/* Leaf found. The iterator is deliberately left on it:
			 * htb_dequeue_tree() advances it when the leaf yields no
			 * packet or its deficit is used up.
			 */
			if (!cl->level)
				return cl;
			/* inner class: continue in its feed tree for this prio */
			clp = &cl->un.inner.clprio[prio];
			/* push a stack frame holding that feed tree's walk state */
			(++sp)->root = clp->feed.rb_node;
			sp->pptr = &clp->ptr;
			sp->pid = &clp->last_ptr_id;
		}
	}
	WARN_ON(1);	/* NOTE(review): presumably unreachable, since the walk starts from a non-empty row */
	return NULL;
}

htb_deactivate

/**
 * htb_deactivate - remove leaf cl from active feeds
 *
 * The leaf must currently be active; calling this on a non-active leaf
 * triggers the warning below. The class is also taken off the drop list.
 */
static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
{
	struct list_head *drop_entry = &cl->un.leaf.drop_list;

	WARN_ON(cl->prio_activity == 0);

	/* unlink from feeds/rows while prio_activity still holds the old bits */
	htb_deactivate_prios(q, cl);
	list_del_init(drop_entry);
	cl->prio_activity = 0;
}

/**
 * htb_deactivate_prios - remove class from feed chain
 *
 * cl->cmode must represent old mode (before deactivation). It does
 * nothing if cl->prio_activity == 0. Class is removed from all feed
 * chains and rows.
 * The walk continues upwards for as long as removing the class leaves a
 * parent's feed tree empty.
 */
static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
{
	struct htb_class *p = cl->parent;	/* parent class (may be NULL) */
	long m, mask = cl->prio_activity;	/* priorities cl has to be unlinked from */

	/* mask holds the priorities to remove at the current level. When cl was
	 * the last entry of a parent's feed tree, that priority has to be
	 * removed from the parent too, so the loop climbs until mask is 0.
	 * Only a class in MAY_BORROW mode is linked in a parent's feed trees.
	 */
	while (cl->cmode == HTB_MAY_BORROW && p && mask) {
		m = mask;
		mask = 0;	/* rebuilt below: priorities the parent loses */
		while (m) {	/* every priority being removed */
			int prio = ffz(~m);
			m &= ~(1 << prio);
			/* cl is the node the parent's feed iterator points at */
			if (p->un.inner.clprio[prio].ptr == cl->node + prio) {
				/* we are removing child which is pointed to from
				 * parent feed - forget the pointer but remember
				 * classid; htb_lookup_leaf() uses the id to
				 * recover a position when ptr is NULL
				 */
				p->un.inner.clprio[prio].last_ptr_id = cl->common.classid;
				p->un.inner.clprio[prio].ptr = NULL;
			}
			/* unlink cl from the parent's feed tree */
			htb_safe_rb_erase(cl->node + prio,
					  &p->un.inner.clprio[prio].feed);
			/* feed tree is now empty: the parent loses this priority */
			if (!p->un.inner.clprio[prio].feed.rb_node)
				mask |= 1 << prio;
		}
		/* clear the priorities the parent no longer feeds, then climb */
		p->prio_activity &= ~mask;
		cl = p;
		p = cl->parent;

	}
	/* a CAN_SEND class sits in the qdisc rows; remove it from there */
	if (cl->cmode == HTB_CAN_SEND && mask)
		htb_remove_class_from_row(q, cl, mask);
}
/**
 * htb_remove_class_from_row - removes class from its row
 *
 * The class is removed from row at priorities marked in mask.
 * It does nothing if mask == 0.
 */
static inline void htb_remove_class_from_row(struct htb_sched *q,
						 struct htb_class *cl, int mask)
{
	int m = 0;
	struct htb_level *hlevel = &q->hlevel[cl->level];

	while (mask) {//遍历每一个优先级
		int prio = ffz(~mask);
		struct htb_prio *hprio = &hlevel->hprio[prio];

		mask &= ~(1 << prio);
		if (hprio->ptr == cl->node + prio)//如果要移除的节点是调度上下文中的节点,则更新红黑树的遍历上下文到下一个节点
			htb_next_rb_node(&hprio->ptr);
        //将节点从红黑树中移除
		htb_safe_rb_erase(cl->node + prio, &hprio->row);
		if (!hprio->row.rb_node)//该类是该优先级中最后一个活跃的节点,则说明该level需要去掉该红黑树
			m |= 1 << prio;
	}
	//移除本level需要去掉的活跃优先级
	q->row_mask[cl->level] &= ~m;
}

令牌计算

在没开启迟滞功能前。

htb_charge_class

/**
 * htb_charge_class - charges amount "bytes" to leaf and ancestors
 *
 * Routine assumes that packet "bytes" long was dequeued from leaf cl
 * borrowing from "level". It accounts bytes to ceil leaky bucket for
 * leaf and all ancestors and to rate bucket for ancestors at levels
 * "level" and higher. It also handles possible change of mode resulting
 * from the update. Note that mode can also increase here (MAY_BORROW to
 * CAN_SEND) because we can use more precise clock that event queue here.
 * In such case we remove class from event queue first.
 *
 * cl is the leaf the packet came from; level is the level of the row the
 * dequeue started from.
 */
static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
			     int level,
			     struct sk_buff *skb)
{
	int bytes = qdisc_pkt_len(skb);	/* packet length */
	enum htb_cmode old_mode;	/* mode before the update */
	s64 diff;
	/* walk from the leaf up through every ancestor */
	while (cl) {
		/* time since this class's last checkpoint, capped at mbuffer */
		diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
		if (cl->level >= level) {	/* at or above the lending level */
			if (cl->level == level)	/* this class is the lender */
				cl->xstats.lends++;
			htb_accnt_tokens(cl, bytes, diff);	/* charge the rate bucket */
		} else {
			/* Below the lending level the class is borrowing: its
			 * rate bucket is not charged for this packet, it is only
			 * credited with diff.
			 */
			cl->xstats.borrows++;
			cl->tokens += diff;	/* we moved t_c; update tokens */
		}

		/* the ceil bucket is charged at every level */
		htb_accnt_ctokens(cl, bytes, diff);
		cl->t_c = q->now;	/* move the checkpoint */

		old_mode = cl->cmode;
		diff = 0;
		/* re-evaluate the mode; diff is passed on as the wait delay below */
		htb_change_class_mode(q, cl, &diff);
		if (old_mode != cl->cmode) {
			if (old_mode != HTB_CAN_SEND)	/* was waiting: take it off the wait queue */
				htb_safe_rb_erase(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
			if (cl->cmode != HTB_CAN_SEND)	/* has to wait now: queue it with the new delay */
				htb_add_to_wait_tree(q, cl, diff);
		}

		/* update basic stats except for leaves which are already updated */
		if (cl->level)
			bstats_update(&cl->bstats, skb);

		cl = cl->parent;	/* continue with the parent */
	}
}

1.模式不是HTB_CAN_SEND状态,说明其一定在wait_pq队列中;反之模式为HTB_CAN_SEND状态,则一定不在wait_pq队列中。

htb_change_class_mode

/**
 * htb_change_class_mode - changes classe's mode
 *
 * This should be the only way how to change classe's mode under normal
 * cirsumstances. Routine will update feed lists linkage, change mode
 * and add class to the wait event queue if appropriate. New mode should
 * be different from old one and cl->pq_key has to be valid if changing
 * to mode other than HTB_CAN_SEND (see htb_add_to_wait_tree).
 *
 * diff is handed to htb_class_mode(), which computes the new mode.
 */
static void
htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, s64 *diff)
{
	enum htb_cmode new_mode = htb_class_mode(cl, diff);

	if (new_mode == cl->cmode)	/* mode unchanged: nothing to do */
		return;

	if (new_mode == HTB_CANT_SEND)	/* count transitions into CANT_SEND */
		cl->overlimits++;

	if (cl->prio_activity) {	/* not necessary: speed optimization (class has active priorities) */
		/* Unless the old mode was CANT_SEND, the class is linked under
		 * its old mode; unlink it before the mode is switched.
		 */
		if (cl->cmode != HTB_CANT_SEND)
			htb_deactivate_prios(q, cl);
		cl->cmode = new_mode;	/* switch the mode */
		/* relink under the new mode unless the class may not send at all */
		if (new_mode != HTB_CANT_SEND)
			htb_activate_prios(q, cl);
	} else	/* no active priority: only record the new mode */
		cl->cmode = new_mode;
}

1.类模式为HTB_MAY_BORROW状态,在父类的feed树中,也会在wait_pq中。

2.类模式为HTB_CAN_SEND状态,在调度矩阵的row树中。

3.类模式为HTB_CANT_SEND状态,只会在wait_pq中。

迟滞功能

HTB通过全局变量htb_hysteresis来实现迟滞功能,该功能的主要作用是延迟正向模式变换(正向模式变换指的是SEND到BORROW,BORROW到CANT)的时间。

HTB_CAN_SEND to HTB_MAY_BORROW

在正常情况下,当cl->tokens小于0时,模式要进行SEND到BORROW状态转换。开启迟滞模式后,小于0不会进行转换,而是到小于-cl->buffer时,才进行转换。

HTB_MAY_BORROW to HTB_CANT_SEND

在正常情况下,当cl->ctokens小于0时,模式要进行BORROW到CANT状态转换。开启迟滞模式后,小于0不会进行转换,而是到小于-cl->cbuffer时,才进行转换。

迟滞模式变相的增加了桶的大小,即burst的大小(提升了突发速率),并没有增加类的平均速率(因为令牌添加的速率没有变化,只是增加了桶的大小)。能减小模式变换,提升约15%的性能。

可以参考文章:http://luxik.cdi.cz/~devik/qos/htb/manual/theory.htm

/*
 * Low-water mark used for the CANT_SEND decision.
 *
 * With hysteresis enabled, a class that is not already in HTB_CANT_SEND
 * mode gets the mark lowered to -cl->cbuffer, which postpones the switch
 * to HTB_CANT_SEND. In every other case the mark is 0.
 */
static inline s64 htb_lowater(const struct htb_class *cl)
{
	/* hysteresis disabled: the mark is always 0 */
	if (!htb_hysteresis)
		return 0;

	/* already in CANT_SEND: no lowered mark */
	if (cl->cmode == HTB_CANT_SEND)
		return 0;

	return -cl->cbuffer;
}
/*
 * High-water mark used for the CAN_SEND decision.
 *
 * With hysteresis enabled, a class currently in HTB_CAN_SEND mode gets the
 * mark lowered to -cl->buffer, which postpones leaving HTB_CAN_SEND.
 * In every other case the mark is 0.
 */
static inline s64 htb_hiwater(const struct htb_class *cl)
{
	/* hysteresis disabled: the mark is always 0 */
	if (!htb_hysteresis)
		return 0;

	/* only a class that is currently sending gets the lowered mark */
	if (cl->cmode != HTB_CAN_SEND)
		return 0;

	return -cl->buffer;
}

等待队列

每一个level都有一个等待红黑树(wait_pq),用于存放那些处于非HTB_CAN_SEND状态(即HTB_MAY_BORROW或HTB_CANT_SEND)的类,按照类的到期时间(pq_key)组织该红黑树。

在进行令牌计算的时候,如果类的模式发生变化,从不是SEND变成SEND的时候,需要从等待队列摘除。如果从SEND变为不是SEND的话,需要加入到等待队列中。

htb_charge_class

/**
 * htb_charge_class - charges amount "bytes" to leaf and ancestors
 *
 * Routine assumes that packet "bytes" long was dequeued from leaf cl
 * borrowing from "level". It accounts bytes to ceil leaky bucket for
 * leaf and all ancestors and to rate bucket for ancestors at levels
 * "level" and higher. It also handles possible change of mode resulting
 * from the update. Note that mode can also increase here (MAY_BORROW to
 * CAN_SEND) because we can use more precise clock that event queue here.
 * In such case we remove class from event queue first.
 *
 * cl is the leaf the packet came from; level is the level of the row the
 * dequeue started from.
 */
static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
			     int level,
			     struct sk_buff *skb)
{
	int bytes = qdisc_pkt_len(skb);	/* packet length */
	enum htb_cmode old_mode;	/* mode before the update */
	s64 diff;
	/* walk from the leaf up through every ancestor */
	while (cl) {
		/* time since this class's last checkpoint, capped at mbuffer */
		diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
		if (cl->level >= level) {	/* at or above the lending level */
			if (cl->level == level)	/* this class is the lender */
				cl->xstats.lends++;
			htb_accnt_tokens(cl, bytes, diff);	/* charge the rate bucket */
		} else {	/* borrowing: rate bucket is only credited with diff */
			cl->xstats.borrows++;
			cl->tokens += diff;	/* we moved t_c; update tokens */
		}
		/* the ceil bucket is charged at every level */
		htb_accnt_ctokens(cl, bytes, diff);
		cl->t_c = q->now;	/* move the checkpoint */

		old_mode = cl->cmode;
		diff = 0;
		/* re-evaluate the mode; diff is passed on as the wait delay below */
		htb_change_class_mode(q, cl, &diff);
		if (old_mode != cl->cmode) {	/* the mode changed */
			if (old_mode != HTB_CAN_SEND)	/* was waiting: take it off the wait queue */
				htb_safe_rb_erase(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
			if (cl->cmode != HTB_CAN_SEND)	/* has to wait now: queue it with the new delay */
				htb_add_to_wait_tree(q, cl, diff);
		}

		/* update basic stats except for leaves which are already updated */
		if (cl->level)
			bstats_update(&cl->bstats, skb);

		cl = cl->parent;	/* continue with the parent */
	}
}

htb_add_to_wait_tree

/**
 * htb_add_to_wait_tree - adds class to the event queue with delay
 *
 * Queues the class in the wait tree of its level, keyed by the absolute
 * time cl->pq_key = q->now + delay at which its mode should be looked at
 * again. The key is in the same unit as q->now, which htb_dequeue() takes
 * from ktime_get_ns(). The caller must make sure the class is not already
 * in the queue.
 */
static void htb_add_to_wait_tree(struct htb_sched *q,
				 struct htb_class *cl, s64 delay)
{
	struct rb_root *wait_pq = &q->hlevel[cl->level].wait_pq;
	struct rb_node **slot = &wait_pq->rb_node;
	struct rb_node *above = NULL;

	/* expiry time; never equal to "now", so bump it by one unit if needed */
	cl->pq_key = q->now + delay;
	if (cl->pq_key == q->now)
		cl->pq_key++;

	/* update the nearest event cache */
	if (cl->pq_key < q->near_ev_cache[cl->level])
		q->near_ev_cache[cl->level] = cl->pq_key;

	/* find the insertion point; equal keys go to the right */
	while (*slot) {
		struct htb_class *other;

		above = *slot;
		other = rb_entry(above, struct htb_class, pq_node);
		if (cl->pq_key < other->pq_key)
			slot = &above->rb_left;
		else
			slot = &above->rb_right;
	}

	/* hook the node in, then let the rb-tree rebalance */
	rb_link_node(&cl->pq_node, above, slot);
	rb_insert_color(&cl->pq_node, wait_pq);
}

等待事件已经发生

当等待的时间已经触发了,则需要将等待红黑树中的满足条件的类从红黑树中移除。

/**
 * htb_do_events - make mode changes to classes at the level
 *
 * Scans event queue for pending events and applies them. Returns time of
 * next pending event (0 for no event in pq, q->now for too many events).
 * Note: Applied are events whose have cl->pq_key <= q->now.
 *
 * start is the jiffies value taken when the current dequeue pass began.
 */
static s64 htb_do_events(struct htb_sched *q, const int level,
			 unsigned long start)
{
	/* don't run for longer than 2 jiffies; 2 is used instead of
	 * 1 to simplify things when jiffy is going to be incremented
	 * too soon
	 */
	unsigned long stop_at = start + 2;	/* deadline for this scan */
	struct rb_root *wait_pq = &q->hlevel[level].wait_pq;

	while (time_before(jiffies, stop_at)) {	/* keep going until the deadline */
		struct htb_class *cl;
		s64 diff;
		struct rb_node *p = rb_first(wait_pq);	/* entry with the smallest key */

		if (!p)	/* wait queue is empty */
			return 0;
		/* recover the class that embeds this rb node */
		cl = rb_entry(p, struct htb_class, pq_node);
		/* earliest event is still in the future: report its time so the
		 * caller can work out when to look again
		 */
		if (cl->pq_key > q->now)
			return cl->pq_key;

		htb_safe_rb_erase(p, wait_pq);	/* take the class off the wait queue */
		diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
		/* re-evaluate the class's mode */
		htb_change_class_mode(q, cl, &diff);
		if (cl->cmode != HTB_CAN_SEND)	/* still may not send freely: queue it again */
			htb_add_to_wait_tree(q, cl, diff);
	}

	/* too much load - let's continue after a break for scheduling */
	/* the deadline was hit; warn once */
	if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) {
		pr_warn("htb: too many events!\n");
		q->warned |= HTB_WARN_TOOMANYEVENTS;
	}

	return q->now;
}
posted @ 2020-03-25 21:20  ouyangxibao  阅读(1040)  评论(0)    收藏  举报