HTB算法分析

入队
htb_enqueue
/*
 * htb_enqueue - classify a packet and queue it.
 *
 * The packet goes either to the qdisc-wide direct queue or to the child
 * qdisc of the leaf class chosen by htb_classify(). On success the qdisc
 * backlog and queue length are updated and NET_XMIT_SUCCESS is returned.
 * Otherwise the function returns the result of qdisc_drop(), the value left
 * in ret after classification, or the leaf qdisc's enqueue result.
 */
static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
int uninitialized_var(ret);
struct htb_sched *q = qdisc_priv(sch); // private data of this qdisc
struct htb_class *cl = htb_classify(skb, sch, &ret);
if (cl == HTB_DIRECT) { // classified as "direct": goes to the direct queue, not to a class
/* enqueue to helper queue */
if (q->direct_queue.qlen < q->direct_qlen) { // direct queue still has room: append at the tail
htb_enqueue_tail(skb, sch, &q->direct_queue);
q->direct_pkts++;
} else { // direct queue is full: drop the packet
return qdisc_drop(skb, sch, to_free);
}
#ifdef CONFIG_NET_CLS_ACT
} else if (!cl) { // no class was returned: drop the packet
if (ret & __NET_XMIT_BYPASS)
qdisc_qstats_drop(sch);
__qdisc_drop(skb, to_free);
return ret;
#endif
} else if ((ret = qdisc_enqueue(skb, cl->un.leaf.q,
to_free)) != NET_XMIT_SUCCESS) {
if (net_xmit_drop_count(ret)) { // leaf enqueue failed: account the drop on the qdisc and the class
qdisc_qstats_drop(sch);
cl->drops++;
}
return ret;
} else { // packet is now queued on the leaf's qdisc, so the leaf has traffic
// enqueue succeeded: activate the class
htb_activate(q, cl);
}
// byte backlog accounting
qdisc_qstats_backlog_inc(sch, skb);
sch->q.qlen++; // packet count accounting
return NET_XMIT_SUCCESS;
}
htb_activate
入队成功说明该叶子类有报文需要发送,因此需要激活它。激活时从叶子类开始逐级向上处理:如果当前类处于HTB_MAY_BORROW状态,就把它挂入父类对应优先级的供给(feed)红黑树,然后继续回溯到父类;如果当前类处于HTB_CAN_SEND状态,就把它加入排队规程中对应level的可发送(row)红黑树。
/**
 * htb_activate - mark a leaf as active and hook it into the feed chain
 * @q: HTB scheduler private data
 * @cl: leaf class that has just received a packet
 *
 * If the leaf has no active priority yet, its priority bit is set, the
 * feed chain is built through htb_activate_prios() and the leaf is appended
 * to the drop list of its priority. Calling it on a leaf that is already
 * active changes nothing.
 */
static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
{
	/* Expected: a leaf (level 0) with an attached, non-empty child qdisc. */
	WARN_ON(cl->level || !cl->un.leaf.q || !cl->un.leaf.q->q.qlen);

	/* Already active: nothing to do. */
	if (cl->prio_activity)
		return;

	cl->prio_activity = 1 << cl->prio;
	/* Propagate the new activity upwards through the ancestors. */
	htb_activate_prios(q, cl);
	list_add_tail(&cl->un.leaf.drop_list, &q->drops[cl->prio]);
}
/**
* htb_activate_prios - creates active classe's feed chain
*
* The class is connected to ancestors and/or appropriate rows
* for priorities it is participating on. cl->cmode must be new
* (activated) mode. It does nothing if cl->prio_activity == 0.
*/
static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
{
struct htb_class *p = cl->parent;//获取父类
long m, mask = cl->prio_activity;
while (cl->cmode == HTB_MAY_BORROW && p && mask) {//当前类为borrow状态的话,需要挂入父类供给树中
m = mask;//如果该类是中间类,那么有多个优先级
while (m) {
int prio = ffz(~m);//m的1的个数会每一次循环会被m &= ~(1 << prio)语句减小一个bit
m &= ~(1 << prio);//清除该优先级,准备下一个优先级
if (p->un.inner.clprio[prio].feed.rb_node)//父供给树在该优先级中已经有类激活了
/* parent already has its feed in use so that
* reset bit in mask as parent is already ok
* mask在该优先级上设置一个0,因为父类p在该优先级已经存在了p的父类的供给树中了,不需要再处理
*/
mask &= ~(1 << prio);
//将类挂入父类该优先级的供给树中
htb_add_to_id_tree(&p->un.inner.clprio[prio].feed, cl, prio);
}
//新增加的该优先级类活跃掩码
p->prio_activity |= mask;
//回溯父类
cl = p;
p = cl->parent;
}
if (cl->cmode == HTB_CAN_SEND && mask)//可发送队列
//插入htb排队规程的
htb_add_class_to_row(q, cl, mask);
}
/**
 * htb_add_to_id_tree - link a class into a classid-ordered rb-tree
 * @root: tree to insert into
 * @cl: class being inserted
 * @prio: selects which per-priority node of the class (cl->node[prio]) is used
 *
 * The tree is keyed by common.classid. The class is linked through
 * node[prio], so the caller must make sure it is not already on a tree
 * through that same node.
 */
static void htb_add_to_id_tree(struct rb_root *root,
			       struct htb_class *cl, int prio)
{
	struct rb_node **link = &root->rb_node;
	struct rb_node *above = NULL;

	/* Descend until an empty child slot is reached. */
	while (*link) {
		struct htb_class *cur;

		above = *link;
		cur = rb_entry(above, struct htb_class, node[prio]);
		link = (cl->common.classid > cur->common.classid) ?
			&above->rb_right : &above->rb_left;
	}

	/* Attach at the slot found, then rebalance. */
	rb_link_node(&cl->node[prio], above, link);
	rb_insert_color(&cl->node[prio], root);
}
出队
htb_dequeue
/**
 * htb_dequeue - pick the next packet to transmit
 * @sch: the HTB qdisc
 *
 * Packets on the direct queue are served first. Otherwise every level is
 * scanned from 0 upwards; pending wait-queue events of the level are
 * processed when due, and then each priority whose bit is set in
 * row_mask[level] is tried, lowest bit first, through htb_dequeue_tree().
 * The first packet obtained is returned.
 *
 * When nothing can be sent, the overlimit counter is incremented. Then
 * either the watchdog is armed for the earliest pending event, or, if that
 * event is not in the future, the work item is scheduled instead.
 *
 * Return: the dequeued skb, or NULL when no packet can be sent right now.
 */
static struct sk_buff *htb_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct htb_sched *q = qdisc_priv(sch);
	int level;
	s64 next_event;
	unsigned long start_at;

	/* try to dequeue direct packets as high prio (!) to minimize cpu work */
	skb = __qdisc_dequeue_head(&q->direct_queue);
	if (skb != NULL) {
ok:
		qdisc_bstats_update(sch, skb);
		qdisc_qstats_backlog_dec(sch, skb);
		sch->q.qlen--;
		return skb;
	}

	if (!sch->q.qlen)
		goto fin;

	/* Timestamp of this scheduling pass. */
	q->now = ktime_get_ns();
	/* Start time in jiffies; htb_do_events() uses it to bound its run time. */
	start_at = jiffies;

	next_event = q->now + 5LLU * NSEC_PER_SEC;

	/* Scan the row trees level by level, starting at the leaves (level 0). */
	for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
		/* common case optimization - skip event handler quickly */
		int m;
		s64 event = q->near_ev_cache[level];

		/* An event on this level is due: let waiting classes be
		 * re-evaluated before looking for packets.
		 */
		if (q->now >= event) {
			event = htb_do_events(q, level, start_at);
			if (!event)	/* wait queue of this level is empty */
				event = q->now + NSEC_PER_SEC;
			q->near_ev_cache[level] = event;
		}

		/* Track the earliest event over all levels. */
		if (next_event > event)
			next_event = event;

		/* Inverted mask: a zero bit marks an active priority. */
		m = ~q->row_mask[level];
		while (m != (int)(-1)) {
			/* ffz() yields the index of the first zero bit,
			 * i.e. the next active priority on this level.
			 */
			int prio = ffz(m);

			/* Mark this priority as visited. */
			m |= 1 << prio;
			skb = htb_dequeue_tree(q, prio, level);
			if (likely(skb != NULL))
				goto ok;
		}
	}

	/* Nothing could be sent on any level. */
	qdisc_qstats_overlimit(sch);
	if (likely(next_event > q->now))
		/* Earliest event is in the future: arm the watchdog for it. */
		qdisc_watchdog_schedule_ns(&q->watchdog, next_event);
	else
		/* Event is already due (htb_do_events() returns q->now when
		 * it ran out of time): defer to the work item.
		 */
		schedule_work(&q->work);
fin:
	return skb;
}

htb_dequeue_tree
/* dequeues packet at given priority and level; call only if
 * you are sure that there is active class at prio/level
 *
 * Returns the dequeued skb, or NULL if no leaf reachable from this row
 * yielded a packet. (The article's running example uses level 1, prio 3.)
 */
static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, const int prio,
const int level)
{
struct sk_buff *skb = NULL;
struct htb_class *cl, *start;
struct htb_level *hlevel = &q->hlevel[level];
struct htb_prio *hprio = &hlevel->hprio[prio];
/* look initial class up in the row: the leaf the round-robin pointers currently select */
start = cl = htb_lookup_leaf(hprio, prio);
do {
next:
if (unlikely(!cl))
return NULL;
/* class can be empty - it is unlikely but can be true if leaf
 * qdisc drops packets in enqueue routine or if someone used
 * graft operation on the leaf since last dequeue;
 * simply deactivate and skip such class
 */
if (unlikely(cl->un.leaf.q->q.qlen == 0)) { // leaf queue is empty
struct htb_class *next;
htb_deactivate(q, cl); // take the empty leaf out of the active feeds
/* row/level might become empty: this prio bit is no longer set in row_mask, so give up */
if ((q->row_mask[level] & (1 << prio)) == 0)
return NULL;
// look up the next candidate leaf
next = htb_lookup_leaf(hprio, prio);
if (cl == start) /* fix start if we just deleted it */
start = next;
cl = next;
goto next;
}
// ask the leaf's child qdisc for a packet
skb = cl->un.leaf.q->dequeue(cl->un.leaf.q);
if (likely(skb != NULL)) // got one; the round-robin pointer is not advanced here
break;
qdisc_warn_nonwc("htb", cl->un.leaf.q);
// The leaf reported packets but returned none: advance a round-robin pointer
// so another leaf is tried. For level != 0 this is the pointer kept in the
// leaf's parent (clprio[prio].ptr); for level 0 it is the level-0 row pointer.
htb_next_rb_node(level ? &cl->parent->un.inner.clprio[prio].ptr:
&q->hlevel[0].hprio[prio].ptr);
// after advancing, look up the leaf again
cl = htb_lookup_leaf(hprio, prio);
} while (cl != start); // stop once we are back at the starting leaf
if (likely(skb != NULL)) { // a packet was dequeued
bstats_update(&cl->bstats, skb); // per-class byte/packet statistics
// charge the packet length against the leaf's deficit for this level
cl->un.leaf.deficit[level] -= qdisc_pkt_len(skb);
if (cl->un.leaf.deficit[level] < 0) { // deficit used up: refill by one quantum
cl->un.leaf.deficit[level] += cl->quantum;
// and move the round-robin pointer on, so the next dequeue starts at another class
htb_next_rb_node(level ? &cl->parent->un.inner.clprio[prio].ptr :
&q->hlevel[0].hprio[prio].ptr);
}
/* this used to be after charge_class but this constelation
 * gives us slightly better performance
 * If that was the leaf's last packet, the leaf is no longer active: deactivate it.
 */
if (!cl->un.leaf.q->q.qlen)
htb_deactivate(q, cl);
// account the tokens consumed by this packet on the leaf and its ancestors
htb_charge_class(q, cl, level, skb);
}
return skb;
}
htb_lookup_leaf
/**
 * htb_lookup_leaf - returns next leaf class in DRR order
 * Find leaf where current feed pointers points to.
 */
// hprio is the row entry (tree plus round-robin pointer) for one level and
// priority; htb_dequeue_tree() passes &hlevel->hprio[prio]. The walk starts in
// that row tree and descends through the feed trees of inner classes until it
// reaches a leaf. Returns the leaf, or NULL (after WARN_ON) if none is found.
static struct htb_class *htb_lookup_leaf(struct htb_prio *hprio, const int prio)
{
int i;
// One stack frame per tree being walked: its root, the address of its
// round-robin pointer, and the address of its "last classid" hint.
struct {
struct rb_node *root;
struct rb_node **pptr;
u32 *pid;
} stk[TC_HTB_MAXDEPTH], *sp = stk;
BUG_ON(!hprio->row.rb_node);
sp->root = hprio->row.rb_node; // root of the row tree
sp->pptr = &hprio->ptr; // resume from where the previous lookup left off
sp->pid = &hprio->last_ptr_id;
for (i = 0; i < 65535; i++) { // bounded search: at most 65535 iterations
if (!*sp->pptr && *sp->pid) { // pointer was cleared but a classid hint is still recorded
/* ptr was invalidated but id is valid - try to recover
 * the original or next ptr
 * The recovered node is either the one with the saved id or the next one after it.
 */
*sp->pptr = htb_id_find_next_upper(prio, sp->root, *sp->pid);
}
*sp->pid = 0; /* ptr is valid now so that remove this hint as it
* can become out of date quickly
*/
if (!*sp->pptr) {/* we are at right end; rewind & go up
* This tree has been fully traversed; reset its pointer for the next round.
*/
*sp->pptr = sp->root; // restart from the root ...
while ((*sp->pptr)->rb_left) // ... and walk to the leftmost node
*sp->pptr = (*sp->pptr)->rb_left;
// this tree is done: pop back to the tree we descended from
if (sp > stk) { // not at the entry frame (sp == stk means we are back at hprio)
sp--; // frame of the tree that holds the inner class we came through
if (!*sp->pptr) { // not expected: we descended through this pointer
WARN_ON(1);
return NULL;
}
// advance that tree to the node after the inner class we came through
htb_next_rb_node(sp->pptr);
}
} else { // the pointer refers to a node
struct htb_class *cl;
struct htb_prio *clp;
// map the rb-node back to its class
cl = rb_entry(*sp->pptr, struct htb_class, node[prio]);
if (!cl->level) // Leaf: return it. The pointer is left on this leaf here;
// the caller, htb_dequeue_tree(), dequeues from the leaf and
// advances the pointer itself when the dequeue fails or the
// deficit is used up.
return cl;
// inner class: descend into its feed tree for this priority
clp = &cl->un.inner.clprio[prio];
// push a new frame that tracks the walk through that feed tree
(++sp)->root = clp->feed.rb_node;
sp->pptr = &clp->ptr; // round-robin pointer of the feed tree
sp->pid = &clp->last_ptr_id;
}
}
WARN_ON(1); // not expected to be reached: the iteration bound was exhausted without finding a leaf
return NULL;
}
htb_deactivate
/**
 * htb_deactivate - remove leaf cl from active feeds
 *
 * Make sure that leaf is active. In the other words it can't be called
 * with non-active leaf. It also removes class from the drop list.
 * The leaf is unlinked from the feed/row trees via htb_deactivate_prios().
 */
static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
{
WARN_ON(!cl->prio_activity); // the leaf is expected to be active
htb_deactivate_prios(q, cl); // must run before prio_activity is cleared: it reads the mask
cl->prio_activity = 0; // clear the activity mask
list_del_init(&cl->un.leaf.drop_list); // unlink from the drop list
}
/**
* htb_deactivate_prios - remove class from feed chain
*
* cl->cmode must represent old mode (before deactivation). It does
* nothing if cl->prio_activity == 0. Class is removed from all feed
* chains and rows.
* 将一个类从供给链中移除,需要递归到顶层。
*/
static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
{
struct htb_class *p = cl->parent;//父类
long m, mask = cl->prio_activity;//该类活跃的优先级,意味着需要从父类的多个供给树中移除。
//mask表示本类需要去掉的活跃优先级,如果本类是父类的最后一个活跃的类,那么需要将父类该优先级
//也去掉,所以需要递归直到mask为0
while (cl->cmode == HTB_MAY_BORROW && p && mask) {//当前类必须是处于borrow状态,只有处于borrow状态才会在供给树中。否则在row树中
m = mask;
mask = 0;//父类需要去掉的活跃优先级
while (m) {//遍历每一个活跃优先级
int prio = ffz(~m);
m &= ~(1 << prio);
//该类是父供给树迭代调度的当前节点
if (p->un.inner.clprio[prio].ptr == cl->node + prio) {
/* we are removing child which is pointed to from
* parent feed - forget the pointer but remember
* classid
* 因为要从树中摘除,所以不再记录指针,记录指针页找不到下一个节点了,因为关系变了
* 只需要记录类id。下一次调度的时候自动获取类id更大的节点。以p->un.inner.clprio[prio].ptr == null为标志
* 详细可以看函数htb_lookup_leaf
*/
p->un.inner.clprio[prio].last_ptr_id = cl->common.classid;
p->un.inner.clprio[prio].ptr = NULL;
}
//将该节点从红黑树中移除
htb_safe_rb_erase(cl->node + prio,
&p->un.inner.clprio[prio].feed);
//如果父类的该优先级供给树为空了,那么该父类该优先级标志需要去掉。
if (!p->un.inner.clprio[prio].feed.rb_node)
//父类该优先级不再活跃
mask |= 1 << prio;
}
//去掉父类不再活跃的优先级,然后递归到更高一级
p->prio_activity &= ~mask;
cl = p;
p = cl->parent;
}
if (cl->cmode == HTB_CAN_SEND && mask)//在row中,mask表示cl需要去掉的活跃优先级
htb_remove_class_from_row(q, cl, mask);
}
/**
 * htb_remove_class_from_row - take a class out of its level's row trees
 * @q: HTB scheduler private data
 * @cl: class to remove
 * @mask: bitmask of priorities to remove the class from
 *
 * For each priority in @mask the class is erased from the row tree of its
 * level. Priorities whose row tree became empty are cleared from
 * q->row_mask for that level. Nothing happens when @mask is 0.
 */
static inline void htb_remove_class_from_row(struct htb_sched *q,
					     struct htb_class *cl, int mask)
{
	struct htb_level *hlevel = &q->hlevel[cl->level];
	int emptied = 0;

	while (mask) {
		int prio = ffz(~mask);
		struct htb_prio *hprio = &hlevel->hprio[prio];
		struct rb_node *node = cl->node + prio;

		mask &= ~(1 << prio);

		/* The row's round-robin pointer refers to this class:
		 * advance it before the node goes away.
		 */
		if (hprio->ptr == node)
			htb_next_rb_node(&hprio->ptr);

		htb_safe_rb_erase(node, &hprio->row);

		if (!hprio->row.rb_node)
			emptied |= 1 << prio;
	}

	q->row_mask[cl->level] &= ~emptied;
}
令牌计算
以下分析先不考虑迟滞(hysteresis)功能,即假设htb_hysteresis未开启。

htb_charge_class
/**
 * htb_charge_class - charges amount "bytes" to leaf and ancestors
 *
 * Routine assumes that packet "bytes" long was dequeued from leaf cl
 * borrowing from "level". It accounts bytes to ceil leaky bucket for
 * leaf and all ancestors and to rate bucket for ancestors at levels
 * "level" and higher. It also handles possible change of mode resulting
 * from the update. Note that mode can also increase here (MAY_BORROW to
 * CAN_SEND) because we can use more precise clock that event queue here.
 * In such case we remove class from event queue first.
 */
static void htb_charge_class(struct htb_sched *q, struct htb_class *cl, // leaf the packet was dequeued from
int level, // level of the row tree the dequeue started from (the lending level)
struct sk_buff *skb)
{
int bytes = qdisc_pkt_len(skb); // packet length
enum htb_cmode old_mode; // mode before this update
s64 diff;
// Walk from the leaf up through every ancestor.
while (cl) {
diff = min_t(s64, q->now - cl->t_c, cl->mbuffer); // time since the class's last checkpoint, capped at mbuffer
if (cl->level >= level) { // at or above the lending level: the rate bucket is charged
if (cl->level == level) // this class is the lender
cl->xstats.lends++;
htb_accnt_tokens(cl, bytes, diff); // account the packet against the rate bucket
} else { // Below the lending level: the class borrowed, so its rate bucket
// is not charged for this packet; it is only credited with the
// elapsed time because t_c is moved forward below.
cl->xstats.borrows++;
cl->tokens += diff; /* we moved t_c; update tokens */
}
// the ceil bucket is charged for every class on the path
htb_accnt_ctokens(cl, bytes, diff);
cl->t_c = q->now; // move the checkpoint
old_mode = cl->cmode;
diff = 0;
// Re-evaluate the mode; diff is passed by pointer and is used below as the
// wait-queue delay (presumably filled in by htb_class_mode(), not shown here).
htb_change_class_mode(q, cl, &diff);
if (old_mode != cl->cmode) {
if (old_mode != HTB_CAN_SEND) // was waiting: remove it from this level's wait queue
htb_safe_rb_erase(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
if (cl->cmode != HTB_CAN_SEND) // cannot send on its own rate now: (re)queue it with the new delay
htb_add_to_wait_tree(q, cl, diff);
}
/* update basic stats except for leaves which are already updated */
if (cl->level)
bstats_update(&cl->bstats, skb);
cl = cl->parent; // continue with the parent
}
}
1.模式不是HTB_CAN_SEND状态,说明其一定在wait_pq队列中;反之模式为HTB_CAN_SEND状态,则一定不在wait_pq队列中。
htb_change_class_mode
/**
 * htb_change_class_mode - recompute and apply a class's mode
 * @q: HTB scheduler private data
 * @cl: class to update
 * @diff: passed through to htb_class_mode()
 *
 * The new mode is obtained from htb_class_mode(). If it equals the current
 * mode nothing is done. Otherwise the overlimit counter is incremented when
 * the class becomes HTB_CANT_SEND, and, for a class with active priorities,
 * the feed/row linkage is rebuilt: the class is unlinked under its old mode
 * (unless that was HTB_CANT_SEND) and linked again under the new mode
 * (unless that is HTB_CANT_SEND). Wait-queue handling is left to the
 * callers.
 */
static void
htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, s64 *diff)
{
	enum htb_cmode new_mode = htb_class_mode(cl, diff);

	if (new_mode == cl->cmode)
		return;

	if (new_mode == HTB_CANT_SEND)
		cl->overlimits++;

	/* No active priority: the class is on no feed/row tree, so only the
	 * mode itself has to change.
	 */
	if (!cl->prio_activity) {
		cl->cmode = new_mode;
		return;
	}

	/* Unlink while cmode still holds the old mode ... */
	if (cl->cmode != HTB_CANT_SEND)
		htb_deactivate_prios(q, cl);

	cl->cmode = new_mode;

	/* ... and link again according to the new one. */
	if (new_mode != HTB_CANT_SEND)
		htb_activate_prios(q, cl);
}
1.类模式为HTB_MAY_BORROW状态,在父类的feed树中,也会在wait_pq中。
2.类模式为HTB_CAN_SEND状态,在调度矩阵的row树中。
3.类模式为HTB_CANT_SEND状态,只会在wait_pq中。
迟滞功能
HTB通过全局变量htb_hysteresis来实现迟滞功能,该功能的主要作用是延迟正向模式变换(正向模式变换指的是SEND到BORROW,BORROW到CANT)的时间。
HTB_CAN_SEND to HTB_MAY_BORROW
在正常情况下,当cl.tokens小于0时,模式要进行SEND到BORROW状态转换。开启迟滞模式后,小于0不会进行转换,而是到小于-cl->buffer时,才进行转换。
HTB_MAY_BORROW to HTB_CANT_SEND
在正常情况下,当cl.ctokens小于0时,模式要进行BORROW到CANT状态转换。开启迟滞模式后,小于0不会进行转换,而是到小于-cl->cbuffer时,才进行转换。
迟滞模式变相地增大了桶的大小,即burst的大小(提升了突发速率),但并没有提高类的平均速率(因为令牌补充的速率没有变化,只是桶变大了)。它能减少模式变换的次数,提升约15%的性能。
可以参考文章:http://luxik.cdi.cz/~devik/qos/htb/manual/theory.htm
/*
 * htb_lowater - low-water threshold used with hysteresis
 *
 * Returns 0 when hysteresis is off or the class is already in
 * HTB_CANT_SEND mode; otherwise returns -cl->cbuffer. Lowering the
 * threshold this way delays the transition into HTB_CANT_SEND, which
 * effectively enlarges the burst.
 */
static inline s64 htb_lowater(const struct htb_class *cl)
{
	if (!htb_hysteresis)
		return 0;

	if (cl->cmode == HTB_CANT_SEND)
		return 0;

	return -cl->cbuffer;
}
/*
 * htb_hiwater - high-water threshold used with hysteresis
 *
 * Returns -cl->buffer when hysteresis is on and the class is currently in
 * HTB_CAN_SEND mode, which delays leaving that mode; returns 0 in every
 * other case.
 */
static inline s64 htb_hiwater(const struct htb_class *cl)
{
	if (htb_hysteresis && cl->cmode == HTB_CAN_SEND)
		return -cl->buffer;

	return 0;
}
等待队列
每一个level都有一棵等待红黑树(wait_pq),用于存放那些不处于HTB_CAN_SEND状态(即HTB_MAY_BORROW或HTB_CANT_SEND)的类,并按照各类的到期时间(pq_key)排序。
在进行令牌计算的时候,如果类的模式发生变化,从不是SEND变成SEND的时候,需要从等待队列摘除。如果从SEND变为不是SEND的话,需要加入到等待队列中。
htb_charge_class
/**
 * htb_charge_class - charges amount "bytes" to leaf and ancestors
 *
 * Routine assumes that packet "bytes" long was dequeued from leaf cl
 * borrowing from "level". It accounts bytes to ceil leaky bucket for
 * leaf and all ancestors and to rate bucket for ancestors at levels
 * "level" and higher. It also handles possible change of mode resulting
 * from the update. Note that mode can also increase here (MAY_BORROW to
 * CAN_SEND) because we can use more precise clock that event queue here.
 * In such case we remove class from event queue first.
 */
static void htb_charge_class(struct htb_sched *q, struct htb_class *cl, // leaf the packet was dequeued from
int level, // level of the row tree the dequeue started from (the lending level)
struct sk_buff *skb)
{
int bytes = qdisc_pkt_len(skb); // packet length
enum htb_cmode old_mode; // mode before this update
s64 diff;
// Walk from the leaf up through every ancestor.
while (cl) {
diff = min_t(s64, q->now - cl->t_c, cl->mbuffer); // time since the class's last checkpoint, capped at mbuffer
if (cl->level >= level) { // at or above the lending level: the rate bucket is charged
if (cl->level == level) // this class is the lender
cl->xstats.lends++;
htb_accnt_tokens(cl, bytes, diff);
} else {
cl->xstats.borrows++;
cl->tokens += diff; /* we moved t_c; update tokens */
}
// the ceil bucket is charged for every class on the path
htb_accnt_ctokens(cl, bytes, diff);
cl->t_c = q->now; // move the checkpoint
old_mode = cl->cmode;
diff = 0;
// re-evaluate the class mode
htb_change_class_mode(q, cl, &diff);
if (old_mode != cl->cmode) { // the mode changed
if (old_mode != HTB_CAN_SEND) // was waiting: remove it from this level's wait queue
htb_safe_rb_erase(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
if (cl->cmode != HTB_CAN_SEND) // not HTB_CAN_SEND now: put it on the wait queue with delay diff
htb_add_to_wait_tree(q, cl, diff);
}
/* update basic stats except for leaves which are already updated */
if (cl->level)
bstats_update(&cl->bstats, skb);
cl = cl->parent; // continue with the parent
}
}
htb_add_to_wait_tree
/**
 * htb_add_to_wait_tree - queue a class on its level's wait tree
 * @q: HTB scheduler private data
 * @cl: class to queue; it must not already be on the wait tree
 * @delay: offset from q->now at which the class should be re-evaluated
 *
 * The expiry time cl->pq_key is set to q->now + @delay and used as the
 * tree key. The level's cached nearest-event time is lowered if the new
 * key is earlier.
 */
static void htb_add_to_wait_tree(struct htb_sched *q,
				 struct htb_class *cl, s64 delay)
{
	struct rb_root *wait_pq = &q->hlevel[cl->level].wait_pq;
	struct rb_node **link = &wait_pq->rb_node;
	struct rb_node *above = NULL;

	/* Absolute expiry time; never exactly "now", so bump it by one unit
	 * of the q->now clock (q->now is taken from ktime_get_ns() in
	 * htb_dequeue()).
	 */
	cl->pq_key = q->now + delay;
	if (cl->pq_key == q->now)
		cl->pq_key++;

	/* update the nearest event cache */
	if (q->near_ev_cache[cl->level] > cl->pq_key)
		q->near_ev_cache[cl->level] = cl->pq_key;

	/* Find the insertion slot; equal keys go to the right. */
	while (*link) {
		struct htb_class *cur;

		above = *link;
		cur = rb_entry(above, struct htb_class, pq_node);
		link = (cl->pq_key >= cur->pq_key) ?
			&above->rb_right : &above->rb_left;
	}

	rb_link_node(&cl->pq_node, above, link);
	rb_insert_color(&cl->pq_node, wait_pq);
}
等待事件已经发生
当等待的时间已经到期(pq_key <= q->now)时,需要把这些类从等待红黑树中摘除并重新计算其模式;如果重新计算后仍然不是HTB_CAN_SEND状态,则按新的等待时间重新加入等待红黑树。
/**
 * htb_do_events - make mode changes to classes at the level
 *
 * Scans event queue for pending events and applies them. Returns time of
 * next pending event (0 for no event in pq, q->now for too many events).
 * Note: Applied are events whose have cl->pq_key <= q->now.
 */
static s64 htb_do_events(struct htb_sched *q, const int level,
unsigned long start)
{
/* don't run for longer than 2 jiffies; 2 is used instead of
 * 1 to simplify things when jiffy is going to be incremented
 * too soon
 */
unsigned long stop_at = start + 2; // deadline: two jiffies after the caller's start time
struct rb_root *wait_pq = &q->hlevel[level].wait_pq;
while (time_before(jiffies, stop_at)) { // keep going only while the deadline has not passed
struct htb_class *cl;
s64 diff;
struct rb_node *p = rb_first(wait_pq); // node with the smallest key, i.e. the earliest expiry
if (!p) // wait queue is empty
return 0;
// map the rb-node back to its class
cl = rb_entry(p, struct htb_class, pq_node);
if (cl->pq_key > q->now) // earliest event is still in the future: report its time to the caller
return cl->pq_key;
htb_safe_rb_erase(p, wait_pq); // take the class off the wait queue
diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
// re-evaluate the class mode
htb_change_class_mode(q, cl, &diff);
if (cl->cmode != HTB_CAN_SEND) // still not HTB_CAN_SEND: queue it again with the new delay
htb_add_to_wait_tree(q, cl, diff);
}
/* too much load - let's continue after a break for scheduling */
// the deadline passed before the queue was drained: warn once
if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) {
pr_warn("htb: too many events!\n");
q->warned |= HTB_WARN_TOOMANYEVENTS;
}
return q->now;
}

浙公网安备 33010602011771号