cgroup机制

主要机制

测试功能

测试1

用户态测试

  • task1是主进程,先手动创建mygroup组,sudo mkdir /sys/fs/cgroup/mygroup
  • 然后task1通过函数将自己的pid号加入到 mygroup/cgroup.procs 中,task1调用system启动子进程task2
  • task2再调用system启动它的子进程task3
  • 最后观察 mygroup/cgroup.procs ,可以发现task2和task3的pid被自动加入其中,证明了在用户态cgroup机制的可行性
/*---------------------------task1.c------------------------------*/

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <errno.h>


#define CGROUP_PATH "/sys/fs/cgroup/mygroup"

/*
 * Move the process identified by pid into the cgroup at CGROUP_PATH by
 * writing its PID to that cgroup's cgroup.procs file.
 *
 * Returns 0 on success, -1 on failure (a diagnostic is printed to stderr).
 */
int add_pid_to_cgroup(pid_t pid) {
    char path[256];
    snprintf(path, sizeof(path), "%s/cgroup.procs", CGROUP_PATH);

    FILE *fp = fopen(path, "w");
    if (!fp) {
        perror("Failed to open cgroup.procs");
        return -1;
    }

    int rc = 0;

    /* pid_t has no printf conversion of its own, so widen it explicitly. */
    if (fprintf(fp, "%ld", (long)pid) < 0) {
        perror("Failed to write cgroup.procs");
        rc = -1;
    }

    /*
     * stdio buffers the write, so the data only reaches the kernel when the
     * stream is flushed. A rejected write therefore surfaces from fclose(),
     * which must be checked or the failure is silently lost.
     */
    if (fclose(fp) != 0) {
        if (rc == 0) {
            perror("Failed to write cgroup.procs");
        }
        rc = -1;
    }

    return rc;
}

/*
 * task1: place this process in the cgroup, then launch ./task2 as a child
 * process so that its cgroup membership can be observed in cgroup.procs.
 *
 * Returns 0 on success and -1 if joining the cgroup or running task2 fails.
 */
int main(){
    pid_t pid = getpid();
    printf("task1 PID: %d\n", pid);
    /* Put our own PID into the cgroup before any child is started. */
    if (add_pid_to_cgroup(pid) != 0) {
        fprintf(stderr, "Failed to add main pid to cgroup\n");
        return -1;
    }

    sleep(5);

    printf("执行task2\n");

    /*
     * system() blocks until the command has finished, so task1 and task2 do
     * not run concurrently. Its result must be inspected: -1 means the child
     * could not be created, otherwise it is a wait status.
     */
    int status = system("./task2");
    if (status == -1) {
        perror("system(./task2)");
        return -1;
    }
    if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
        fprintf(stderr, "task2 did not exit cleanly (status=%d)\n", status);
        return -1;
    }

    return 0;
}

/*------------------------------------task2.c---------------------------------------*/
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <errno.h>

/*
 * task2: print our PID, pause for five seconds, then run ./task3 through
 * system() and exit once it has returned.
 */
int main(){
    printf("task2 PID: %d\n", getpid());
    sleep(5);

    fputs("执行task3\n", stdout);
    system("./task3");
    return 0;
}

/*-----------------------------------------------task3.c-------------------------------*/
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <errno.h>

/*
 * task3: last process in the chain. Print our PID, pause for five seconds,
 * print the closing message and exit.
 */
int main(){
    printf("task3 PID: %d\n", getpid());
    sleep(5);
    fputs("结束\n", stdout);
    return 0;
}
/*----------------------------------------monitor.c----------------------------------*/
//用于监控/sys/fs/cgroup/mygroup/cgroup.procs

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>

#define CGROUP_PROCS_PATH "/sys/fs/cgroup/mygroup/cgroup.procs"

/*
 * Print one snapshot of the PIDs currently listed in CGROUP_PROCS_PATH.
 * If the file cannot be opened, the error is reported and nothing else is
 * printed.
 */
void print_cgroup_procs() {
    FILE *procs = fopen(CGROUP_PROCS_PATH, "r");
    if (procs == NULL) {
        perror("Failed to open cgroup.procs");
        return;
    }

    printf("=== cgroup.procs snapshot ===\n");

    /* Read integers one at a time; stop at EOF or the first non-number. */
    for (int member; fscanf(procs, "%d", &member) == 1; ) {
        printf("PID in cgroup: %d\n", member);
    }

    fclose(procs);
    printf("=============================\n\n");
}

/*
 * monitor: print the contents of CGROUP_PROCS_PATH once per second, forever.
 * The program only stops when it is killed from outside.
 */
int main() {
    printf("Monitoring %s\n", CGROUP_PROCS_PATH);

    for (;;) {
        print_cgroup_procs();
        sleep(1);  /* one snapshot per second */
    }

    return 0;
}

测试结果:task2和task3自动继承到mygroup中,它们的pid被自动写入了文件/sys/fs/cgroup/mygroup/cgroup.procs

测试2

用户态测试

  • task1是主进程,先手动创建mygroup组,sudo mkdir /sys/fs/cgroup/mygroup
  • 然后task1通过函数将自己的pid号加入到 mygroup/cgroup.procs 中,然后在线程中启动task2
  • 观察mygroup/cgroup.procs,其中并没有出现新的pid号,因为task2是以线程方式运行的,它与task1属于同一个进程,pid相同;如果观察mygroup/cgroup.threads,则可以看到task2线程的tid记录
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <sys/types.h>
#include <pthread.h>

#define CGROUP_PATH "/sys/fs/cgroup/mygroup"

/* ---------- 把 task2 的代码改造成线程函数 ---------- */
/*
 * Thread entry point that stands in for the former task2 program.
 *
 * Prints the process ID together with the value of pthread_self(), sleeps
 * for 20 seconds and returns NULL. The argument is not used.
 *
 * NOTE(review): pthread_self() yields the pthread_t handle, which is
 * presumably not the numeric kernel thread ID listed in cgroup.threads;
 * confirm before comparing the printed "TID" with that file.
 */
void* task2_thread(void* arg)
{
    (void)arg;

    /* getpid() inside a thread reports the same PID as the main thread. */
    printf("task2 thread running, PID: %d  TID: %ld\n",
           getpid(), (long)pthread_self());

    sleep(20);
    return NULL;
}
/* -------------------------------------------------- */

/*
 * Move the process identified by pid into the cgroup at CGROUP_PATH by
 * writing its PID to that cgroup's cgroup.procs file.
 *
 * Returns 0 on success, -1 on failure (a diagnostic is printed to stderr).
 */
int add_pid_to_cgroup(pid_t pid)
{
    char path[256];
    int rc = 0;

    snprintf(path, sizeof(path), "%s/cgroup.procs", CGROUP_PATH);
    FILE *fp = fopen(path, "w");
    if (!fp) {
        perror("Failed to open cgroup.procs");
        return -1;
    }

    /* pid_t has no printf conversion of its own, so widen it explicitly. */
    if (fprintf(fp, "%ld", (long)pid) < 0) {
        perror("Failed to write cgroup.procs");
        rc = -1;
    }

    /*
     * The buffered write only reaches the kernel on flush, so a rejected
     * PID is reported by fclose(); ignoring it would hide the failure.
     */
    if (fclose(fp) != 0) {
        if (rc == 0) {
            perror("Failed to write cgroup.procs");
        }
        rc = -1;
    }
    return rc;
}

/*
 * Join the cgroup, then run the task2 logic in a second thread of this same
 * process and wait for it to finish.
 *
 * Returns 0 on success, EXIT_FAILURE if joining the cgroup, creating the
 * thread or joining the thread fails.
 */
int main(void)
{
    pid_t pid = getpid();
    printf("task1 PID: %d\n", pid);

    if (add_pid_to_cgroup(pid) != 0) {
        fprintf(stderr, "Failed to add main pid to cgroup\n");
        return EXIT_FAILURE;
    }

    sleep(5);

    printf("创建线程执行 task2 逻辑...\n");
    pthread_t tid;

    /*
     * pthread functions return the error number directly and do not set
     * errno, so perror() would print an unrelated message here.
     */
    int rc = pthread_create(&tid, NULL, task2_thread, NULL);
    if (rc != 0) {
        fprintf(stderr, "pthread_create: %s\n", strerror(rc));
        return EXIT_FAILURE;
    }

    /* The main thread waits for the task2 thread to finish. */
    rc = pthread_join(tid, NULL);
    if (rc != 0) {
        fprintf(stderr, "pthread_join: %s\n", strerror(rc));
        return EXIT_FAILURE;
    }
    return 0;
}

测试结果:如果是主进程用线程启动一个任务,这个新任务的pid和主进程的pid是一样的,所以procs文件中没有新的pid号,但是threads文件中会有新任务的tid号

测试3

测试步骤:

  • 1.用户态下,手动创建目录/sys/fs/cgroup/mygroup/ 因为尝试了程序里面调用api创建mygroup,失败了,好多api是v1层级下使用的,也有好多函数没有导出符号给我们使用
  • 2.用cgroup_attach_task将主进程加入到mygroup,并且打印自身pid以及找出自身所在的cgroup的相对路径
  • 3.然后打印procs里面的内容
  • 4.kthread打印自身所在cgroup的相对路径
  • 5.kthread调用 cgroup_attach_task将自己加入主进程组中,再次打印相对路径
  • 6.再次打印procs里面的内容

// SPDX-License-Identifier: GPL-2.0
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/cgroup.h>
#include <linux/slab.h>
#include <linux/kthread.h>
#include <linux/fs.h>
#include <linux/uaccess.h>
#include <linux/kernel_read_file.h>

#define BUF_LEN 256
#define NEW_CGROUP_PATH "mygroup"   /* 相对于根,不含前导 / */

/* ---------- 回显 cgroup.procs ---------- */

/*
 * Log the beginning of /sys/fs/cgroup/mygroup/cgroup.procs.
 *
 * At most sizeof(buf) - 1 bytes are read from offset 0 and printed as one
 * string; open and read failures are logged instead of being ignored.
 */
static void show_procs(void)
{
    struct file *filp;
    loff_t pos = 0;
    char buf[256];
    ssize_t ret;

    filp = filp_open("/sys/fs/cgroup/mygroup/cgroup.procs",
                     O_RDONLY, 0);
    if (IS_ERR(filp)) {
        pr_err("open cgroup.procs failed: %ld\n", PTR_ERR(filp));
        return;
    }

    /* One byte is kept in reserve for the terminating NUL. */
    ret = kernel_read(filp, buf, sizeof(buf) - 1, &pos);
    if (ret >= 0) {
        buf[ret] = '\0';
        pr_info("cgroup.procs: %s", buf);
    } else {
        pr_err("read cgroup.procs failed: %zd\n", ret);
    }

    filp_close(filp, NULL);
}

/* ---------- 子线程 ---------- */
/*
 * Kernel-thread body: log this thread's default-hierarchy cgroup path, attach
 * the thread to the cgroup of the task passed in arg, then log the path again.
 * show_procs() is called before and after the move.
 *
 * arg is the struct task_struct pointer handed to kthread_run().
 * Returns 0, or -ENOMEM if the path buffer cannot be allocated.
 *
 * NOTE(review): nothing visible here takes a reference on the parent task or
 * waits for this thread, so parent may no longer be valid by the time it is
 * dereferenced; confirm the lifetime.
 * NOTE(review): ->cgroups is dereferenced and cgroup_attach_task() is called
 * with no locking visible in this function; verify the locking requirements
 * against the kernel version in use.
 */
static int child_fn(void *arg)
{
    char *buf;
    struct task_struct *parent = arg;

    buf = kmalloc(BUF_LEN, GFP_KERNEL);
    if (!buf)
        return -ENOMEM;

    /* Path before the move. */
    cgroup_path(current->cgroups->dfl_cgrp, buf, BUF_LEN);
    pr_info("child[%d] (before): %s\n", current->pid, buf);
    show_procs();

    /* A zero return is treated as success; failures are not reported. */
    if (!cgroup_attach_task(parent->cgroups->dfl_cgrp, current, false))
        pr_info("child[%d] moved\n", current->pid);

    /* Path after the move. */
    cgroup_path(current->cgroups->dfl_cgrp, buf, BUF_LEN);
    pr_info("child[%d] (after): %s\n", current->pid, buf);
    show_procs();

    kfree(buf);
    return 0;
}

/* ---------- 初始化 ---------- */
/*
 * Module init: look up the pre-created cgroup NEW_CGROUP_PATH, attach the
 * loading process to it, log the resulting cgroup path and cgroup.procs
 * contents, then start the child kernel thread.
 *
 * Returns 0 on success, or a negative errno if the cgroup cannot be found or
 * the child thread cannot be started.
 */
static int __init whoami_init(void)
{
    struct task_struct *child;
    struct cgroup *cgrp;
    char *buf;
    int ret;

    /* 1. User space must have created /sys/fs/cgroup/mygroup beforehand. */
    cgrp = cgroup_get_from_path(NEW_CGROUP_PATH);
    if (IS_ERR(cgrp)) {
        pr_err("cannot find %s (%ld)\n", NEW_CGROUP_PATH, PTR_ERR(cgrp));
        return PTR_ERR(cgrp);
    }

    /* 2. Migrate the loading process; report a failure instead of hiding it. */
    ret = cgroup_attach_task(cgrp, current, false);
    if (ret)
        pr_err("parent[%d] attach to %s failed: %d\n",
               current->pid, NEW_CGROUP_PATH, ret);
    else
        pr_info("parent[%d] moved to %s\n", current->pid, NEW_CGROUP_PATH);
    cgroup_put(cgrp);

    /* The path dump is best effort: skipped when the buffer is unavailable. */
    buf = kmalloc(BUF_LEN, GFP_KERNEL);
    if (buf) {
        cgroup_path(current->cgroups->dfl_cgrp, buf, BUF_LEN);
        pr_info("parent[%d] now in: %s\n", current->pid, buf);
        show_procs();
        kfree(buf);
    }

    /* 3. Start the child thread; kthread_run() returns ERR_PTR on failure. */
    child = kthread_run(child_fn, current, "whoami_child");
    if (IS_ERR(child)) {
        pr_err("kthread_run failed: %ld\n", PTR_ERR(child));
        return PTR_ERR(child);
    }
    return 0;
}

/* Module exit: only logs that the module was unloaded; nothing is undone. */
static void __exit whoami_exit(void)
{
    pr_info("module unloaded\n");
}

module_init(whoami_init);
module_exit(whoami_exit);
MODULE_LICENSE("GPL");

将编译好的模块加入到内核中,测试结果如下:

sudo dmesg | tail
 
[ 2875.672622] parent[4779] moved to mygroup
[ 2875.672629] parent[4779] now in: /mygroup
[ 2875.672670] cgroup.procs: 4779
[ 2875.672890] child[4780] (before): /
[ 2875.672910] cgroup.procs: 4779
[ 2875.672934] child[4780] moved
[ 2875.672935] child[4780] (after): /mygroup
[ 2875.672948] cgroup.procs: 4779
               4780

可以看到,一开始procs里面只有parent进程的pid,后面在把child拉进mygroup之前,child默认加入的cgroup组是根目录"/sys/fs/cgroup/",在使用cgroup_attach_task函数将child拉进mygroup之后,procs里面变成了parent和child的两个进程pid

遇到的问题

其中cgroup_attach_task函数存在于 cgroup.c 中,但是这个函数并没有导出符号,因此想要使用的话,需要修改内核源码,将这个函数符号导出,并且重新编译内核和安装,在新的内核下才可以使用该函数

创建cgroup组只能在用户态下创建

之前试过各种内核环境下创建 cgroup 组,利用内核API在 /sys/fs/cgroup/下创建cgroup目录,其中还导出了cgroup_mkdir()的符号,重新编译内核,使其能够被调用,但是cgroup_mkdir()需要以 当前 task 的上下文作为 owner task,cgroup_mkdir() 最终会检查权限和上下文,如果你是通过内核模块直接调用这个函数,它默认使用 current task 作为“操作者”,但此时 current 是内核线程,权限不足或上下文不对。

所以创建cgroup组并不是简单的在根路径(/sys/fs/cgroup/)下创建目录,在用户态下的sudo mkdir mygroup其中包含了很多内核操作,仅仅用cgroup_mkdir()函数解决不了问题

最终方案:我们必须通过用户空间调用 mkdir() 来创建 cgroup 目录(至少就目前的研究情况来说是这样)

下面是通过调用call_usermodehelper,传入相关参数,在内核态下调用用户态来执行mkdir的操作

#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/delay.h>

#define  MAX_ARGV_LEN    512


int  do_shell_by_default(char *  cmd, int mode)
{   
    char *shell_argv[5] = {NULL};
    char *shell_tmp =NULL;
    int ret  = 0;
    int i = 0;
    char *envp[] = {    
        "HOME=/",
        "TERM=xterm",
        "USER=root",
        "PATH=/sbin:/bin:/usr/sbin:/usr/bin:/bin/:/usr/local/bin",
        NULL
    };
    
    //char argv[5][MAX_ARGV_LEN];
    shell_tmp =  kmalloc(5*sizeof(char)*MAX_ARGV_LEN, GFP_KERNEL);  //

    if(shell_tmp == NULL){
        ret = -1;
        return -1;
    }
    
    for(i = 0; i<5; i++)
    {
        shell_argv[i] =  shell_tmp+(i*MAX_ARGV_LEN);   //
    }

    strncpy(shell_argv[0],"/bin/bash",MAX_ARGV_LEN);
    strncpy(shell_argv[1],"-c",MAX_ARGV_LEN);
    strncpy(shell_argv[2],cmd,MAX_ARGV_LEN);
    shell_argv[3] = NULL;

    ret = call_usermodehelper(shell_argv[0],shell_argv,envp,mode);   //默认等待执行完毕

    for(i = 0; i<5; i++)
    {
        shell_argv[i] = NULL;  //恢复指针
    }

    kfree(shell_tmp);  //释放指针

    pr_info("do shell cmd %s,return %d\n", cmd, ret);

    return ret;
}

/*
 * Module init: create /sys/fs/cgroup/mygroup by running "mkdir -p" in user
 * space.
 *
 * Returns 0 once mkdir has finished successfully, otherwise a negative errno.
 */
static int __init create_cgroup_demo(void)
{
    char cmd[128];
    const char *name = "mygroup";          /* name of the cgroup to create */
    int ret;

    /* Build the command: mkdir -p /sys/fs/cgroup/<name> */
    snprintf(cmd, sizeof(cmd), "mkdir -p /sys/fs/cgroup/%s", name);

    /*
     * Wait for the helper process to exit, not merely for exec to succeed:
     * only then does the result say whether the directory was created.
     */
    ret = do_shell_by_default(cmd, UMH_WAIT_PROC);
    if (ret) {
        pr_err("create_cgroup: mkdir failed, ret=%d\n", ret);
        /* A positive value is the helper's exit status; init needs an errno. */
        return ret < 0 ? ret : -EIO;
    }

    pr_info("create_cgroup: /sys/fs/cgroup/%s created\n", name);
    return 0;
}

/*
 * Module exit: remove /sys/fs/cgroup/mygroup by running "rmdir" in user
 * space, and log a failure instead of discarding the result.
 */
static void __exit destroy_cgroup_demo(void)
{
    char cmd[128];
    const char *name = "mygroup";
    int ret;

    snprintf(cmd, sizeof(cmd), "rmdir /sys/fs/cgroup/%s", name);

    /* Wait for rmdir to exit so that its status is meaningful. */
    ret = do_shell_by_default(cmd, UMH_WAIT_PROC);
    if (ret)
        pr_err("destroy_cgroup: rmdir failed (%d) for %s\n", ret, name);
}

module_init(create_cgroup_demo);
module_exit(destroy_cgroup_demo);
MODULE_LICENSE("GPL");

最终能够成功在内核环境下创建目录

内核使用call_usermodehelper启动用户态创建cgroup组目录

下面代码能够实现在内核环境下用call_usermodehelper启动用户态程序来创建cgroup组目录,然后返回这个组的cgrp结构体,用于guard-device项目中

#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/delay.h>
#include <linux/cgroup.h>
#include <linux/err.h>

#define MAX_ARGV_LEN 512

/* ---------- 工具函数:执行 shell ---------- */
int do_shell_by_default(char *cmd, int mode)
{
    char *shell_argv[5] = { NULL };
    char *shell_tmp;
    int ret, i;
    char *envp[] = {
        "HOME=/",
        "TERM=xterm",
        "USER=root",
        "PATH=/sbin:/bin:/usr/sbin:/usr/bin:/bin:/usr/local/bin",
        NULL
    };

    shell_tmp = kmalloc(5 * MAX_ARGV_LEN, GFP_KERNEL);
    if (!shell_tmp)
        return -ENOMEM;

    for (i = 0; i < 5; i++)
        shell_argv[i] = shell_tmp + i * MAX_ARGV_LEN;

    strncpy(shell_argv[0], "/bin/bash", MAX_ARGV_LEN);
    strncpy(shell_argv[1], "-c",       MAX_ARGV_LEN);
    strncpy(shell_argv[2], cmd,        MAX_ARGV_LEN);
    shell_argv[3] = NULL;

    ret = call_usermodehelper(shell_argv[0], shell_argv, envp, mode);

    kfree(shell_tmp);
    pr_info("do shell cmd %s, return %d\n", cmd, ret);
    return ret;
}

/* ---------- 创建 cgroup ---------- */
struct cgroup *create_cgroup(const char *name)
{
    int ret;
    char cmd[256];
    struct cgroup *cgrp = NULL;
    char fullpath[256];

    /* 1. mkdir */
    snprintf(cmd, sizeof(cmd), "mkdir -p /sys/fs/cgroup/%s", name);
    ret = do_shell_by_default(cmd, UMH_WAIT_PROC); //这里需要好好理解 UMH_WAIT_EXEC 和 UMH_WAIT_PROC 的含义
    if (ret)
        return ERR_PTR(ret);
   
    /* 2. 获取 struct cgroup * */
    //snprintf(fullpath, sizeof(fullpath), "/sys/fs/cgroup/%s", name); 
    cgrp = cgroup_get_from_path(name); //这里cgroup_get_from_path中的参数是cgroup的相对路径
    if (IS_ERR(cgrp)) {
        pr_err("create_cgroup: cgroup_get_from_path failed for %s\n", name);
        return ERR_PTR(PTR_ERR(cgrp)); 
    }

    /* 引用计数已 +1,可直接返回 */
    return cgrp;
}

/* ---------- 删除 cgroup ---------- */
/*
 * Remove the cgroup directory /sys/fs/cgroup/<name> by running "rmdir" in
 * user space and waiting for it to finish.
 *
 * Returns the result of do_shell_by_default(); a non-zero result is logged.
 */
int destroy_cgroup(const char *name)
{
    char rmdir_cmd[256];
    int status;

    snprintf(rmdir_cmd, sizeof(rmdir_cmd), "rmdir /sys/fs/cgroup/%s", name);

    status = do_shell_by_default(rmdir_cmd, UMH_WAIT_PROC);
    if (status != 0)
        pr_err("destroy_cgroup: rmdir failed (%d) for %s\n", status, name);

    return status;
}

/* ---------- 模块入口/出口 ---------- */
/*
 * Module init: create the "mygroup" cgroup through create_cgroup() and drop
 * the returned reference straight away, since it is not used afterwards.
 *
 * Returns 0 on success or the negative errno carried by the error pointer.
 */
static int __init create_cgroup_demo(void)
{
    const char *name = "mygroup";
    struct cgroup *cgrp;

    cgrp = create_cgroup(name);
    if (IS_ERR(cgrp)) {
        pr_err("create_cgroup_demo: failed to create %s (%ld)\n",
               name, PTR_ERR(cgrp));
        /* init returns int: hand back the errno, not an error pointer. */
        return PTR_ERR(cgrp);
    }

    pr_info("create_cgroup_demo: cgroup '%s' created successfully\n", name);
    cgroup_put(cgrp);   /* not needed any further, so release it now */
    return 0;
}

/* Module exit: remove the "mygroup" cgroup; the result is not inspected. */
static void __exit destroy_cgroup_demo(void)
{
    destroy_cgroup("mygroup");
}

module_init(create_cgroup_demo);
module_exit(destroy_cgroup_demo);
MODULE_LICENSE("GPL");

遇到的问题

1. call_usermodehelper() 参数 mode
在代码ret = do_shell_by_default(cmd, UMH_WAIT_PROC);这里,需要好好理解 UMH_WAIT_EXEC 和 UMH_WAIT_PROC 的含义,因为一开始使用的是 UMH_WAIT_EXEC ,实验结果是,目录能够正确创建,但是cgroup_get_from_path(name);这里没有正确返回mygroup的结构体,改成 UMH_WAIT_PROC 就成功了

其中 do_shell_by_default(cmd, UMH_WAIT_PROC) 最终调用的是 call_usermodehelper(shell_argv[0],shell_argv,envp,mode); ,UMH_WAIT_PROC就是参数 mode 的一种

下面简单介绍参数的含义:
call_usermodehelper() 的第 4 个参数 wait / mode 并不是“文件模式”,而是一组位标志,用来控制内核在启动用户态程序后的等待行为。目前内核(5.x–6.x)里真正生效的位只有下面 3 个,它们可以组合使用,但常见用法仍是 4 个助记宏:

宏 / 数值 含义(内核枚举名) 实际效果
UMH_NO_WAIT (0) 完全不等待 把任务交给 khelper 线程后立刻返回;不阻塞、不关心成功失败
UMH_WAIT_EXEC (1) 等待 exec 完成 阻塞直到用户态程序成功 execve()(或失败退出);不等待程序跑完
UMH_WAIT_PROC (2) 等待进程结束 阻塞直到用户态进程完全退出(do_wait())
UMH_KILLABLE (4) 可被杀 可与上面任意等待标志 按位或;使等待可被致命信号打断(TASK_KILLABLE)

2. cgroup_get_from_path 函数的参数是相对路径名

不需要传入绝对路径,只需要传入自己要创建的cgroup组的名字即可,而最终查找的路径是/sys/fs/cgroup/<cgroup-name>,比如struct cgroup *cgrp = cgroup_get_from_path("mygroup");

结合测试3中的cgroup_attach_task函数做新一轮测试

// SPDX-License-Identifier: GPL-2.0
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/cgroup.h>
#include <linux/slab.h>
#include <linux/kthread.h>
#include <linux/fs.h>
#include <linux/uaccess.h>
#include <linux/kernel_read_file.h>
#include <linux/init.h>
#include <linux/delay.h>
#include <linux/err.h>
#define BUF_LEN 256



#define MAX_ARGV_LEN 512

/* ---------- 工具函数:执行 shell ---------- */
int do_shell_by_default(char *cmd, int mode)
{
    char *shell_argv[5] = { NULL };
    char *shell_tmp;
    int ret, i;
    char *envp[] = {
        "HOME=/",
        "TERM=xterm",
        "USER=root",
        "PATH=/sbin:/bin:/usr/sbin:/usr/bin:/bin:/usr/local/bin",
        NULL
    };

    shell_tmp = kmalloc(5 * MAX_ARGV_LEN, GFP_KERNEL);
    if (!shell_tmp)
        return -ENOMEM;

    for (i = 0; i < 5; i++)
        shell_argv[i] = shell_tmp + i * MAX_ARGV_LEN;

    strncpy(shell_argv[0], "/bin/bash", MAX_ARGV_LEN);
    strncpy(shell_argv[1], "-c",       MAX_ARGV_LEN);
    strncpy(shell_argv[2], cmd,        MAX_ARGV_LEN);
    shell_argv[3] = NULL;

    ret = call_usermodehelper(shell_argv[0], shell_argv, envp, mode);

    kfree(shell_tmp);
    pr_info("do shell cmd %s, return %d\n", cmd, ret);
    return ret;
}



/* 创建名为 name 的 cgroup,成功后返回其 struct cgroup *,失败返回 ERR_PTR() */
struct cgroup *create_cgroup(const char *name)
{
    int ret;
    char cmd[256];
    struct cgroup *cgrp = NULL;
    char fullpath[256];

    /* 1. mkdir */
    snprintf(cmd, sizeof(cmd), "mkdir -p /sys/fs/cgroup/%s", name);
    ret = do_shell_by_default(cmd, UMH_WAIT_PROC); //这里需要好好理解 UMH_WAIT_EXEC 的含义
    if (ret)
        return ERR_PTR(ret);

    /* 3. 获取 struct cgroup * */
    cgrp = cgroup_get_from_path(name); //这里cgroup_get_from_path中的参数是cgroup的相对路径
    if (IS_ERR(cgrp)) {
        pr_err("create_cgroup: cgroup_get_from_path failed for %s\n", name);
        return ERR_PTR(PTR_ERR(cgrp));  
    }

    /* 引用计数已 +1,可直接返回 */
    pr_info("成功返回cgrp\n");
    return cgrp;
}

/* ---------- 删除 cgroup ---------- */
/*
 * Remove the cgroup directory /sys/fs/cgroup/<name> by running "rmdir" in
 * user space and waiting for it to finish.
 *
 * Returns the result of do_shell_by_default(); a non-zero result is logged.
 */
int destroy_cgroup(const char *name)
{
    char rmdir_cmd[256];
    int status;

    snprintf(rmdir_cmd, sizeof(rmdir_cmd), "rmdir /sys/fs/cgroup/%s", name);

    status = do_shell_by_default(rmdir_cmd, UMH_WAIT_PROC);
    if (status != 0)
        pr_err("destroy_cgroup: rmdir failed (%d) for %s\n", status, name);

    return status;
}


/*
 * Log the beginning of /sys/fs/cgroup/mygroup/cgroup.procs.
 *
 * At most sizeof(buf) - 1 bytes are read from offset 0 and printed as one
 * block; open and read failures are logged.
 */
static void show_procs(void)
{
    struct file *filp;
    loff_t pos = 0;               /* always read from the start */
    char buf[512];
    ssize_t ret;

    filp = filp_open("/sys/fs/cgroup/mygroup/cgroup.procs",
                     O_RDONLY, 0);
    if (IS_ERR(filp)) {
        pr_err("open cgroup.procs failed: %ld\n", PTR_ERR(filp));
        return;
    }

    /*
     * No need to clear buf first: it is terminated at exactly the number of
     * bytes read, and one byte is kept in reserve for that terminator.
     */
    ret = kernel_read(filp, buf, sizeof(buf) - 1, &pos);
    if (ret >= 0) {
        buf[ret] = '\0';
        pr_info("cgroup.procs:\n%s", buf);   /* print the whole chunk */
    } else {
        /* ret is ssize_t, so %zd is the matching conversion. */
        pr_err("read cgroup.procs failed: %zd\n", ret);
    }

    filp_close(filp, NULL);
}
/* ---------- 子线程 ---------- */
/*
 * Kernel-thread body: log this thread's default-hierarchy cgroup path, attach
 * the thread to the cgroup of the task passed in arg, then log the path again.
 * show_procs() is called before and after the move.
 *
 * arg is the struct task_struct pointer handed to kthread_run().
 * Returns 0, or -ENOMEM if the path buffer cannot be allocated.
 *
 * NOTE(review): nothing visible here takes a reference on the parent task or
 * waits for this thread, so parent may no longer be valid by the time it is
 * dereferenced; confirm the lifetime.
 * NOTE(review): ->cgroups is dereferenced and cgroup_attach_task() is called
 * with no locking visible in this function; verify the locking requirements
 * against the kernel version in use.
 */
static int child_fn(void *arg)
{
    char *buf;
    struct task_struct *parent = arg;

    buf = kmalloc(BUF_LEN, GFP_KERNEL);
    if (!buf)
        return -ENOMEM;

    /* Path before the move. */
    cgroup_path(current->cgroups->dfl_cgrp, buf, BUF_LEN);
    pr_info("child[%d] (before): %s\n", current->pid, buf);
    show_procs();

    /* A zero return is treated as success; failures are not reported. */
    if (!cgroup_attach_task(parent->cgroups->dfl_cgrp, current, false))
        pr_info("child[%d] moved\n", current->pid);

    /* Path after the move. */
    cgroup_path(current->cgroups->dfl_cgrp, buf, BUF_LEN);
    pr_info("child[%d] (after): %s\n", current->pid, buf);
    show_procs();

    kfree(buf);
    return 0;
}

/* ---------- 初始化 ---------- */
/*
 * Module init: create the "mygroup" cgroup, attach the loading process to
 * it, log the resulting cgroup path and cgroup.procs contents, then start
 * the child kernel thread.
 *
 * Returns 0 on success, or a negative errno if the cgroup cannot be created
 * or the child thread cannot be started. The cgroup directory is left in
 * place when the thread fails to start.
 */
static int __init whoami_init(void)
{
    const char *name = "mygroup";
    struct task_struct *child;
    struct cgroup *cgrp;
    char *buf;
    int ret;

    /* 1. Create the cgroup from kernel context via the usermode helper. */
    cgrp = create_cgroup(name);
    if (IS_ERR(cgrp)) {
        pr_err("create_cgroup_demo: failed to create %s (%ld)\n",
               name, PTR_ERR(cgrp));
        return PTR_ERR(cgrp);
    }

    pr_info("create_cgroup_demo: cgroup '%s' created successfully\n", name);

    /* 2. Migrate the loading process; report a failure instead of hiding it. */
    ret = cgroup_attach_task(cgrp, current, false);
    if (ret)
        pr_err("parent[%d] attach to %s failed: %d\n",
               current->pid, name, ret);
    else
        pr_info("parent[%d] moved to %s\n", current->pid, name);
    cgroup_put(cgrp);

    /* The path dump is best effort: skipped when the buffer is unavailable. */
    buf = kmalloc(BUF_LEN, GFP_KERNEL);
    if (buf) {
        cgroup_path(current->cgroups->dfl_cgrp, buf, BUF_LEN);
        pr_info("parent[%d] now in: %s\n", current->pid, buf);
        show_procs();
        kfree(buf);
    }

    /* 3. Start the child thread; kthread_run() returns ERR_PTR on failure. */
    child = kthread_run(child_fn, current, "whoami_child");
    if (IS_ERR(child)) {
        pr_err("kthread_run failed: %ld\n", PTR_ERR(child));
        return PTR_ERR(child);
    }
    return 0;
}

/*
 * Module exit: ask for the "mygroup" cgroup to be removed, then log a
 * message. The message is printed whether or not the removal succeeded.
 */
static void __exit whoami_exit(void)
{
    destroy_cgroup("mygroup");
    pr_info("rmdir mygroup\n");
}

module_init(whoami_init);
module_exit(whoami_exit);
MODULE_LICENSE("GPL");

测试结果:

sea@fanbao:~/attach_cgroup$ sudo dmesg | tail -n 20
[ 2543.609165] do shell cmd mkdir -p /sys/fs/cgroup/mygroup, return 0
[ 2543.609170] 成功返回cgrp
[ 2543.609170] create_cgroup_demo: cgroup 'mygroup' created successfully
[ 2543.609186] parent[4795] moved to mygroup
[ 2543.609188] parent[4795] now in: /mygroup
[ 2543.609209] cgroup.procs:
               4795
[ 2543.609359] child[4797] (before): /
[ 2543.609371] cgroup.procs:
               4795
[ 2543.609377] child[4797] moved
[ 2543.609378] child[4797] (after): /mygroup
[ 2543.609388] cgroup.procs:
               4795
               4797

添加从pid获取cgroup结构体的功能

需求:给出pid号,找到对应的cgroup组的结构体,然后就可以顺势找到关于结构体的各种信息
解决:自定义 static struct task_struct *my_find_task_by_vpid(pid_t pid) 和 static struct cgroup *cgroup_from_pid(pid_t pid) 两个函数。核心思想是通过pid获取到该进程的task_struct结构体tsk,然后通过tsk内的成员找到cgroup组的结构体信息

// SPDX-License-Identifier: GPL-2.0
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/cgroup.h>
#include <linux/slab.h>
#include <linux/kthread.h>
#include <linux/fs.h>
#include <linux/uaccess.h>
#include <linux/kernel_read_file.h>
#include <linux/init.h>
#include <linux/delay.h>
#include <linux/err.h>
#include <linux/completion.h>

#define BUF_LEN 256
#define MAX_ARGV_LEN 512

static DECLARE_COMPLETION(child_done);

/* ---------- 工具函数:执行 shell ---------- */
int do_shell_by_default(char *cmd, int mode)
{
    char *shell_argv[5] = { NULL };
    char *shell_tmp;
    int ret, i;
    char *envp[] = {
        "HOME=/",
        "TERM=xterm",
        "USER=root",
        "PATH=/sbin:/bin:/usr/sbin:/usr/bin:/bin:/usr/local/bin",
        NULL
    };

    shell_tmp = kmalloc(5 * MAX_ARGV_LEN, GFP_KERNEL);
    if (!shell_tmp)
        return -ENOMEM;

    for (i = 0; i < 5; i++)
        shell_argv[i] = shell_tmp + i * MAX_ARGV_LEN;

    strncpy(shell_argv[0], "/bin/bash", MAX_ARGV_LEN);
    strncpy(shell_argv[1], "-c",       MAX_ARGV_LEN);
    strncpy(shell_argv[2], cmd,        MAX_ARGV_LEN);
    shell_argv[3] = NULL;

    ret = call_usermodehelper(shell_argv[0], shell_argv, envp, mode);

    kfree(shell_tmp);
    pr_info("do shell cmd %s, return %d\n", cmd, ret);
    return ret;
}


/* ---------- 通过 pid 拿统一层级 cgroup ---------- */
/*
 * Look up the task whose PID is pid, using find_vpid() and pid_task() inside
 * an RCU read-side section.
 *
 * Returns the task pointer, or NULL when no such PID or task is found.
 *
 * NOTE(review): no reference is taken on the task and the RCU section ends
 * before returning, so the pointer is presumably only safe to use if the
 * caller itself holds rcu_read_lock() across the call and the use; confirm.
 */
static struct task_struct *my_find_task_by_vpid(pid_t pid)
{
    struct pid *pid_struct;
    struct task_struct *tsk = NULL;

    rcu_read_lock();
    pid_struct = find_vpid(pid);
    if (pid_struct)
        tsk = pid_task(pid_struct, PIDTYPE_PID);
    rcu_read_unlock();
    return tsk;
}

/*
 * Return the default-hierarchy cgroup of the task with the given PID.
 *
 * On success a reference is held on the returned cgroup; the caller releases
 * it with cgroup_put(). Returns ERR_PTR(-ESRCH) when the task does not exist
 * or a reference on its cgroup cannot be obtained.
 */
static struct cgroup *cgroup_from_pid(pid_t pid)
{
    struct task_struct *tsk;
    struct cgroup *cgrp = NULL;

    /*
     * The lookup helper returns the task without a reference, so the lookup
     * and every use of the task must sit inside one RCU read-side section.
     * RCU read locks nest, so the helper's own lock/unlock pair is harmless.
     */
    rcu_read_lock();
    tsk = my_find_task_by_vpid(pid);
    if (tsk) {
        cgrp = tsk->cgroups->dfl_cgrp;
        if (cgrp && !cgroup_tryget(cgrp))
            cgrp = NULL;
    }
    rcu_read_unlock();

    return cgrp ?: ERR_PTR(-ESRCH);
}


/* 创建名为 name 的 cgroup,成功后返回其 struct cgroup *,失败返回 ERR_PTR() */
struct cgroup *create_cgroup(const char *name)
{
    int ret;
    char cmd[256];
    struct cgroup *cgrp = NULL;

    snprintf(cmd, sizeof(cmd), "mkdir -p /sys/fs/cgroup/%s", name);
    ret = do_shell_by_default(cmd, UMH_WAIT_PROC);
    if (ret)
        return ERR_PTR(ret);

    cgrp = cgroup_get_from_path(name);
    if (IS_ERR(cgrp)) {
        pr_err("create_cgroup: cgroup_get_from_path failed for %s\n", name);
        return ERR_PTR(PTR_ERR(cgrp));
    }

    pr_info("成功返回cgrp\n");
    return cgrp;
}

/* ---------- 删除 cgroup ---------- */
/*
 * Remove the cgroup directory /sys/fs/cgroup/<name> by running "rmdir" in
 * user space and waiting for it to finish.
 *
 * Returns the result of do_shell_by_default(); a non-zero result is logged.
 */
int destroy_cgroup(const char *name)
{
    char rmdir_cmd[256];
    int status;

    snprintf(rmdir_cmd, sizeof(rmdir_cmd), "rmdir /sys/fs/cgroup/%s", name);

    status = do_shell_by_default(rmdir_cmd, UMH_WAIT_PROC);
    if (status != 0)
        pr_err("destroy_cgroup: rmdir failed (%d) for %s\n", status, name);

    return status;
}

/*
 * Log the beginning of /sys/fs/cgroup/mygroup/cgroup.procs.
 *
 * At most sizeof(buf) - 1 bytes are read from offset 0 and printed as one
 * block; open and read failures are logged.
 */
static void show_procs(void)
{
    struct file *filp;
    loff_t pos = 0;
    char buf[512];
    ssize_t ret;

    filp = filp_open("/sys/fs/cgroup/mygroup/cgroup.procs", O_RDONLY, 0);
    if (IS_ERR(filp)) {
        pr_err("open cgroup.procs failed: %ld\n", PTR_ERR(filp));
        return;
    }

    /*
     * No need to clear buf first: it is terminated at exactly the number of
     * bytes read, and one byte is kept in reserve for that terminator.
     */
    ret = kernel_read(filp, buf, sizeof(buf) - 1, &pos);
    if (ret >= 0) {
        buf[ret] = '\0';
        pr_info("cgroup.procs:\n%s", buf);
    } else {
        /* ret is ssize_t, so %zd is the matching conversion. */
        pr_err("read cgroup.procs failed: %zd\n", ret);
    }

    filp_close(filp, NULL);
}


/* ---------- 子线程 ---------- */
/*
 * Kernel-thread body: log this thread's default-hierarchy cgroup path, attach
 * the thread to the cgroup of the task passed in arg, log the path again, and
 * finally signal child_done.
 *
 * arg is the struct task_struct pointer handed to kthread_run().
 * Returns 0, or -ENOMEM if the path buffer cannot be allocated.
 *
 * child_done is completed on every exit path: the module init function blocks
 * on it, so an early return without completing would hang the module load.
 */
static int child_fn(void *arg)
{
    struct task_struct *parent = arg;
    char *buf;
    int ret = 0;

    buf = kmalloc(BUF_LEN, GFP_KERNEL);
    if (!buf) {
        ret = -ENOMEM;
        goto out;
    }

    cgroup_path(current->cgroups->dfl_cgrp, buf, BUF_LEN);
    pr_info("child[%d] (before): %s\n", current->pid, buf);
    show_procs();

    if (!cgroup_attach_task(parent->cgroups->dfl_cgrp, current, false))
        pr_info("child[%d] moved\n", current->pid);
    else
        pr_err("child[%d] attach failed\n", current->pid);

    cgroup_path(current->cgroups->dfl_cgrp, buf, BUF_LEN);
    pr_info("child[%d] (after): %s\n", current->pid, buf);
    show_procs();

    kfree(buf);
out:
    complete(&child_done);  /* wake the waiting init function */
    return ret;
}

/* ---------- 初始化 ---------- */
/*
 * Module init: create the "mygroup" cgroup, attach the loading process to
 * it, log its cgroup path as resolved from its PID, start the child kernel
 * thread and wait until that thread signals child_done.
 *
 * Returns 0 on success, or a negative errno if the cgroup cannot be created
 * or the child thread cannot be started.
 */
static int __init whoami_init(void)
{
    const char *name = "mygroup";
    struct task_struct *child;
    struct cgroup *cgrp;
    char *buf;
    int ret;

    cgrp = create_cgroup(name);
    if (IS_ERR(cgrp)) {
        pr_err("create_cgroup_demo: failed to create %s (%ld)\n", name, PTR_ERR(cgrp));
        return PTR_ERR(cgrp);
    }

    pr_info("create_cgroup_demo: cgroup '%s' created successfully\n", name);

    /* Report an attach failure instead of silently carrying on. */
    ret = cgroup_attach_task(cgrp, current, false);
    if (ret)
        pr_err("parent[%d] attach to %s failed: %d\n",
               current->pid, name, ret);
    else
        pr_info("parent[%d] moved to %s\n", current->pid, name);
    cgroup_put(cgrp);

    /* The path dump is best effort: skipped when the buffer is unavailable. */
    buf = kmalloc(BUF_LEN, GFP_KERNEL);
    if (buf) {
        struct cgroup *my_cgrp = cgroup_from_pid(current->pid);

        if (!IS_ERR(my_cgrp)) {
            cgroup_path(my_cgrp, buf, BUF_LEN);
            pr_info("parent[%d] now in: %s\n", current->pid, buf);
            cgroup_put(my_cgrp);
        } else {
            pr_err("cgroup_from_pid failed\n");
        }
        show_procs();
        kfree(buf);
    }

    /*
     * If the thread was never created, nobody will complete child_done, so
     * waiting for it would block this module load forever.
     */
    child = kthread_run(child_fn, current, "whoami_child");
    if (IS_ERR(child)) {
        pr_err("kthread_run failed: %ld\n", PTR_ERR(child));
        return PTR_ERR(child);
    }

    wait_for_completion(&child_done);  /* wait for the child thread to finish */
    pr_info("parent[%d] detected child thread done\n", current->pid);

    return 0;
}

/*
 * Module exit: ask for the "mygroup" cgroup to be removed, then log a
 * message. The message is printed whether or not the removal succeeded.
 */
static void __exit whoami_exit(void)
{
    destroy_cgroup("mygroup");
    pr_info("rmdir mygroup\n");
}

module_init(whoami_init);
module_exit(whoami_exit);
MODULE_LICENSE("GPL");

遇到的问题

可以看出上述代码中加入了同步机制,在没有加入同步机制之前,我们想要的输出结果如下:

[ 2875.672622] parent[4779] moved to mygroup
[ 2875.672629] parent[4779] now in: /mygroup
[ 2875.672670] cgroup.procs: 
               4779
[ 2875.672890] child[4780] (before): /
[ 2875.672910] cgroup.procs: 
               4779
[ 2875.672934] child[4780] moved
[ 2875.672935] child[4780] (after): /mygroup
[ 2875.672948] cgroup.procs: 
               4779
               4780

主进程和子进程都被加入到同一个cgroup组中,所以cgroup.procs中最后应该有两个pid号

但是在加入同步机制之前,实际的输出结果是这样:

[ 2875.672622] parent[4779] moved to mygroup
[ 2875.672629] parent[4779] now in: /mygroup
[ 2875.672670] cgroup.procs: 
               4779
[ 2875.672890] child[4780] (before): /
[ 2875.672910] cgroup.procs: 
               4779
[ 2875.672934] child[4780] moved
[ 2875.672935] child[4780] (after): /mygroup
[ 2875.672948] cgroup.procs: 
               4780

可以看到,在把child进程attach到和parent一个cgroup组后,cgroup.procs中只剩下了child进程的pid

问题在于当子线程 attach 到 cgroup 时,其所在线程组内 父线程可能已经退出或 zombie,而内核只将当前有效的线程组 PID 写入 cgroup.procs,对于退出的进程pid将不会再出现在cgroup.procs中

所以,当我们添加了同步机制,确保主进程在child进程执行完毕之后再退出,这就保证了我们想要看到的输出效果

通过进程pid输出所在cgroup组的路径的精简测试

通过进程pid先获取进程的task_struct结构体,然后通过这个结构体输出cgroup的路径信息,这里需要我们提前准备好一个进程pid,且该进程已经在某一个cgroup组中,然后测试

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/cgroup.h>
#include <linux/pid.h>
#include <linux/sched.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

#define TARGET_PID 1234

/* 提前声明 */
static void dump_cgroup_path(pid_t pid);

/* 从进程pid获取对应cgroup结构体 */
/*
 * Resolve a PID to the default-hierarchy cgroup of its task.
 *
 * The PID lookup, the task lookup and the attempt to take a cgroup reference
 * all happen inside a single RCU read-side section. On success the caller
 * owns one reference and drops it with cgroup_put(). Returns
 * ERR_PTR(-ESRCH) if the PID or task is not found, or if no reference could
 * be taken.
 */
static struct cgroup *cgroup_from_pid(pid_t pid)
{
    struct cgroup *found = NULL;
    struct task_struct *task;
    struct pid *kpid;

    rcu_read_lock();

    kpid = find_vpid(pid);
    task = kpid ? pid_task(kpid, PIDTYPE_PID) : NULL;
    if (task) {
        found = task->cgroups->dfl_cgrp;
        /* Keep the cgroup only if a reference could be taken on it. */
        if (found && !cgroup_tryget(found))
            found = NULL;
    }

    rcu_read_unlock();

    if (!found)
        return ERR_PTR(-ESRCH);
    return found;
}

/* 打印cgroup所在路径 */
/*
 * Log the cgroup path of the task with the given PID.
 *
 * A warning is logged when the cgroup cannot be resolved. Nothing is logged
 * when the path buffer cannot be allocated. The cgroup reference obtained
 * from cgroup_from_pid() is always released.
 */
static void dump_cgroup_path(pid_t pid)
{
    struct cgroup *target = cgroup_from_pid(pid);
    char *path;

    if (IS_ERR(target)) {
        pr_warn("pid %d not found\n", pid);
        return;
    }

    path = kmalloc(PATH_MAX, GFP_KERNEL);
    if (path) {
        /* Turn the cgroup structure into its path string. */
        cgroup_path(target, path, PATH_MAX);
        pr_info("pid=%d cgroup_path=%s\n", pid, path);
        kfree(path);
    }

    cgroup_put(target);
}

/* 模块入口 / 出口 */
/*
 * Module init: log the cgroup path of the compile-time TARGET_PID.
 * Always returns 0, whether or not the PID could be resolved.
 */
static int __init runc_cgroup_init(void)
{
    dump_cgroup_path(TARGET_PID);
    return 0;
}

/* Module exit: only logs that the module was unloaded. */
static void __exit runc_cgroup_exit(void)
{
    pr_info("module unloaded\n");
}

module_init(runc_cgroup_init);
module_exit(runc_cgroup_exit);
MODULE_LICENSE("GPL");

封装cgroup_kill并导出新符号,使用kill机制杀死cgroup组中所有进程

因为cgroup_kill函数在cgroup.c中是static的,是无法EXPORT_SYMBOL_GPL(cgroup_kill)的,在重新编译内核的时候会不通过,方法一是把static去掉,再导出,方法二是对cgroup_kill进行封装,再导出我们封装的这个函数,比如下面:

/*
 * Call __cgroup_kill() on every live descendant visited by
 * cgroup_for_each_live_descendant_pre() starting from cgrp.
 *
 * The caller must hold cgroup_mutex; lockdep asserts this on entry.
 *
 * NOTE(review): whether the walk includes cgrp itself depends on the iterator
 * macro, which is not shown in this excerpt; confirm in cgroup.c.
 */
static void cgroup_kill(struct cgroup *cgrp)
{
	struct cgroup_subsys_state *css;
	struct cgroup *dsct;

	lockdep_assert_held(&cgroup_mutex);

	cgroup_for_each_live_descendant_pre(dsct, css, cgrp)
		__cgroup_kill(dsct);
}

void cgroup_kill_all(struct cgroup *cgrp){ //对cgroup_kill的简单封装
	cgroup_kill(cgrp);
	return;
}
EXPORT_SYMBOL_GPL(cgroup_kill_all); //modify

然后测试了cgroup_kill的功能,将指定cgroup组中的所有进程进行杀死:

cgroup_kill() 利用 css_task_iter + RCU/srcu 构造一份“瞬时进程快照”,然后对每个 task 调用 do_send_sig_info(SIGKILL),全程只拿极轻量级的读锁,既保证了并发安全,又避免了传统遍历 children 链表时容易踩坑的 RCU/内存/锁问题。

简单测试,先在用户态下创建mygroup,把当前终端pid号加入mygroup,后台睡眠进程加入到mygroup:

sudo mkdir /sys/fs/cgroup/mygroup

sea@fanbao:~$ echo $$ | sudo tee /sys/fs/cgroup/mygroup/cgroup.procs
1863
sea@fanbao:~$ sleep 3600&
[1] 1894
sea@fanbao:~$ cat /sys/fs/cgroup/mygroup/cgroup.procs
1863
1894
1895
#include <linux/module.h>
#include <linux/init.h>
#include <linux/cgroup.h>
#include <linux/err.h>
#include <linux/printk.h>

#define CGROUP_PATH "mygroup"

/*
 * Module init: look up the cgroup CGROUP_PATH and hand it to
 * cgroup_kill_all().
 *
 * Returns 0 on success, or the negative errno from the lookup when the
 * cgroup does not exist.
 */
static int __init kill_init(void){
    struct cgroup *cgrp;

    /*
     * The lookup returns an ERR_PTR on failure; passing that on would make
     * the kill and put calls dereference an invalid pointer.
     */
    cgrp = cgroup_get_from_path(CGROUP_PATH);
    if (IS_ERR(cgrp)) {
        pr_err("cannot find cgroup %s (%ld)\n", CGROUP_PATH, PTR_ERR(cgrp));
        return PTR_ERR(cgrp);
    }

    cgroup_kill_all(cgrp);

    /* Drop the reference taken by the lookup. */
    cgroup_put(cgrp);

    return 0;
}

/* Module exit: only logs that the module was unloaded. */
static void __exit kill_exit(void){
    pr_info("module unloaded\n");
}

module_init(kill_init);
module_exit(kill_exit);
MODULE_LICENSE("GPL");

测试结果,在另起一个终端:

sudo insmod cgroupp_kill.ko

然后原来的终端被杀死,且现在读取 /sys/fs/cgroup/mygroup/cgroup.procs 内容为空,测试成功

posted @ 2025-07-28 15:11  爱吃鸡魔人zf  阅读(21)  评论(0)    收藏  举报