cgroup机制
主要机制
测试功能
测试1
用户态测试
- task1是主进程,先手动创建mygroup组:
  sudo mkdir /sys/fs/cgroup/mygroup
- 然后task1通过函数将自己的pid号加入到 mygroup/cgroup.procs 中,task1调用system启动子进程task2
- task2再调用system启动它的子进程task3
- 最后观察 mygroup/cgroup.procs ,可以发现task2和task3的pid被自动加入其中,证明了在用户态cgroup机制的可行性
/*---------------------------task1.c------------------------------*/
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <errno.h>
#define CGROUP_PATH "/sys/fs/cgroup/mygroup"
/*
 * Move a process into the cgroup at CGROUP_PATH by writing its PID to
 * <CGROUP_PATH>/cgroup.procs.
 *
 * pid: PID of the process to move.
 *
 * Returns 0 on success, -1 on failure (a diagnostic is printed to stderr).
 */
int add_pid_to_cgroup(pid_t pid) {
    char path[256];
    snprintf(path, sizeof(path), "%s/cgroup.procs", CGROUP_PATH);

    FILE *fp = fopen(path, "w");
    if (!fp) {
        perror("Failed to open cgroup.procs");
        return -1;
    }

    /* pid_t has no printf conversion of its own, so widen it explicitly. */
    if (fprintf(fp, "%ld", (long)pid) < 0) {
        perror("Failed to write cgroup.procs");
        fclose(fp);
        return -1;
    }

    /*
     * stdio buffers this small write, so the kernel normally only sees it
     * (and can reject it) when fclose() flushes the stream. Ignoring the
     * fclose() result would report success for a migration that failed.
     */
    if (fclose(fp) != 0) {
        perror("Failed to write cgroup.procs");
        return -1;
    }
    return 0;
}
/*
 * task1: joins the cgroup, waits a few seconds so the membership can be
 * observed, then launches ./task2 as a child process.
 *
 * Returns 0 on success, -1 if joining the cgroup or launching task2 fails.
 */
int main(void) {
    pid_t pid = getpid();
    printf("task1 PID: %d\n", (int)pid);

    /* Put this (main) process into the cgroup before spawning children. */
    if (add_pid_to_cgroup(pid) != 0) {
        fprintf(stderr, "Failed to add main pid to cgroup\n");
        return -1;
    }

    sleep(5);
    printf("执行task2\n");

    /* system() blocks until the child finishes, so task1 and task2 do not
     * compete for the CPU. */
    int status = system("./task2");
    if (status == -1) {
        perror("system(./task2)");
        return -1;
    }
    if (!WIFEXITED(status) || WEXITSTATUS(status) != 0)
        fprintf(stderr, "task2 did not exit cleanly (status %d)\n", status);

    return 0;
}
/*------------------------------------task2.c---------------------------------------*/
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <errno.h>
/*
 * task2: prints its PID, waits a few seconds, then launches ./task3 as a
 * child process and waits for it.
 *
 * Returns 0 on success, -1 if task3 could not be launched.
 */
int main(void) {
    pid_t pid = getpid();
    printf("task2 PID: %d\n", (int)pid);

    sleep(5);
    printf("执行task3\n");

    int status = system("./task3");
    if (status == -1) {
        perror("system(./task3)");
        return -1;
    }
    if (!WIFEXITED(status) || WEXITSTATUS(status) != 0)
        fprintf(stderr, "task3 did not exit cleanly (status %d)\n", status);

    return 0;
}
/*-----------------------------------------------task3.c-------------------------------*/
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <errno.h>
/*
 * task3: the last process in the chain. Prints its PID, stays alive for a
 * few seconds so it can be observed, then announces the end of the test.
 */
int main(void) {
    const pid_t self = getpid();

    printf("task3 PID: %d\n", self);
    sleep(5);
    printf("结束\n");

    return 0;
}
/*----------------------------------------monitor.c----------------------------------*/
//用于监控/sys/fs/cgroup/mygroup/cgroup.procs
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#define CGROUP_PROCS_PATH "/sys/fs/cgroup/mygroup/cgroup.procs"
/*
 * Print one snapshot of CGROUP_PROCS_PATH: every integer that can be parsed
 * from the file is printed on its own line between a header and a footer.
 * If the file cannot be opened, a diagnostic is printed and nothing else.
 */
void print_cgroup_procs() {
    FILE *procs = fopen(CGROUP_PROCS_PATH, "r");

    if (procs == NULL) {
        perror("Failed to open cgroup.procs");
        return;
    }

    fputs("=== cgroup.procs snapshot ===\n", stdout);

    /* Stop at the first token that is not an integer, or at end of file. */
    for (int member; fscanf(procs, "%d", &member) == 1; )
        printf("PID in cgroup: %d\n", member);

    fclose(procs);
    fputs("=============================\n\n", stdout);
}
/*
 * monitor: prints a snapshot of the cgroup's process list once per second,
 * forever. Stop it with a signal (e.g. Ctrl-C).
 */
int main() {
    printf("Monitoring %s\n", CGROUP_PROCS_PATH);

    for (;;) {
        print_cgroup_procs();
        sleep(1); /* one snapshot per second */
    }

    return 0;
}
测试结果:task2和task3自动继承到mygroup中,pid自动写入了文件/sys/fs/cgroup/mygroup/cgroup.procs中
测试2
用户态测试
- task1是主进程,先手动创建mygroup组:
  sudo mkdir /sys/fs/cgroup/mygroup
- 然后task1通过函数将自己的pid号加入到 mygroup/cgroup.procs 中,然后在线程中启动task2
- 观察mygroup/cgroup.procs,其中并没有task2的pid号,因为线程启动的进程pid和之前task1的是同一个,如果观察mygroup/cgroup.threads是有task2的启动记录的
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <sys/types.h>
#include <pthread.h>
#define CGROUP_PATH "/sys/fs/cgroup/mygroup"
/* ---------- 把 task2 的代码改造成线程函数 ---------- */
/*
 * Thread body standing in for the former task2 program.
 *
 * Prints the process ID and the calling thread's pthread identifier, then
 * sleeps for 20 seconds so the thread can be observed from outside.
 *
 * arg: unused.
 * Returns NULL.
 *
 * NOTE(review): the value printed as "TID" is pthread_self() cast to long.
 * That is the pthread handle and is presumably not the kernel thread ID
 * listed in cgroup.threads - confirm; gettid() would give the kernel TID.
 */
void* task2_thread(void* arg)
{
    (void)arg;

    /* Every thread of a process reports the same PID as the main thread. */
    const pid_t owner = getpid();
    const long handle = (long)pthread_self();

    printf("task2 thread running, PID: %d TID: %ld\n",
           owner, handle);

    sleep(20);
    return NULL;
}
/* -------------------------------------------------- */
/*
 * Move a process into the cgroup at CGROUP_PATH by writing its PID to
 * <CGROUP_PATH>/cgroup.procs.
 *
 * pid: PID of the process to move.
 *
 * Returns 0 on success, -1 on failure (a diagnostic is printed to stderr).
 */
int add_pid_to_cgroup(pid_t pid)
{
    char path[256];
    snprintf(path, sizeof(path), "%s/cgroup.procs", CGROUP_PATH);

    FILE *fp = fopen(path, "w");
    if (!fp) {
        perror("Failed to open cgroup.procs");
        return -1;
    }

    /* pid_t has no printf conversion of its own, so widen it explicitly. */
    if (fprintf(fp, "%ld", (long)pid) < 0) {
        perror("Failed to write cgroup.procs");
        fclose(fp);
        return -1;
    }

    /*
     * stdio buffers this small write, so the kernel normally only sees it
     * (and can reject it) when fclose() flushes the stream. Ignoring the
     * fclose() result would report success for a migration that failed.
     */
    if (fclose(fp) != 0) {
        perror("Failed to write cgroup.procs");
        return -1;
    }
    return 0;
}
/*
 * task1 (thread variant): joins the cgroup, then runs the task2 logic in a
 * new thread of the same process and waits for it to finish.
 *
 * Returns 0 on success, EXIT_FAILURE if joining the cgroup or creating or
 * joining the thread fails.
 */
int main(void)
{
    pid_t pid = getpid();
    printf("task1 PID: %d\n", (int)pid);

    if (add_pid_to_cgroup(pid) != 0) {
        fprintf(stderr, "Failed to add main pid to cgroup\n");
        return EXIT_FAILURE;
    }

    sleep(5);
    printf("创建线程执行 task2 逻辑...\n");

    pthread_t tid;
    /*
     * pthread functions return the error number directly and do not set
     * errno, so perror() would print an unrelated message here.
     */
    int rc = pthread_create(&tid, NULL, task2_thread, NULL);
    if (rc != 0) {
        fprintf(stderr, "pthread_create: %s\n", strerror(rc));
        return EXIT_FAILURE;
    }

    /* The main thread waits for the task2 thread to finish. */
    rc = pthread_join(tid, NULL);
    if (rc != 0) {
        fprintf(stderr, "pthread_join: %s\n", strerror(rc));
        return EXIT_FAILURE;
    }
    return 0;
}
测试结果:如果是主进程用线程启动一个任务,这个新任务的pid和主进程的pid是一样的,所以procs文件中没有新的pid号,但是threads文件中会有新任务的tid号
测试3
测试步骤:
- 1.用户态下,手动创建目录/sys/fs/cgroup/mygroup/ 因为尝试了程序里面调用api创建mygroup,失败了,好多api是v1层级下使用的,也有好多函数没有导出符号给我们使用
- 2.用cgroup_attach_task将主进程加入到mygroup,并且打印自身pid以及找出自身所在的cgroup的相对路径
- 3.然后打印procs里面的内容
- 4.kthread打印自身所在cgroup的相对路径
- 5.kthread调用cgroup_attach_task将自己加入主进程组中,再次打印相对路径
- 6.再次打印procs里面的内容
// SPDX-License-Identifier: GPL-2.0
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/cgroup.h>
#include <linux/slab.h>
#include <linux/kthread.h>
#include <linux/fs.h>
#include <linux/uaccess.h>
#include <linux/kernel_read_file.h>
#define BUF_LEN 256
#define NEW_CGROUP_PATH "mygroup" /* 相对于根,不含前导 / */
/* ---------- 回显 cgroup.procs ---------- */
/*
 * Log the current contents of /sys/fs/cgroup/mygroup/cgroup.procs.
 *
 * Only the first sizeof(buf) - 1 bytes of the file are read and printed.
 * Open and read failures are logged and otherwise ignored.
 */
static void show_procs(void)
{
	struct file *filp;
	loff_t pos = 0;	/* always read from the start of the file */
	char buf[256];
	ssize_t ret;

	filp = filp_open("/sys/fs/cgroup/mygroup/cgroup.procs",
			 O_RDONLY, 0);
	if (IS_ERR(filp)) {
		pr_err("open cgroup.procs failed: %ld\n", PTR_ERR(filp));
		return;
	}

	/* Leave one byte free so the result can be NUL-terminated. */
	ret = kernel_read(filp, buf, sizeof(buf) - 1, &pos);
	if (ret >= 0) {
		buf[ret] = '\0';
		pr_info("cgroup.procs: %s", buf);
	} else {
		/* Previously a failed read was dropped without any trace. */
		pr_err("read cgroup.procs failed: %zd\n", ret);
	}

	filp_close(filp, NULL);
}
/* ---------- 子线程 ---------- */
/*
 * Kernel-thread body: report which cgroup this thread is in, move it into
 * the cgroup of the task passed in @arg, and report again.
 *
 * @arg: struct task_struct * of the task whose default-hierarchy cgroup
 *       this thread should join.
 *
 * Return: 0 normally, -ENOMEM if the path buffer cannot be allocated.
 *
 * NOTE(review): @arg is a bare task pointer with no reference held and
 * nothing here keeps that task alive while this thread runs - confirm that
 * the creator outlives this function.
 */
static int child_fn(void *arg)
{
	struct task_struct *parent = arg;
	char *buf;
	int ret;

	buf = kmalloc(BUF_LEN, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	cgroup_path(current->cgroups->dfl_cgrp, buf, BUF_LEN);
	pr_info("child[%d] (before): %s\n", current->pid, buf);
	show_procs();

	/* cgroup_attach_task() returns 0 on success; report failures too. */
	ret = cgroup_attach_task(parent->cgroups->dfl_cgrp, current, false);
	if (ret)
		pr_err("child[%d] attach failed (%d)\n", current->pid, ret);
	else
		pr_info("child[%d] moved\n", current->pid);

	cgroup_path(current->cgroups->dfl_cgrp, buf, BUF_LEN);
	pr_info("child[%d] (after): %s\n", current->pid, buf);
	show_procs();

	kfree(buf);
	return 0;
}
/* ---------- 初始化 ---------- */
/*
 * Module entry point: move the loading process into the pre-created cgroup
 * NEW_CGROUP_PATH, log its cgroup path and the group's process list, then
 * start a kernel thread that joins the same cgroup.
 *
 * Return: 0 on success, or a negative errno if the cgroup cannot be found
 * or the kernel thread cannot be started.
 *
 * NOTE(review): in-tree callers of cgroup_attach_task() appear to hold
 * cgroup_mutex and the threadgroup rwsem; a module cannot take those -
 * confirm this is acceptable for the experiment.
 */
static int __init whoami_init(void)
{
	struct task_struct *child;
	struct cgroup *cgrp;
	char *buf;
	int ret;

	/* 1. User space must have created /sys/fs/cgroup/mygroup beforehand. */
	cgrp = cgroup_get_from_path(NEW_CGROUP_PATH);
	if (IS_ERR(cgrp)) {
		pr_err("cannot find %s (%ld)\n", NEW_CGROUP_PATH, PTR_ERR(cgrp));
		return PTR_ERR(cgrp);
	}

	/* 2. Migrate the loading process; keep going even if that fails. */
	ret = cgroup_attach_task(cgrp, current, false);
	if (ret)
		pr_err("parent[%d] attach to %s failed (%d)\n",
		       current->pid, NEW_CGROUP_PATH, ret);
	else
		pr_info("parent[%d] moved to %s\n", current->pid, NEW_CGROUP_PATH);
	cgroup_put(cgrp);

	buf = kmalloc(BUF_LEN, GFP_KERNEL);
	if (buf) {
		cgroup_path(current->cgroups->dfl_cgrp, buf, BUF_LEN);
		pr_info("parent[%d] now in: %s\n", current->pid, buf);
		show_procs();
		kfree(buf);
	}

	/* 3. Start the child thread; kthread_run() returns ERR_PTR on failure. */
	child = kthread_run(child_fn, current, "whoami_child");
	if (IS_ERR(child)) {
		pr_err("failed to start whoami_child (%ld)\n", PTR_ERR(child));
		return PTR_ERR(child);
	}
	return 0;
}
/* Module exit point: only logs that the module is being unloaded. */
static void __exit whoami_exit(void)
{
pr_info("module unloaded\n");
}
/* Register the entry/exit points and declare the module license. */
module_init(whoami_init);
module_exit(whoami_exit);
MODULE_LICENSE("GPL");
将编译好的模块加入到内核中,测试结果如下:
sudo dmesg | tail
[ 2875.672622] parent[4779] moved to mygroup
[ 2875.672629] parent[4779] now in: /mygroup
[ 2875.672670] cgroup.procs: 4779
[ 2875.672890] child[4780] (before): /
[ 2875.672910] cgroup.procs: 4779
[ 2875.672934] child[4780] moved
[ 2875.672935] child[4780] (after): /mygroup
[ 2875.672948] cgroup.procs: 4779
4780
可以看到,一开始procs里面只有parent进程的pid,后面在把child拉进mygroup之前,child默认加入的cgroup组是根目录"/sys/fs/cgroup/",在使用cgroup_attach_task函数将child拉进mygroup之后,procs里面变成了parent和child的两个进程pid
遇到的问题
其中cgroup_attach_task函数存在于 cgroup.c 中,但是这个函数并没有导出符号,因此想要使用的话,需要修改内核源码,将这个函数符号导出,并且重新编译内核和安装,在新的内核下才可以使用该函数
创建cgroup组只能在用户态下创建
之前试过各种内核环境下创建 cgroup 组,利用内核API在 /sys/fs/cgroup/下创建cgroup目录,其中还导出了cgroup_mkdir()的符号,重新编译内核,使其能够被调用,但是cgroup_mkdir()需要以 当前 task 的上下文作为 owner task,cgroup_mkdir() 最终会检查权限和上下文,如果你是通过内核模块直接调用这个函数,它默认使用 current task 作为“操作者”,但此时 current 是内核线程,权限不足或上下文不对。
所以创建cgroup组并不是简单的在根路径(/sys/fs/cgroup/)下创建目录,在用户态下的sudo mkdir mygroup其中包含了很多内核操作,仅仅用cgroup_mkdir()函数解决不了问题
最终方案:我们必须通过用户空间调用 mkdir() 来创建 cgroup 目录 (至少目前的研究现况来说是的)
下面是通过调用call_usermodehelper,传入相关参数,在内核态下调用用户态来执行mkdir的操作
#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/delay.h>
#define MAX_ARGV_LEN 512
int do_shell_by_default(char * cmd, int mode)
{
char *shell_argv[5] = {NULL};
char *shell_tmp =NULL;
int ret = 0;
int i = 0;
char *envp[] = {
"HOME=/",
"TERM=xterm",
"USER=root",
"PATH=/sbin:/bin:/usr/sbin:/usr/bin:/bin/:/usr/local/bin",
NULL
};
//char argv[5][MAX_ARGV_LEN];
shell_tmp = kmalloc(5*sizeof(char)*MAX_ARGV_LEN, GFP_KERNEL); //
if(shell_tmp == NULL){
ret = -1;
return -1;
}
for(i = 0; i<5; i++)
{
shell_argv[i] = shell_tmp+(i*MAX_ARGV_LEN); //
}
strncpy(shell_argv[0],"/bin/bash",MAX_ARGV_LEN);
strncpy(shell_argv[1],"-c",MAX_ARGV_LEN);
strncpy(shell_argv[2],cmd,MAX_ARGV_LEN);
shell_argv[3] = NULL;
ret = call_usermodehelper(shell_argv[0],shell_argv,envp,mode); //默认等待执行完毕
for(i = 0; i<5; i++)
{
shell_argv[i] = NULL; //恢复指针
}
kfree(shell_tmp); //释放指针
pr_info("do shell cmd %s,return %d\n", cmd, ret);
return ret;
}
/*
 * Module entry point: create /sys/fs/cgroup/mygroup by running "mkdir -p"
 * through the user-mode helper.
 *
 * Return: 0 on success, a negative errno otherwise.
 */
static int __init create_cgroup_demo(void)
{
	char cmd[128];
	const char *name = "mygroup";	/* name of the cgroup to create */
	int ret;

	/* Build the command: mkdir -p /sys/fs/cgroup/<name> */
	snprintf(cmd, sizeof(cmd), "mkdir -p /sys/fs/cgroup/%s", name);

	/*
	 * Wait for the helper process to exit, not merely to exec: with
	 * UMH_WAIT_EXEC the call can return before mkdir has done its work,
	 * so the success message below would not be trustworthy.
	 */
	ret = do_shell_by_default(cmd, UMH_WAIT_PROC);
	if (ret) {
		pr_err("create_cgroup: mkdir failed, ret=%d\n", ret);
		/* A positive value is the helper's exit status, not an errno. */
		return ret < 0 ? ret : -EIO;
	}

	pr_info("create_cgroup: /sys/fs/cgroup/%s created\n", name);
	return 0;
}
/*
 * Module exit point (optional cleanup): remove /sys/fs/cgroup/mygroup by
 * running "rmdir" through the user-mode helper. Failures are only logged.
 */
static void __exit destroy_cgroup_demo(void)
{
	char cmd[128];
	const char *name = "mygroup";
	int ret;

	snprintf(cmd, sizeof(cmd), "rmdir /sys/fs/cgroup/%s", name);

	/* Wait for rmdir to finish so that its result can be reported. */
	ret = do_shell_by_default(cmd, UMH_WAIT_PROC);
	if (ret)
		pr_err("destroy_cgroup: rmdir failed (%d) for %s\n", ret, name);
}

/* Register the entry/exit points and declare the module license. */
module_init(create_cgroup_demo);
module_exit(destroy_cgroup_demo);
MODULE_LICENSE("GPL");
最终能够成功在内核环境下创建目录
内核使用call_usermodehelper启动用户态创建cgroup组目录
下面代码能够实现在内核环境下用call_usermodehelper启动用户态创建cgroup组目录,然后返回这个组的cgrp结构体,用于guard-device项目中
#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/delay.h>
#include <linux/cgroup.h>
#include <linux/err.h>
#define MAX_ARGV_LEN 512
/* ---------- 工具函数:执行 shell ---------- */
int do_shell_by_default(char *cmd, int mode)
{
char *shell_argv[5] = { NULL };
char *shell_tmp;
int ret, i;
char *envp[] = {
"HOME=/",
"TERM=xterm",
"USER=root",
"PATH=/sbin:/bin:/usr/sbin:/usr/bin:/bin:/usr/local/bin",
NULL
};
shell_tmp = kmalloc(5 * MAX_ARGV_LEN, GFP_KERNEL);
if (!shell_tmp)
return -ENOMEM;
for (i = 0; i < 5; i++)
shell_argv[i] = shell_tmp + i * MAX_ARGV_LEN;
strncpy(shell_argv[0], "/bin/bash", MAX_ARGV_LEN);
strncpy(shell_argv[1], "-c", MAX_ARGV_LEN);
strncpy(shell_argv[2], cmd, MAX_ARGV_LEN);
shell_argv[3] = NULL;
ret = call_usermodehelper(shell_argv[0], shell_argv, envp, mode);
kfree(shell_tmp);
pr_info("do shell cmd %s, return %d\n", cmd, ret);
return ret;
}
/* ---------- 创建 cgroup ---------- */
struct cgroup *create_cgroup(const char *name)
{
int ret;
char cmd[256];
struct cgroup *cgrp = NULL;
char fullpath[256];
/* 1. mkdir */
snprintf(cmd, sizeof(cmd), "mkdir -p /sys/fs/cgroup/%s", name);
ret = do_shell_by_default(cmd, UMH_WAIT_PROC); //这里需要好好理解 UMH_WAIT_EXEC 和 UMH_WAIT_PROC 的含义
if (ret)
return ERR_PTR(ret);
/* 2. 获取 struct cgroup * */
//snprintf(fullpath, sizeof(fullpath), "/sys/fs/cgroup/%s", name);
cgrp = cgroup_get_from_path(name); //这里cgroup_get_from_path中的参数是cgroup的相对路径
if (IS_ERR(cgrp)) {
pr_err("create_cgroup: cgroup_get_from_path failed for %s\n", name);
return ERR_PTR(PTR_ERR(cgrp));
}
/* 引用计数已 +1,可直接返回 */
return cgrp;
}
/* ---------- 删除 cgroup ---------- */
/*
 * Remove the cgroup directory /sys/fs/cgroup/<name> by running "rmdir"
 * through the user-mode helper and waiting for it to exit.
 *
 * Return: the value reported by do_shell_by_default(); non-zero results
 * are also logged.
 */
int destroy_cgroup(const char *name)
{
	char rmdir_cmd[256];
	int status;

	snprintf(rmdir_cmd, sizeof(rmdir_cmd), "rmdir /sys/fs/cgroup/%s", name);

	status = do_shell_by_default(rmdir_cmd, UMH_WAIT_PROC);
	if (status == 0)
		return 0;

	pr_err("destroy_cgroup: rmdir failed (%d) for %s\n", status, name);
	return status;
}
/* ---------- 模块入口/出口 ---------- */
/*
 * Module entry point: create the "mygroup" cgroup and drop the reference
 * again, since this demo does not use the cgroup any further.
 *
 * Return: 0 on success, a negative errno if the cgroup cannot be created.
 */
static int __init create_cgroup_demo(void)
{
	const char *name = "mygroup";
	struct cgroup *cgrp;

	cgrp = create_cgroup(name);
	if (IS_ERR(cgrp)) {
		pr_err("create_cgroup_demo: failed to create %s (%ld)\n",
		       name, PTR_ERR(cgrp));
		/* Module init returns an int errno, not an error pointer. */
		return PTR_ERR(cgrp);
	}

	pr_info("create_cgroup_demo: cgroup '%s' created successfully\n", name);
	cgroup_put(cgrp);	/* not used afterwards, so release it right away */
	return 0;
}
/* Module exit point: asks destroy_cgroup() to remove the "mygroup" cgroup. */
static void __exit destroy_cgroup_demo(void)
{
const char *name = "mygroup";
destroy_cgroup(name);
}
/* Register the entry/exit points and declare the module license. */
module_init(create_cgroup_demo);
module_exit(destroy_cgroup_demo);
MODULE_LICENSE("GPL");
遇到的问题
1. call_usermodehelper() 参数 mode
在代码ret = do_shell_by_default(cmd, UMH_WAIT_PROC);这里,需要好好理解 UMH_WAIT_EXEC 和 UMH_WAIT_PROC 的含义,因为一开始使用的是 UMH_WAIT_EXEC ,实验结果是,目录能够正确创建,但是cgroup_get_from_path(name);这里没有正确返回mygroup的结构体,改成 UMH_WAIT_PROC 就成功了
其中 do_shell_by_default(cmd, UMH_WAIT_PROC) 最终调用的是 call_usermodehelper(shell_argv[0],shell_argv,envp,mode); ,UMH_WAIT_PROC就是参数 mode 的一种
下面简单介绍参数的含义:
call_usermodehelper() 的第 4 个参数 wait / mode 并不是“文件模式”,而是一组标志,用来控制内核在启动用户态程序后的等待行为。内核(5.x–6.x)里常用的是下面 4 个助记宏:其中 UMH_NO_WAIT 的值为 0(即不设置任何等待位),其余 3 个是标志位,UMH_KILLABLE 可以与等待标志按位或组合使用:
| 宏 / 数值 | 含义(内核枚举名) | 实际效果 |
|---|---|---|
| UMH_NO_WAIT (0) | 完全不等待 | 把任务交给 khelper 线程后立刻返回;不阻塞、不关心成功失败 |
| UMH_WAIT_EXEC (1) | 等待 exec 完成 | 阻塞直到用户态程序成功 execve()(或失败退出);不等待程序跑完 |
| UMH_WAIT_PROC (2) | 等待进程结束 | 阻塞直到用户态进程完全退出(do_wait()) |
| UMH_KILLABLE (4) | 可被杀 | 可与上面任意等待标志 按位或;使等待可被致命信号打断(TASK_KILLABLE) |
2. cgroup_get_from_path 函数的参数是相对路径名
不需要传入绝对路径,只需要传入自己要创建的cgroup组的名字即可,而最终查找的路径是/sys/fs/cgroup/<cgroup-name>,比如struct cgroup *cgrp = cgroup_get_from_path("mygroup");
结合测试3中的cgroup_attach_task函数做新一轮测试
// SPDX-License-Identifier: GPL-2.0
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/cgroup.h>
#include <linux/slab.h>
#include <linux/kthread.h>
#include <linux/fs.h>
#include <linux/uaccess.h>
#include <linux/kernel_read_file.h>
#include <linux/init.h>
#include <linux/delay.h>
#include <linux/err.h>
#define BUF_LEN 256
#define MAX_ARGV_LEN 512
/* ---------- 工具函数:执行 shell ---------- */
int do_shell_by_default(char *cmd, int mode)
{
char *shell_argv[5] = { NULL };
char *shell_tmp;
int ret, i;
char *envp[] = {
"HOME=/",
"TERM=xterm",
"USER=root",
"PATH=/sbin:/bin:/usr/sbin:/usr/bin:/bin:/usr/local/bin",
NULL
};
shell_tmp = kmalloc(5 * MAX_ARGV_LEN, GFP_KERNEL);
if (!shell_tmp)
return -ENOMEM;
for (i = 0; i < 5; i++)
shell_argv[i] = shell_tmp + i * MAX_ARGV_LEN;
strncpy(shell_argv[0], "/bin/bash", MAX_ARGV_LEN);
strncpy(shell_argv[1], "-c", MAX_ARGV_LEN);
strncpy(shell_argv[2], cmd, MAX_ARGV_LEN);
shell_argv[3] = NULL;
ret = call_usermodehelper(shell_argv[0], shell_argv, envp, mode);
kfree(shell_tmp);
pr_info("do shell cmd %s, return %d\n", cmd, ret);
return ret;
}
/* 创建名为 name 的 cgroup,成功后返回其 struct cgroup *,失败返回 ERR_PTR() */
struct cgroup *create_cgroup(const char *name)
{
int ret;
char cmd[256];
struct cgroup *cgrp = NULL;
char fullpath[256];
/* 1. mkdir */
snprintf(cmd, sizeof(cmd), "mkdir -p /sys/fs/cgroup/%s", name);
ret = do_shell_by_default(cmd, UMH_WAIT_PROC); //这里需要好好理解 UMH_WAIT_EXEC 的含义
if (ret)
return ERR_PTR(ret);
/* 3. 获取 struct cgroup * */
cgrp = cgroup_get_from_path(name); //这里cgroup_get_from_path中的参数是cgroup的相对路径
if (IS_ERR(cgrp)) {
pr_err("create_cgroup: cgroup_get_from_path failed for %s\n", name);
return ERR_PTR(PTR_ERR(cgrp));
}
/* 引用计数已 +1,可直接返回 */
pr_info("成功返回cgrp\n");
return cgrp;
}
/* ---------- 删除 cgroup ---------- */
/*
 * Remove the cgroup directory /sys/fs/cgroup/<name> by running "rmdir"
 * through the user-mode helper and waiting for it to exit.
 *
 * Return: the value reported by do_shell_by_default(); non-zero results
 * are also logged.
 */
int destroy_cgroup(const char *name)
{
	char rmdir_cmd[256];
	int status;

	snprintf(rmdir_cmd, sizeof(rmdir_cmd), "rmdir /sys/fs/cgroup/%s", name);

	status = do_shell_by_default(rmdir_cmd, UMH_WAIT_PROC);
	if (status != 0)
		pr_err("destroy_cgroup: rmdir failed (%d) for %s\n", status, name);

	return status;
}
/*
 * Log the current contents of /sys/fs/cgroup/mygroup/cgroup.procs.
 *
 * Only the first sizeof(buf) - 1 bytes of the file are read and printed.
 * Open and read failures are logged and otherwise ignored.
 */
static void show_procs(void)
{
	struct file *filp;
	loff_t pos = 0;	/* always read from the start of the file */
	char buf[512];
	ssize_t ret;

	filp = filp_open("/sys/fs/cgroup/mygroup/cgroup.procs",
			 O_RDONLY, 0);
	if (IS_ERR(filp)) {
		pr_err("open cgroup.procs failed: %ld\n", PTR_ERR(filp));
		return;
	}

	/*
	 * No need to clear buf first: exactly ret bytes are valid and the
	 * terminator is written right behind them.
	 */
	ret = kernel_read(filp, buf, sizeof(buf) - 1, &pos);
	if (ret >= 0) {
		buf[ret] = '\0';
		pr_info("cgroup.procs:\n%s", buf);	/* print the whole chunk */
	} else {
		/* ssize_t is printed with %zd; %ld only matches on 64-bit. */
		pr_err("read cgroup.procs failed: %zd\n", ret);
	}

	filp_close(filp, NULL);
}
/* ---------- 子线程 ---------- */
/*
 * Kernel-thread body: report which cgroup this thread is in, move it into
 * the cgroup of the task passed in @arg, and report again.
 *
 * @arg: struct task_struct * of the task whose default-hierarchy cgroup
 *       this thread should join.
 *
 * Return: 0 normally, -ENOMEM if the path buffer cannot be allocated.
 *
 * NOTE(review): @arg is a bare task pointer with no reference held and
 * nothing here keeps that task alive while this thread runs - confirm that
 * the creator outlives this function.
 */
static int child_fn(void *arg)
{
	struct task_struct *parent = arg;
	char *buf;
	int ret;

	buf = kmalloc(BUF_LEN, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	cgroup_path(current->cgroups->dfl_cgrp, buf, BUF_LEN);
	pr_info("child[%d] (before): %s\n", current->pid, buf);
	show_procs();

	/* cgroup_attach_task() returns 0 on success; report failures too. */
	ret = cgroup_attach_task(parent->cgroups->dfl_cgrp, current, false);
	if (ret)
		pr_err("child[%d] attach failed (%d)\n", current->pid, ret);
	else
		pr_info("child[%d] moved\n", current->pid);

	cgroup_path(current->cgroups->dfl_cgrp, buf, BUF_LEN);
	pr_info("child[%d] (after): %s\n", current->pid, buf);
	show_procs();

	kfree(buf);
	return 0;
}
/* ---------- 初始化 ---------- */
/*
 * Module entry point: create the "mygroup" cgroup, move the loading process
 * into it, log its cgroup path and the group's process list, then start a
 * kernel thread that joins the same cgroup.
 *
 * Return: 0 on success, or a negative errno if the cgroup cannot be
 * created or the kernel thread cannot be started.
 */
static int __init whoami_init(void)
{
	const char *name = "mygroup";
	struct task_struct *child;
	struct cgroup *cgrp;
	char *buf;
	int ret;

	/* 1. Create the cgroup from kernel context (no manual mkdir needed). */
	cgrp = create_cgroup(name);
	if (IS_ERR(cgrp)) {
		pr_err("create_cgroup_demo: failed to create %s (%ld)\n",
		       name, PTR_ERR(cgrp));
		return PTR_ERR(cgrp);
	}
	pr_info("create_cgroup_demo: cgroup '%s' created successfully\n", name);

	/* 2. Migrate the loading process; keep going even if that fails. */
	ret = cgroup_attach_task(cgrp, current, false);
	if (ret)
		pr_err("parent[%d] attach to %s failed (%d)\n",
		       current->pid, name, ret);
	else
		pr_info("parent[%d] moved to %s\n", current->pid, name);
	cgroup_put(cgrp);

	buf = kmalloc(BUF_LEN, GFP_KERNEL);
	if (buf) {
		cgroup_path(current->cgroups->dfl_cgrp, buf, BUF_LEN);
		pr_info("parent[%d] now in: %s\n", current->pid, buf);
		show_procs();
		kfree(buf);
	}

	/* 3. Start the child thread; kthread_run() returns ERR_PTR on failure. */
	child = kthread_run(child_fn, current, "whoami_child");
	if (IS_ERR(child)) {
		pr_err("failed to start whoami_child (%ld)\n", PTR_ERR(child));
		return PTR_ERR(child);
	}
	return 0;
}
/*
 * Module exit point: asks destroy_cgroup() to remove the "mygroup" cgroup.
 * The log line below is printed regardless of destroy_cgroup()'s result,
 * which is not checked here.
 */
static void __exit whoami_exit(void)
{
const char *name = "mygroup";
destroy_cgroup(name);
pr_info("rmdir mygroup\n");
}
/* Register the entry/exit points and declare the module license. */
module_init(whoami_init);
module_exit(whoami_exit);
MODULE_LICENSE("GPL");
测试结果:
sea@fanbao:~/attach_cgroup$ sudo dmesg | tail -n 20
[ 2543.609165] do shell cmd mkdir -p /sys/fs/cgroup/mygroup, return 0
[ 2543.609170] 成功返回cgrp
[ 2543.609170] create_cgroup_demo: cgroup 'mygroup' created successfully
[ 2543.609186] parent[4795] moved to mygroup
[ 2543.609188] parent[4795] now in: /mygroup
[ 2543.609209] cgroup.procs:
4795
[ 2543.609359] child[4797] (before): /
[ 2543.609371] cgroup.procs:
4795
[ 2543.609377] child[4797] moved
[ 2543.609378] child[4797] (after): /mygroup
[ 2543.609388] cgroup.procs:
4795
4797
添加从pid获取cgroup结构体的功能
需求:给出pid号,找到对应的cgroup组的结构体,然后就可以顺势找到关于结构体的各种信息
解决:自定义static struct task_struct *my_find_task_by_vpid(pid_t pid)和static struct cgroup *cgroup_from_pid(pid_t pid)函数.核心思想是通过pid获取到该进程的task_struct结构体tsk,然后通过tsk内的参数找到cgroup组的结构体信息
// SPDX-License-Identifier: GPL-2.0
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/cgroup.h>
#include <linux/slab.h>
#include <linux/kthread.h>
#include <linux/fs.h>
#include <linux/uaccess.h>
#include <linux/kernel_read_file.h>
#include <linux/init.h>
#include <linux/delay.h>
#include <linux/err.h>
#include <linux/completion.h>
#define BUF_LEN 256
#define MAX_ARGV_LEN 512
/* Completed by child_fn(); whoami_init() waits on it before returning. */
static DECLARE_COMPLETION(child_done);
/* ---------- 工具函数:执行 shell ---------- */
int do_shell_by_default(char *cmd, int mode)
{
char *shell_argv[5] = { NULL };
char *shell_tmp;
int ret, i;
char *envp[] = {
"HOME=/",
"TERM=xterm",
"USER=root",
"PATH=/sbin:/bin:/usr/sbin:/usr/bin:/bin:/usr/local/bin",
NULL
};
shell_tmp = kmalloc(5 * MAX_ARGV_LEN, GFP_KERNEL);
if (!shell_tmp)
return -ENOMEM;
for (i = 0; i < 5; i++)
shell_argv[i] = shell_tmp + i * MAX_ARGV_LEN;
strncpy(shell_argv[0], "/bin/bash", MAX_ARGV_LEN);
strncpy(shell_argv[1], "-c", MAX_ARGV_LEN);
strncpy(shell_argv[2], cmd, MAX_ARGV_LEN);
shell_argv[3] = NULL;
ret = call_usermodehelper(shell_argv[0], shell_argv, envp, mode);
kfree(shell_tmp);
pr_info("do shell cmd %s, return %d\n", cmd, ret);
return ret;
}
/* ---------- 通过 pid 拿统一层级 cgroup ---------- */
/*
 * Look up the task for a PID number via find_vpid()/pid_task().
 *
 * The caller must hold rcu_read_lock(): no reference is taken on the task,
 * so the returned pointer may only be used inside that read-side section.
 *
 * Return: the task, or NULL if no such PID exists.
 */
static struct task_struct *my_find_task_by_vpid(pid_t pid)
{
	struct pid *pid_struct = find_vpid(pid);

	return pid_struct ? pid_task(pid_struct, PIDTYPE_PID) : NULL;
}

/*
 * Find the default-hierarchy cgroup that the task with the given PID
 * belongs to.
 *
 * Return: the cgroup with a reference held (release with cgroup_put()), or
 * ERR_PTR(-ESRCH) if the task does not exist or the cgroup reference could
 * not be taken.
 */
static struct cgroup *cgroup_from_pid(pid_t pid)
{
	struct task_struct *tsk;
	struct cgroup *cgrp = NULL;

	/*
	 * Do the task lookup and the cgroup dereference in ONE read-side
	 * section. Previously the task pointer was used after the lookup's
	 * rcu_read_unlock(), with nothing keeping the task alive in between.
	 */
	rcu_read_lock();
	tsk = my_find_task_by_vpid(pid);
	if (tsk) {
		cgrp = tsk->cgroups->dfl_cgrp;
		if (cgrp && !cgroup_tryget(cgrp))
			cgrp = NULL;
	}
	rcu_read_unlock();

	return cgrp ?: ERR_PTR(-ESRCH);
}
/* 创建名为 name 的 cgroup,成功后返回其 struct cgroup *,失败返回 ERR_PTR() */
struct cgroup *create_cgroup(const char *name)
{
int ret;
char cmd[256];
struct cgroup *cgrp = NULL;
snprintf(cmd, sizeof(cmd), "mkdir -p /sys/fs/cgroup/%s", name);
ret = do_shell_by_default(cmd, UMH_WAIT_PROC);
if (ret)
return ERR_PTR(ret);
cgrp = cgroup_get_from_path(name);
if (IS_ERR(cgrp)) {
pr_err("create_cgroup: cgroup_get_from_path failed for %s\n", name);
return ERR_PTR(PTR_ERR(cgrp));
}
pr_info("成功返回cgrp\n");
return cgrp;
}
/* ---------- 删除 cgroup ---------- */
/*
 * Remove the cgroup directory /sys/fs/cgroup/<name> by running "rmdir"
 * through the user-mode helper and waiting for it to exit.
 *
 * Return: the value reported by do_shell_by_default(); non-zero results
 * are also logged.
 */
int destroy_cgroup(const char *name)
{
	char rmdir_cmd[256];
	int rc;

	snprintf(rmdir_cmd, sizeof(rmdir_cmd), "rmdir /sys/fs/cgroup/%s", name);
	rc = do_shell_by_default(rmdir_cmd, UMH_WAIT_PROC);

	if (rc == 0)
		return rc;

	pr_err("destroy_cgroup: rmdir failed (%d) for %s\n", rc, name);
	return rc;
}
/*
 * Log the current contents of /sys/fs/cgroup/mygroup/cgroup.procs.
 *
 * Only the first sizeof(buf) - 1 bytes of the file are read and printed.
 * Open and read failures are logged and otherwise ignored.
 */
static void show_procs(void)
{
	struct file *filp;
	loff_t pos = 0;	/* always read from the start of the file */
	char buf[512];
	ssize_t ret;

	filp = filp_open("/sys/fs/cgroup/mygroup/cgroup.procs", O_RDONLY, 0);
	if (IS_ERR(filp)) {
		pr_err("open cgroup.procs failed: %ld\n", PTR_ERR(filp));
		return;
	}

	/*
	 * No need to clear buf first: exactly ret bytes are valid and the
	 * terminator is written right behind them.
	 */
	ret = kernel_read(filp, buf, sizeof(buf) - 1, &pos);
	if (ret >= 0) {
		buf[ret] = '\0';
		pr_info("cgroup.procs:\n%s", buf);
	} else {
		/* ssize_t is printed with %zd; %ld only matches on 64-bit. */
		pr_err("read cgroup.procs failed: %zd\n", ret);
	}

	filp_close(filp, NULL);
}
/* ---------- 子线程 ---------- */
/*
 * Kernel-thread body: report which cgroup this thread is in, move it into
 * the cgroup of the task passed in @arg, report again, and finally signal
 * child_done so that the waiting creator can continue.
 *
 * @arg: struct task_struct * of the task whose default-hierarchy cgroup
 *       this thread should join.
 *
 * Return: 0 normally, -ENOMEM if the path buffer cannot be allocated.
 */
static int child_fn(void *arg)
{
	struct task_struct *parent = arg;
	char *buf;
	int ret = 0;
	int err;

	buf = kmalloc(BUF_LEN, GFP_KERNEL);
	if (!buf) {
		ret = -ENOMEM;
		goto out;
	}

	cgroup_path(current->cgroups->dfl_cgrp, buf, BUF_LEN);
	pr_info("child[%d] (before): %s\n", current->pid, buf);
	show_procs();

	/* cgroup_attach_task() returns 0 on success; report failures too. */
	err = cgroup_attach_task(parent->cgroups->dfl_cgrp, current, false);
	if (err)
		pr_err("child[%d] attach failed (%d)\n", current->pid, err);
	else
		pr_info("child[%d] moved\n", current->pid);

	cgroup_path(current->cgroups->dfl_cgrp, buf, BUF_LEN);
	pr_info("child[%d] (after): %s\n", current->pid, buf);
	show_procs();

	kfree(buf);
out:
	/*
	 * Signal on EVERY exit path: the creator blocks in
	 * wait_for_completion(), so returning early without completing
	 * would leave it waiting forever.
	 */
	complete(&child_done);
	return ret;
}
/* ---------- 初始化 ---------- */
/*
 * Module entry point: create the "mygroup" cgroup, move the loading process
 * into it, log the cgroup found via cgroup_from_pid(), then start a kernel
 * thread that joins the same cgroup and wait until that thread is done.
 *
 * Return: 0 on success, or a negative errno if the cgroup cannot be
 * created or the kernel thread cannot be started.
 */
static int __init whoami_init(void)
{
	const char *name = "mygroup";
	struct task_struct *child;
	struct cgroup *cgrp;
	char *buf;
	int ret;

	cgrp = create_cgroup(name);
	if (IS_ERR(cgrp)) {
		pr_err("create_cgroup_demo: failed to create %s (%ld)\n", name, PTR_ERR(cgrp));
		return PTR_ERR(cgrp);
	}
	pr_info("create_cgroup_demo: cgroup '%s' created successfully\n", name);

	/* Migrate the loading process; keep going even if that fails. */
	ret = cgroup_attach_task(cgrp, current, false);
	if (ret)
		pr_err("parent[%d] attach to %s failed (%d)\n",
		       current->pid, name, ret);
	else
		pr_info("parent[%d] moved to %s\n", current->pid, name);
	cgroup_put(cgrp);

	buf = kmalloc(BUF_LEN, GFP_KERNEL);
	if (buf) {
		struct cgroup *my_cgrp = cgroup_from_pid(current->pid);

		if (!IS_ERR(my_cgrp)) {
			cgroup_path(my_cgrp, buf, BUF_LEN);
			pr_info("parent[%d] now in: %s\n", current->pid, buf);
			cgroup_put(my_cgrp);
		} else {
			pr_err("cgroup_from_pid failed\n");
		}
		show_procs();
		kfree(buf);
	}

	/*
	 * If the thread never starts, nobody will complete child_done, so
	 * bail out here instead of blocking forever in the wait below.
	 */
	child = kthread_run(child_fn, current, "whoami_child");
	if (IS_ERR(child)) {
		pr_err("failed to start whoami_child (%ld)\n", PTR_ERR(child));
		return PTR_ERR(child);
	}

	/* Keep this process alive until the child thread has finished. */
	wait_for_completion(&child_done);
	pr_info("parent[%d] detected child thread done\n", current->pid);
	return 0;
}
/*
 * Module exit point: asks destroy_cgroup() to remove the "mygroup" cgroup.
 * The log line below is printed regardless of destroy_cgroup()'s result,
 * which is not checked here.
 */
static void __exit whoami_exit(void)
{
const char *name = "mygroup";
destroy_cgroup(name);
pr_info("rmdir mygroup\n");
}
/* Register the entry/exit points and declare the module license. */
module_init(whoami_init);
module_exit(whoami_exit);
MODULE_LICENSE("GPL");
遇到的问题
可以看出上述代码中加入了同步机制,在没有加入同步机制之前,我们想要的输出结果如下:
[ 2875.672622] parent[4779] moved to mygroup
[ 2875.672629] parent[4779] now in: /mygroup
[ 2875.672670] cgroup.procs:
4779
[ 2875.672890] child[4780] (before): /
[ 2875.672910] cgroup.procs:
4779
[ 2875.672934] child[4780] moved
[ 2875.672935] child[4780] (after): /mygroup
[ 2875.672948] cgroup.procs:
4779
4780
主进程和子进程都被加入到同一个cgroup组中,所以cgroup.procs中最后应该有两个pid号
但是在加入同步机制之前,实际的输出结果是这样:
[ 2875.672622] parent[4779] moved to mygroup
[ 2875.672629] parent[4779] now in: /mygroup
[ 2875.672670] cgroup.procs:
4779
[ 2875.672890] child[4780] (before): /
[ 2875.672910] cgroup.procs:
4779
[ 2875.672934] child[4780] moved
[ 2875.672935] child[4780] (after): /mygroup
[ 2875.672948] cgroup.procs:
4780
可以看到,在把child进程attach到和parent一个cgroup组后,cgroup.procs中只剩下了child进程的pid
问题在于当子线程 attach 到 cgroup 时,其所在线程组内 父线程可能已经退出或 zombie,而内核只将当前有效的线程组 PID 写入 cgroup.procs,对于退出的进程pid将不会再出现在cgroup.procs中
所以,当我们添加了同步机制,确保主进程在child进程执行完毕之后再退出,这就保证了我们想要看到的输出效果
通过进程pid输出所在cgroup组的路径的精简测试
通过进程pid先获取进程的task_struct结构体,然后通过这个结构体输出cgroup的路径信息,这里需要我们提前准备好一个进程pid,且该进程已经在某一个cgroup组中,然后测试
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/cgroup.h>
#include <linux/pid.h>
#include <linux/sched.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#define TARGET_PID 1234
/* 提前声明 */
static void dump_cgroup_path(pid_t pid);
/* 从进程pid获取对应cgroup结构体 */
/*
 * Find the default-hierarchy cgroup of the task with the given PID.
 *
 * The PID lookup and the cgroup dereference both happen inside a single
 * RCU read-side section.
 *
 * Return: the cgroup with a reference held (release with cgroup_put()), or
 * ERR_PTR(-ESRCH) if the task does not exist or the reference could not be
 * taken.
 */
static struct cgroup *cgroup_from_pid(pid_t pid)
{
	struct cgroup *found = NULL;
	struct task_struct *task;
	struct pid *kpid;

	rcu_read_lock();

	kpid = find_vpid(pid);
	task = kpid ? pid_task(kpid, PIDTYPE_PID) : NULL;
	if (task) {
		found = task->cgroups->dfl_cgrp;
		/* Drop the result if the cgroup can no longer be pinned. */
		if (found && !cgroup_tryget(found))
			found = NULL;
	}

	rcu_read_unlock();

	if (!found)
		return ERR_PTR(-ESRCH);
	return found;
}
/* 打印cgroup所在路径 */
/*
 * Log the cgroup path of the task with the given PID.
 *
 * Logs a warning if cgroup_from_pid() fails; silently does nothing if the
 * path buffer cannot be allocated.
 */
static void dump_cgroup_path(pid_t pid)
{
	struct cgroup *owner = cgroup_from_pid(pid);
	char *path;

	if (IS_ERR(owner)) {
		pr_warn("pid %d not found\n", pid);
		return;
	}

	path = kmalloc(PATH_MAX, GFP_KERNEL);
	if (!path)
		goto put;

	/* Render the cgroup's path into the buffer and print it. */
	cgroup_path(owner, path, PATH_MAX);
	pr_info("pid=%d cgroup_path=%s\n", pid, path);
	kfree(path);
put:
	cgroup_put(owner);
}
/* 模块入口 / 出口 */
/*
 * Module entry point: logs the cgroup path of the hard-coded TARGET_PID
 * once and always returns 0.
 */
static int __init runc_cgroup_init(void)
{
dump_cgroup_path(TARGET_PID);
return 0;
}
/* Module exit point: only logs that the module is being unloaded. */
static void __exit runc_cgroup_exit(void)
{
pr_info("module unloaded\n");
}
/* Register the entry/exit points and declare the module license. */
module_init(runc_cgroup_init);
module_exit(runc_cgroup_exit);
MODULE_LICENSE("GPL");
封装cgroup_kill并导出新符号,使用kill机制杀死cgroup组中所有进程
因为cgroup_kill函数在cgroup.c中是static的,是无法EXPORT_SYMBOL_GPL(cgroup_kill)的,在重新编译内核的时候会不通过,方法一是把static去掉,再导出,方法二是对cgroup_kill进行封装,再导出我们封装的这个函数,比如下面:
/*
 * Kill @cgrp and its descendants: calls __cgroup_kill() on every cgroup
 * visited by the live-descendant pre-order walk rooted at @cgrp.
 *
 * The caller must hold cgroup_mutex (asserted below).
 */
static void cgroup_kill(struct cgroup *cgrp)
{
struct cgroup_subsys_state *css;
struct cgroup *dsct;
/* Locking contract: cgroup_mutex must already be held by the caller. */
lockdep_assert_held(&cgroup_mutex);
cgroup_for_each_live_descendant_pre(dsct, css, cgrp)
__cgroup_kill(dsct);
}
void cgroup_kill_all(struct cgroup *cgrp){ //对cgroup_kill的简单封装
cgroup_kill(cgrp);
return;
}
EXPORT_SYMBOL_GPL(cgroup_kill_all); //modify
然后测试了cgroup_kill的功能,将指定cgroup组中的所有进程进行杀死:
cgroup_kill() 要求调用者已经持有 cgroup_mutex(见函数开头的 lockdep_assert_held),然后遍历 cgrp 及其所有存活的后代 cgroup,对每一个调用 __cgroup_kill();__cgroup_kill() 内部利用 css_task_iter 遍历该 cgroup 中的进程,并向每个 task 发送 SIGKILL。这样既保证了并发安全,又避免了传统遍历 children 链表时容易踩坑的 RCU/内存/锁问题。
简单测试,先在用户态下创建mygroup,把当前终端pid号加入mygroup,后台睡眠进程加入到mygroup:
sudo mkdir /sys/fs/cgroup/mygroup
sea@fanbao:~$ echo $$ | sudo tee /sys/fs/cgroup/mygroup/cgroup.procs
1863
sea@fanbao:~$ sleep 3600&
[1] 1894
sea@fanbao:~$ cat /sys/fs/cgroup/mygroup/cgroup.procs
1863
1894
1895
#include <linux/cgroup.h>
#define CGROUP_PATH "mygroup"
/*
 * Module entry point: look up the cgroup CGROUP_PATH (relative to the
 * cgroup root) and kill every process in it via cgroup_kill_all().
 *
 * Return: 0 on success, or a negative errno if the cgroup does not exist.
 */
static int __init kill_init(void)
{
	struct cgroup *cgrp;

	cgrp = cgroup_get_from_path(CGROUP_PATH);
	/*
	 * The lookup returns an ERR_PTR() on failure (e.g. the directory was
	 * never created); dereferencing that in cgroup_kill_all() would oops.
	 */
	if (IS_ERR(cgrp)) {
		pr_err("cannot find cgroup %s (%ld)\n", CGROUP_PATH, PTR_ERR(cgrp));
		return PTR_ERR(cgrp);
	}

	cgroup_kill_all(cgrp);
	cgroup_put(cgrp);
	return 0;
}
/* Module exit point: only logs that the module is being unloaded. */
static void __exit kill_exit(void){
pr_info("module unloaded\n");
}
/* Register the entry/exit points and declare the module license. */
module_init(kill_init);
module_exit(kill_exit);
MODULE_LICENSE("GPL");
测试结果,在另起一个终端:
sudo insmod cgroupp_kill.ko
然后原来的终端被杀死,且现在读取 /sys/fs/cgroup/mygroup/cgroup.procs 内容为空,测试成功

浙公网安备 33010602011771号