Linux系统调用hook
一、介绍
Linux系统的用户层(也就是ring3)使用操作系统提供的各种服务(ring0),需要通过软件中断(0x80,中断描述符表中对应系统调用处理),从ring3切换到ring0,通过系统调用来获取对应系统服务。
二、实现
1.基于修改 sys_call_table 的hook方式
系统调用表可以理解为一个存储着系统调用处理函数地址的数组,通过以系统调用号为索引访问各项函数地址。
我们要修改系统调用表,首先就要先获得系统调用表sys_call_table的内存地址。
方案1:直接修改sys_call_table
当内核版本 < 5.7 时(kallsyms_lookup_name 自 5.7 起不再导出给内核模块),可以直接通过kallsyms_lookup_name函数获得sys_call_table的地址。我们只需要关闭写保护并将sys_call_table所在的内存页修改为可写,即可直接hook系统调用。
#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/types.h>
#include <linux/dirent.h>
#include <linux/syscalls.h>
#include <linux/unistd.h>
#include <linux/proc_fs.h>
#include <asm/uaccess.h>
#include <linux/namei.h>
#include <linux/fs.h>
#include <linux/utsname.h>
#include <linux/file.h>
#include <linux/fdtable.h>
#include <linux/slab.h>
#include <linux/proc_ns.h>
#include <linux/kallsyms.h>

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Anzi");
MODULE_DESCRIPTION("A test module");
MODULE_VERSION("1:1.0");

/*
 * Test module: replaces the getdents64 entry of the system call table with
 * hacked_getdents64(), which runs both the original handler and a local
 * re-implementation (ksys_getdents64) and logs when their results differ.
 *
 * NOTE(review): the handlers below use the classic "arguments passed
 * directly" prototype. Newer x86-64 kernels pass a single struct pt_regs *
 * to syscall-table entries; confirm the calling convention of the target
 * kernel before loading this module.
 */

/* Directory entry layout written to user space by filldir(). */
struct linux_dirent {
	unsigned long d_ino;
	unsigned long d_off;
	unsigned short d_reclen;
	char d_name[1];
};

/* Address of the system call table, resolved by name in hello_init(). */
unsigned long **sys_call_table;
/* Pointer to set_memory_rw(), resolved by name in hello_init(); may be NULL. */
int (*set_mem_rw)(unsigned long addr, int numpages);

/* Saved original handlers. Only orig_getdents64 is assigned by this module. */
asmlinkage long (*orig_setuid)(uid_t uid);
asmlinkage long (*orig_getdents)(unsigned int fd, struct linux_dirent *dirp, unsigned int count);
asmlinkage long (*orig_getdents64)(unsigned int fd, struct linux_dirent64 *dirp, unsigned int count);
asmlinkage long (*orig_read)(unsigned int fd, char *buf, size_t count);
asmlinkage long (*orig_open)(const char __user *filename, int flags, umode_t mode);
asmlinkage long (*orig_lstat)(const char __user *filename, struct __old_kernel_stat __user *statbuf);
asmlinkage long (*orig_stat)(const char __user *filename, struct __old_kernel_stat __user *statbuf);

/* Per-call state handed to filldir() through the embedded dir_context. */
struct getdents_callback {
	struct dir_context ctx;
	struct linux_dirent __user *current_dir;	/* next record to fill */
	struct linux_dirent __user *previous;		/* last record filled, or NULL */
	int count;					/* bytes left in the user buffer */
	int error;
};

/* Same as getdents_callback, for the linux_dirent64 record layout. */
struct getdents_callback64 {
	struct dir_context ctx;
	struct linux_dirent64 __user *current_dir;
	struct linux_dirent64 __user *previous;
	int count;
	int error;
};

/*
 * Write CR0 with a raw MOV instead of native_write_cr0() (the original
 * listing deliberately left the native_write_cr0() call commented out).
 */
static inline void mywrite_cr0(unsigned long cr0)
{
	asm volatile("mov %0,%%cr0" : "+r"(cr0), "+m"(__force_order));
}

/* Set CR0.WP (bit 16) again, re-enabling write protection on this CPU. */
void enable_write_protect(void)
{
	unsigned long cr0;

	preempt_disable();
	barrier();		/* keep earlier stores ahead of the CR0 update */
	cr0 = read_cr0();
	set_bit(16, &cr0);	/* WP = 1 */
	mywrite_cr0(cr0);
	barrier();
	preempt_enable();
}

/*
 * Clear CR0.WP (bit 16) on this CPU and log the register value before and
 * after the change. CR0 is a per-CPU register, so callers must keep
 * preemption disabled from this call until enable_write_protect().
 */
void disable_write_protect(void)
{
	unsigned long cr0;

	preempt_disable();
	barrier();
	cr0 = read_cr0();
	printk(KERN_INFO"A1:%lx\n", cr0);
	clear_bit(16, &cr0);	/* WP = 0; no other bit is touched */
	printk(KERN_INFO"B:%lx\n", cr0);
	mywrite_cr0(cr0);
	cr0 = read_cr0();
	printk(KERN_INFO"A2:%lx\n", cr0);
	barrier();		/* keep later stores behind the CR0 update */
	preempt_enable();
}

/*
 * dir_context actor: append one linux_dirent record to the user buffer.
 * Returns 0 on success or a negative errno, which is also stored in
 * buf->error. The checked put_user() is used for every store because the
 * callers in this file do not run access_ok() on the buffer.
 */
static int filldir(struct dir_context *ctx, const char *name, int namlen,
		   loff_t offset, u64 ino, unsigned int d_type)
{
	struct linux_dirent __user *dirent;
	struct getdents_callback *buf =
		container_of(ctx, struct getdents_callback, ctx);
	unsigned long d_ino;
	/* +2: one byte for the name terminator, one for d_type at the end. */
	int reclen = ALIGN(offsetof(struct linux_dirent, d_name) + namlen + 2,
			   sizeof(long));

	buf->error = -EINVAL;	/* only used if we fail.. */
	if (reclen > buf->count)
		return -EINVAL;
	d_ino = ino;
	if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) {
		buf->error = -EOVERFLOW;
		return -EOVERFLOW;
	}
	dirent = buf->previous;
	if (dirent) {
		if (signal_pending(current))
			return -EINTR;
		/* The previous record's d_off is this entry's offset. */
		if (put_user(offset, &dirent->d_off))
			goto efault;
	}
	dirent = buf->current_dir;
	if (put_user(d_ino, &dirent->d_ino))
		goto efault;
	if (put_user(reclen, &dirent->d_reclen))
		goto efault;
	if (copy_to_user(dirent->d_name, name, namlen))
		goto efault;
	if (put_user(0, dirent->d_name + namlen))
		goto efault;
	/* d_type lives in the last byte of the record. */
	if (put_user(d_type, (char __user *) dirent + reclen - 1))
		goto efault;
	buf->previous = dirent;
	dirent = (void __user *)dirent + reclen;
	buf->current_dir = dirent;
	buf->count -= reclen;
	return 0;
efault:
	buf->error = -EFAULT;
	return -EFAULT;
}

/*
 * dir_context actor: append one linux_dirent64 record to the user buffer.
 * Same contract as filldir().
 */
static int filldir64(struct dir_context *ctx, const char *name, int namlen,
		     loff_t offset, u64 ino, unsigned int d_type)
{
	struct linux_dirent64 __user *dirent;
	struct getdents_callback64 *buf =
		container_of(ctx, struct getdents_callback64, ctx);
	int reclen = ALIGN(offsetof(struct linux_dirent64, d_name) + namlen + 1,
			   sizeof(u64));

	buf->error = -EINVAL;	/* only used if we fail.. */
	if (reclen > buf->count)
		return -EINVAL;
	dirent = buf->previous;
	if (dirent) {
		if (signal_pending(current))
			return -EINTR;
		if (put_user(offset, &dirent->d_off))
			goto efault;
	}
	dirent = buf->current_dir;
	if (put_user(ino, &dirent->d_ino))
		goto efault;
	if (put_user(0, &dirent->d_off))
		goto efault;
	if (put_user(reclen, &dirent->d_reclen))
		goto efault;
	if (put_user(d_type, &dirent->d_type))
		goto efault;
	if (copy_to_user(dirent->d_name, name, namlen))
		goto efault;
	if (put_user(0, dirent->d_name + namlen))
		goto efault;
	buf->previous = dirent;
	dirent = (void __user *)dirent + reclen;
	buf->current_dir = dirent;
	buf->count -= reclen;
	return 0;
efault:
	buf->error = -EFAULT;
	return -EFAULT;
}

/*
 * Local re-implementation of getdents: iterate the directory behind fd and
 * fill dirent with linux_dirent records via filldir().
 * Returns the number of bytes written, 0 when nothing was written and no
 * error occurred, or a negative errno.
 */
int ksys_getdents(unsigned int fd, struct linux_dirent __user *dirent,
		  unsigned int count)
{
	struct fd f;
	struct linux_dirent __user *lastdirent;
	struct getdents_callback buf = {
		.ctx.actor = filldir,
		.count = count,
		.current_dir = dirent
	};
	int error;

	f = fdget(fd);
	if (!f.file)
		return -EBADF;

	error = iterate_dir(f.file, &buf.ctx);
	if (error >= 0)
		error = buf.error;
	lastdirent = buf.previous;
	if (lastdirent) {
		/* Patch the final record's d_off with the resulting position. */
		if (put_user(buf.ctx.pos, &lastdirent->d_off))
			error = -EFAULT;
		else
			error = count - buf.count;
	}
	fdput(f);
	return error;
}

/* linux_dirent64 counterpart of ksys_getdents(); same return contract. */
int ksys_getdents64(unsigned int fd, struct linux_dirent64 __user *dirent,
		    unsigned int count)
{
	struct fd f;
	struct linux_dirent64 __user *lastdirent;
	struct getdents_callback64 buf = {
		.ctx.actor = filldir64,
		.count = count,
		.current_dir = dirent
	};
	int error;

	f = fdget(fd);
	if (!f.file)
		return -EBADF;

	error = iterate_dir(f.file, &buf.ctx);
	if (error >= 0)
		error = buf.error;
	lastdirent = buf.previous;
	if (lastdirent) {
		typeof(lastdirent->d_off) d_off = buf.ctx.pos;

		if (put_user(d_off, &lastdirent->d_off))
			error = -EFAULT;
		else
			error = count - buf.count;
	}
	fdput(f);
	return error;
}

// static long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode)
// {
//	struct open_flags op;
//	int fd = build_open_flags(flags, mode, &op);
//	struct filename *tmp;
//	if (fd)
//		return fd;
//	tmp = getname(filename);
//	if (IS_ERR(tmp))
//		return PTR_ERR(tmp);
//	fd = get_unused_fd_flags(flags);
//	if (fd >= 0) {
//		struct file *f = do_filp_open(dfd, tmp, &op);
//		if (IS_ERR(f)) {
//			put_unused_fd(fd);
//			fd = PTR_ERR(f);
//		} else {
//			fsnotify_open(f);
//			fd_install(fd, f);
//		}
//	}
//	putname(tmp);
//	return fd;
// }

/*
 * Replacement handler for getdents: runs the original handler, then the
 * local re-implementation, logs a message when the two return values
 * differ, and returns the re-implementation's result.
 *
 * NOTE(review): both calls read from the same open directory into the same
 * buffer, so the second call continues where the first one stopped. A
 * mismatch is therefore expected for most directories; confirm this is the
 * intended experiment.
 */
asmlinkage long hacked_getdents(unsigned int fd, struct linux_dirent *dirp, unsigned int count)
{
	long ori_ret = 0;
	long new_ret = 0;

	ori_ret = (*orig_getdents)(fd, dirp, count);
	new_ret = ksys_getdents(fd, dirp, count);
	if (ori_ret != new_ret)
		printk(KERN_INFO"BUG: getdents is trouble!!!\n");
	return new_ret;
}

/* getdents64 counterpart of hacked_getdents(); the same caveat applies. */
asmlinkage long hacked_getdents64(unsigned int fd, struct linux_dirent64 *dirp, unsigned int count)
{
	long ori_ret = 0;
	long new_ret = 0;

	ori_ret = (*orig_getdents64)(fd, dirp, count);
	new_ret = ksys_getdents64(fd, dirp, count);
	if (ori_ret != new_ret)
		printk(KERN_INFO"BUG: getdents64 is trouble!!!\n");
	return new_ret;
}

/*
 * Store handler into slot nr of the system call table.
 *
 * set_memory_rw() is given the page that contains the slot (page-aligned
 * address, one page). It is called first because it must not run with
 * preemption disabled. The CR0.WP window is then kept as short as possible
 * and is entered with preemption disabled, so the store executes on the
 * same CPU whose CR0 was changed.
 */
static void write_syscall_entry(unsigned int nr, unsigned long *handler)
{
	unsigned long page = (unsigned long)&sys_call_table[nr] & PAGE_MASK;

	if (set_mem_rw != NULL)
		set_mem_rw(page, 1);

	preempt_disable();
	disable_write_protect();
	sys_call_table[nr] = handler;
	enable_write_protect();
	preempt_enable();
}

/*
 * Install the getdents64 hook: remember the original handler in
 * orig_getdents64 and point the table slot at hacked_getdents64().
 * Always returns 0.
 */
int hookMyGetdents(void)
{
	// orig_getdents = (void *)sys_call_table[__NR_getdents];
	// sys_call_table[__NR_getdents] = (unsigned long*)&hacked_getdents;
	orig_getdents64 = (void *)sys_call_table[__NR_getdents64];
	printk(KERN_INFO "Hook before:%p\n", orig_getdents64);
	printk(KERN_INFO "modify!!!\n");
	write_syscall_entry(__NR_getdents64, (unsigned long *)hacked_getdents64);
	return 0;
}

/*
 * Module entry point: resolve sys_call_table and set_memory_rw by name and
 * install the hook. Returns 0 on success, -ENOENT when the table symbol
 * cannot be resolved.
 */
static int __init hello_init(void)
{
	// printk(KERN_INFO "Hello hacker!\n");
	sys_call_table = (void *)kallsyms_lookup_name("sys_call_table");
	set_mem_rw = (void *)kallsyms_lookup_name("set_memory_rw");
	if (sys_call_table == NULL) {
		printk(KERN_ERR "sys_call_table not found\n");
		return -ENOENT;
	}
	return hookMyGetdents();
}

/*
 * Module exit: put the original getdents64 handler back so the table no
 * longer points into this module once it is unloaded.
 * NOTE(review): a call that is still executing inside hacked_getdents64()
 * during unload is not waited for — confirm whether that matters for the
 * test setup.
 */
static void __exit hello_exit(void)
{
	if (sys_call_table != NULL && orig_getdents64 != NULL)
		write_syscall_entry(__NR_getdents64,
				    (unsigned long *)orig_getdents64);
	printk(KERN_INFO "Hello: Goodbye, crazy world!\n");
}

module_init(hello_init);
module_exit(hello_exit);
方案2:基于kprobe函数获得kallsyms_lookup_name
获得sys_call_table地址最简单的方式,就是通过kallsyms_lookup_name函数按符号名查找。但是,从内核 5.7 开始该函数不再导出,所以我们这里分为两个实现方案:一个仅适用于5.7以前的内核,另一个在5.7及以上的内核里也可以使用(普适)。
内核 < 5.7 代码如下:
#include <linux/kallsyms.h>

/* Look up the address of the system call table by its symbol name. */
sysCallTable = (void *)kallsyms_lookup_name("sys_call_table");
/*
 * Read one handler out of the table. General form:
 *   function pointer = (void *)sysCallTable[syscall number of the target function];
 */
open = (void *)sysCallTable[__NR_open];
Kprobe系统允许我们将断点动态插入到正在运行的内核中。注册kprobe时,内核会解析symbol_name对应的符号地址并保存在kp.addr中,我们从中即可得到目标函数的地址,无需等待断点真正触发。获取到目标函数地址后,我们注销该断点,让内核继续正常运行。
内核(普适版)代码如下:
#include <linux/kprobes.h>
/* Probe descriptor: only the name of the symbol to be resolved is set. */
static struct kprobe kp = {
.symbol_name = "kallsyms_lookup_name"
};
/* Function-pointer type with the signature used to call the resolved function. */
typedef unsigned long (*kallsyms_lookup_name_t)(const char *name);
/*
 * Receives the resolved address taken from kp.addr.
 * NOTE(review): presumably declared inside a function, where it shadows any
 * existing declaration of the same name — confirm the intended scope.
 */
kallsyms_lookup_name_t kallsyms_lookup_name;
...
register_kprobe(&kp);
kallsyms_lookup_name = (kallsyms_lookup_name_t) kp.addr;
uregister_kprobe(&kp);
PS:要使用该函数,编译内核时必须启用 CONFIG_KALLSYMS(使用kprobe还需要启用 CONFIG_KPROBES),一般发行版的内核都已开启。
方案3:暴力搜索内存
从内核地址空间的起始位置开始,把每一个地址依次假定为系统调用表的起始地址,判断其中某一项(例如第 __NR_close 项)是否与已知的对应系统调用处理函数地址相同;若相同,则认为该地址就是系统调用表的起始地址。
# include <linux/module.h> # include <linux/kernel.h> unsigned long ** getSysCallTable(void) { unsigned long **entry = (unsigned long **)PAGE_OFFSET; //PAGE_OFFSET是内核内存空间的起始地址 for (;(unsigned long)entry < ULONG_MAX; entry += 1) { if (entry[__NR_close] == (unsigned long *)sys_close) { return entry; } } return NULL; }
其中,sys_close 是导出函数,我们可以事先就知道该系统调用处理函数(__NR_close)的地址。
方案4:基于System.map文件
内核镜像对应的 System.map 文件存储了内核符号表的信息,可以通过此文件查到各个符号(包括 sys_call_table)的地址。
手动通过命令行获取如下:
cat /boot/System.map-$(uname -r) | grep "sys_call_table"
代码实现如下:
#include <linux/module.h> #include <linux/kernel.h> #include <linux/unistd.h> #include <linux/slab.h> #define PROC_V "/proc/version" #define BOOT_PATH "/boot/System.map-" #define MAX_VERSION_LEN 256 unsigned long *syscall_table = NULL; static int find_sys_call_table (char *kern_ver) { char system_map_entry[MAX_VERSION_LEN]; int i = 0; char *filename; size_t filename_length = strlen(kern_ver) + strlen(BOOT_PATH) + 1; struct file *f = NULL; mm_segment_t oldfs; oldfs = get_fs(); set_fs (KERNEL_DS); printk(KERN_EMERG "Kernel version: %s\n", kern_ver); filename = kmalloc(filename_length, GFP_KERNEL); if (filename == NULL) { printk(KERN_EMERG "kmalloc failed on System.map-<version> filename allocation"); return -1; } memset(filename, 0, filename_length); strncpy(filename, BOOT_PATH, strlen(BOOT_PATH)); strncat(filename, kern_ver, strlen(kern_ver)); f = filp_open(filename, O_RDONLY, 0); if (IS_ERR(f) || (f == NULL)) { printk(KERN_EMERG "Error opening System.map-<version> file: %s\n", filename); return -1; } memset(system_map_entry, 0, MAX_VERSION_LEN); while (vfs_read(f, system_map_entry + i, 1, &f->f_pos) == 1) { if ( system_map_entry[i] == '\n' || i == MAX_VERSION_LEN ) { i = 0; if (strstr(system_map_entry, "sys_call_table") != NULL) { char *sys_string; char *system_map_entry_ptr = system_map_entry; sys_string = kmalloc(MAX_VERSION_LEN, GFP_KERNEL); if (sys_string == NULL) { filp_close(f, 0); set_fs(oldfs); kfree(filename); return -1; } memset(sys_string, 0, MAX_VERSION_LEN); strncpy(sys_string, strsep(&system_map_entry_ptr, " "), MAX_VERSION_LEN); kstrtoul(sys_string, 16, &syscall_table); printk(KERN_EMERG "syscall_table retrieved\n"); kfree(sys_string); break; } memset(system_map_entry, 0, MAX_VERSION_LEN); continue; } i++; } filp_close(f, 0); set_fs(oldfs); kfree(filename); return 0; } char *acquire_kernel_version(char *buf) { struct file *proc_version; char *kernel_version; mm_segment_t oldfs; oldfs = get_fs(); set_fs (KERNEL_DS); proc_version = filp_open(PROC_V, 
O_RDONLY, 0); if (IS_ERR(proc_version) || (proc_version == NULL)) { return NULL; } memset(buf, 0, MAX_VERSION_LEN); vfs_read(proc_version, buf, MAX_VERSION_LEN, &(proc_version->f_pos)); kernel_version = strsep(&buf, " "); kernel_version = strsep(&buf, " "); kernel_version = strsep(&buf, " "); filp_close(proc_version, 0); set_fs(oldfs); return kernel_version; } char *kernel_version = kmalloc(MAX_VERSION_LEN, GFP_KERNEL); find_sys_call_table(acquire_kernel_version(kernel_version));
2.基于中断描述符表(IDT)的hook
系统调用可以通过0x80中断来进行(在现在的64位机器上,只有兼容模式的32位程序才走0x80中断,64位程序是通过syscall指令实现系统调用的),故可以通过查找0x80中断的处理程序来获得sys_call_table的地址。其基本步骤是:
- 获取中断描述符表(IDT)的地址(使用C ASM汇编)
- 从中查找0x80中断(系统调用中断)的服务例程(8*0x80偏移)
- 搜索该例程的内存空间,从其中获取sys_call_table(保存所有系统调用例程的入口地址)的地址
#include <linux/module.h> #include <linux/kernel.h> // 中断描述符表寄存器结构 struct { unsigned short limit; unsigned int base; } __attribute__((packed)) idtr; // 中断描述符表结构 struct { unsigned short off1; unsigned short sel; unsigned char none, flags; unsigned short off2; } __attribute__((packed)) idt; // 查找sys_call_table的地址 void disp_sys_call_table(void) { unsigned int sys_call_off; unsigned int sys_call_table; char* p; int i; // 获取中断描述符表寄存器的地址 asm("sidt %0":"=m"(idtr)); printk("addr of idtr: %x\n", &idtr); // 获取0x80中断处理程序的地址 memcpy(&idt, idtr.base+8*0x80, sizeof(idt)); sys_call_off=((idt.off2<<16)|idt.off1); printk("addr of idt 0x80: %x\n", sys_call_off); // 从0x80中断服务例程中搜索sys_call_table的地址 p=sys_call_off; for (i=0; i<100; i++) { if (p=='\xff' && p[i+1]=='\x14' && p[i+2]=='\x85') { sys_call_table=*(unsigned int*)(p+i+3); printk("addr of sys_call_table: %x\n", sys_call_table); return ; } } }
3.基于kprobe提供的接口进行hook
4.基于ftrace的hook
关闭写保护
因为系统调用表sys_call_table所在的内存页(对应的页表项)是只读的,所以需要关闭写保护。写保护开启(CR0的WP位为1)时,即使在内核态(ring0)向只读页写入也会触发页错误。我们需要先关闭写保护,再将系统调用表所在的页修改为可写,才能hook系统调用表。
写保护是通过 CR0 寄存器控制:开启或者关闭, 只需要修改一个比特,也就是从 0 开始数的第 16 个比特位。
在Linux内核(以4.18.0为例)中,native_write_cr0函数负责将新值写入cr0寄存器。从内核5.3开始,该函数会检查新值中的WP位是否被清除,若被清除则将其重新置位并给出警告,因此无法再通过它关闭写保护,代码如下:
使用 read_cr0 / write_cr0 来读取 / 写入 CR0 寄存器,或者手写内联汇编。关闭写保护代码如下:
PS:开启写保护只要将对应标志位置1即可。
四、补充
在内核中获取目标函数的地址,可以通过命令:cat /proc/kallsyms | grep 函数名 。/proc/kallsyms 包含了整个内核的大部分符号(也可以通过配置CONFIG_KALLSYMS_ALL来获取全部)。/proc/kallsyms与/boot/System.map的区别在于,kallsyms是动态更新的,与运行中的内核同步。
五、参考
https://docs-conquer-the-universe.readthedocs.io/zh_CN/latest/linux_rootkit/sys_call_table.html
https://github.com/TangentHuang/ucas-rootkit
https://tnichols.org/2015/10/19/Hooking-the-Linux-System-Call-Table/
https://www.cnblogs.com/LittleHann/p/3854977.html#_lab2_3_2