Glibc中write() 的实现与系统调用入口机制
源码
#include <unistd.h>
/* Demo entry point: sends the 12-byte greeting to file descriptor 1 with
   write().  Returns 0 only when all 12 bytes were written; an error (-1) or a
   short write yields exit status 1 instead of being silently ignored.  */
int main(void) {
    ssize_t written = write(1, "hello,world\n", 12);
    if (written != 12) {
        return 1;
    }
    return 0;
}
分析
write 是 Linux 内核提供的系统调用。Glibc 提供了一个名为 write() 的函数,它是对该系统调用的封装,定义在 sysdeps/unix/sysv/linux/write.c 中:
/* Implementation behind write(): hands fd, buf and nbytes to SYSCALL_CANCEL
   together with the syscall name `write`, and returns whatever that macro
   evaluates to.  */
ssize_t __libc_write (int fd, const void *buf, size_t nbytes)
{
return SYSCALL_CANCEL (write, fd, buf, nbytes);
}
libc_hidden_def (__libc_write)
/* The alias macros below attach the names __write and write to
   __libc_write (the macros themselves are defined elsewhere in glibc).  */
weak_alias (__libc_write, __write)
libc_hidden_weak (__write)
weak_alias (__libc_write, write)
libc_hidden_weak (write)
通过SYSCALL_CANCEL进行包装,在sysdeps/unix/sysdep.h中
// sysdeps/unix/sysdep.h
/* Statement-expression wrapper around INLINE_SYSCALL_CALL; its value is
   sc_ret.  When NO_SYSCALL_CANCEL_CHECKING is true the call is issued
   directly.  Otherwise the call is bracketed by LIBC_CANCEL_ASYNC (), whose
   result is saved in sc_cancel_oldtype, and LIBC_CANCEL_RESET (), which
   receives that saved value after the call returns.  */
#define SYSCALL_CANCEL(...) \
({ \
long int sc_ret; \
if (NO_SYSCALL_CANCEL_CHECKING) \
sc_ret = INLINE_SYSCALL_CALL (__VA_ARGS__); \
else \
{ \
int sc_cancel_oldtype = LIBC_CANCEL_ASYNC (); \
sc_ret = INLINE_SYSCALL_CALL (__VA_ARGS__); \
LIBC_CANCEL_RESET (sc_cancel_oldtype); \
} \
sc_ret; \
})
通过INLINE_SYSCALL_CALL又进行一次包装,
// sysdeps/unix/sysdep.h
/* Two-level token paste, so that b is macro-expanded before it is glued
   onto a.  */
#define __SYSCALL_CONCAT_X(a,b) a##b
#define __SYSCALL_CONCAT(a,b) __SYSCALL_CONCAT_X (a, b)
/* Three-argument form: forwards to INLINE_SYSCALL with an explicit argument
   count of 3.  */
#define __INLINE_SYSCALL3(name, a1, a2, a3) \
INLINE_SYSCALL (name, 3, a1, a2, a3)
/* Argument counter: the caller's arguments push the literal list 7..0 to the
   right, so the value that lands in slot n equals the number of arguments
   that follow the first one (the syscall name).  */
#define __INLINE_SYSCALL_NARGS_X(a,b,c,d,e,f,g,h,n,...) n
#define __INLINE_SYSCALL_NARGS(...) \
__INLINE_SYSCALL_NARGS_X (__VA_ARGS__,7,6,5,4,3,2,1,0,)
/* Pastes prefix b with the computed count and invokes the resulting macro
   with the original argument list.  */
#define __INLINE_SYSCALL_DISP(b,...) \
__SYSCALL_CONCAT (b,__INLINE_SYSCALL_NARGS(__VA_ARGS__))(__VA_ARGS__)
/* Entry point: dispatches to __INLINE_SYSCALL<N>, where N is the counted
   number of arguments after the syscall name.  */
#define INLINE_SYSCALL_CALL(...) \
__INLINE_SYSCALL_DISP (__INLINE_SYSCALL, __VA_ARGS__)
INLINE_SYSCALL_CALL(write, fd, buf, count)等价于
__INLINE_SYSCALL_DISP(__INLINE_SYSCALL, write, fd, buf, count)
我们需要先求 __INLINE_SYSCALL_NARGS(write, fd, buf, count)
展开为
__INLINE_SYSCALL_NARGS_X(write, fd, buf, count, 7,6,5,4,3,2,1,0,)
#define __INLINE_SYSCALL_NARGS_X(a,b,c,d,e,f,g,h,n,...) n
实参与形参一一对应
a --> write
b --> fd
c --> buf
d --> count
e --> 7
f --> 6
g --> 5
h --> 4
n --> 3(其余的 2,1,0 落入可变参数 ... 中)
所以
__INLINE_SYSCALL_NARGS(write, fd, buf, count) → 3
得到
__SYSCALL_CONCAT(__INLINE_SYSCALL, 3)
即
__INLINE_SYSCALL3(write, fd, buf, count)
根据
#define __INLINE_SYSCALL3(name, a1, a2, a3) \
INLINE_SYSCALL(name, 3, a1, a2, a3)
得到
__INLINE_SYSCALL3(write, fd, buf, count)
↓
INLINE_SYSCALL(write, 3, fd, buf, count)
// sysdeps/unix/sysv/linux/sysdep.h
/* Runs INTERNAL_SYSCALL and keeps the raw result in sc_ret.  If
   INTERNAL_SYSCALL_ERROR_P flags that result as an error (hinted to the
   compiler as the unlikely case), the expression evaluates to
   SYSCALL_ERROR_LABEL applied to INTERNAL_SYSCALL_ERRNO (sc_ret); otherwise
   it evaluates to sc_ret itself.  */
#undef INLINE_SYSCALL
#define INLINE_SYSCALL(name, nr, args...) \
({ \
long int sc_ret = INTERNAL_SYSCALL (name, nr, args); \
__glibc_unlikely (INTERNAL_SYSCALL_ERROR_P (sc_ret)) \
? SYSCALL_ERROR_LABEL (INTERNAL_SYSCALL_ERRNO (sc_ret)) \
: sc_ret; \
})
最终调用 INTERNAL_SYSCALL(write, 3, fd, buf, count)
// sysdeps/unix/sysv/linux/x86_64/sysdep.h
/* Pastes nr onto `internal_syscall` to pick the arity-specific macro, and
   passes SYS_ify (name) as its first argument followed by args.  Note that nr
   only selects the macro name; it is not forwarded as an argument.  */
#undef INTERNAL_SYSCALL
#define INTERNAL_SYSCALL(name, nr, args...) \
internal_syscall##nr (SYS_ify (name), args)
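其中 SYS_ify (write) 展开为系统调用号 __NR_write(x86_64 上其值为 1),而参数个数 3 只用于拼接出宏名 internal_syscall3,并不会作为实参传入,于是变为
internal_syscall3(__NR_write, fd, buf, count)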
/* Three-argument syscall issued through inline asm.  Each argument is first
   copied into a temporary (TYPEFY and ARGIFY are defined elsewhere), then
   bound to a register variable: arg1 -> rdi, arg2 -> rsi, arg3 -> rdx.
   `number` is passed in the same register as output operand 0 (constraint
   "=a"), and resultvar is read back from that register after the `syscall`
   instruction.  The asm declares "memory" plus REGISTERS_CLOBBERED_BY_SYSCALL
   as clobbered, and the macro's value is resultvar cast to long int.  */
#undef internal_syscall3
#define internal_syscall3(number, arg1, arg2, arg3) \
({ \
unsigned long int resultvar; \
TYPEFY (arg3, __arg3) = ARGIFY (arg3); \
TYPEFY (arg2, __arg2) = ARGIFY (arg2); \
TYPEFY (arg1, __arg1) = ARGIFY (arg1); \
register TYPEFY (arg3, _a3) asm ("rdx") = __arg3; \
register TYPEFY (arg2, _a2) asm ("rsi") = __arg2; \
register TYPEFY (arg1, _a1) asm ("rdi") = __arg1; \
asm volatile ( \
"syscall\n\t" \
: "=a" (resultvar) \
: "0" (number), "r" (_a1), "r" (_a2), "r" (_a3) \
: "memory", REGISTERS_CLOBBERED_BY_SYSCALL); \
(long int) resultvar; \
})
通过内联汇编执行 syscall 指令陷入内核:系统调用号经 rax 传入,三个参数分别放在 rdi、rsi、rdx 中,返回值再从 rax 取回,由内核完成 write
流程总结
write() <-- 用户 API
↓
__libc_write() <-- glibc 实现
↓
SYSCALL_CANCEL(write, fd, buf, nbytes) <-- 处理线程取消点
↓
INLINE_SYSCALL_CALL(write, fd, buf, nbytes)
↓
INLINE_SYSCALL(write, 3, fd, buf, nbytes) <-- 根据参数个数自动展开
↓
INTERNAL_SYSCALL(write, 3, fd, buf, nbytes) <-- 用参数个数拼接出 internal_syscall3
↓
internal_syscall3(__NR_write, fd, buf, nbytes) <-- 内联汇编执行 syscall 指令,系统调用入口
↓
内核实现 sys_write()
本文来自博客园,作者:只A有缘人,转载请注明原文链接:https://www.cnblogs.com/cq429958/p/19326414
浙公网安备 33010602011771号