Glibc中write() 的实现与系统调用入口机制

源码

#include <unistd.h>

/*
 * Minimal demo: print a greeting by calling write() directly, so the only
 * libc entry point on the path is the write() wrapper analysed below.
 *
 * Returns 0 when the whole message was written, 1 on error or short write.
 */
int main(void) {
    static const char msg[] = "hello,world\n";
    /* sizeof counts the terminating NUL, which must not be written. */
    const size_t len = sizeof msg - 1;

    /* write() may fail (-1) or write fewer bytes than requested. */
    ssize_t written = write(STDOUT_FILENO, msg, len);
    if (written < 0 || (size_t)written != len) {
        return 1;
    }
    return 0;
}

分析

write 是 Linux 内核提供的系统调用。Glibc 提供了一个名为 write() 的函数,它是对该系统调用的封装,

其实现位于 sysdeps/unix/sysv/linux/write.c 中:

/* glibc's implementation behind write(): forwards fd, buf and nbytes
   unchanged to the `write` system call through the SYSCALL_CANCEL macro
   and returns whatever that macro evaluates to.  */
ssize_t __libc_write (int fd, const void *buf, size_t nbytes)
{
  return SYSCALL_CANCEL (write, fd, buf, nbytes);
}

/* Symbol plumbing: going by the macro names, __libc_write is also exposed
   under the names __write and write as weak aliases.
   NOTE(review): libc_hidden_def / libc_hidden_weak / weak_alias are defined
   elsewhere in glibc; confirm their exact visibility semantics there.  */
libc_hidden_def (__libc_write)
weak_alias (__libc_write, __write)
libc_hidden_weak (__write)
weak_alias (__libc_write, write)
libc_hidden_weak (write)

__libc_write 通过 SYSCALL_CANCEL 宏进行包装,该宏定义在 sysdeps/unix/sysdep.h 中:

// sysdeps/unix/sysdep.h
/* Issue a system call and evaluate to its result (a long int).

   The arguments are the syscall name followed by its parameters; they are
   passed through untouched to INLINE_SYSCALL_CALL.

   - If NO_SYSCALL_CANCEL_CHECKING is true, the call is made directly.
   - Otherwise the call is bracketed by LIBC_CANCEL_ASYNC (), whose return
     value is saved, and LIBC_CANCEL_RESET (), which receives that saved
     value back after the call.

   NOTE(review): the LIBC_CANCEL_* helpers are not shown here; presumably
   they switch the thread's cancellation type around the call so that it
   acts as a cancellation point -- confirm against their definitions.  */
#define SYSCALL_CANCEL(...) \
  ({									     \
    long int sc_ret;							     \
    if (NO_SYSCALL_CANCEL_CHECKING)					     \
      sc_ret = INLINE_SYSCALL_CALL (__VA_ARGS__); 			     \
    else								     \
      {									     \
	    int sc_cancel_oldtype = LIBC_CANCEL_ASYNC ();			     \
	    sc_ret = INLINE_SYSCALL_CALL (__VA_ARGS__);			     \
        LIBC_CANCEL_RESET (sc_cancel_oldtype);				     \
      }									     \
    sc_ret;								     \
  })

SYSCALL_CANCEL 内部又通过 INLINE_SYSCALL_CALL 进行了一层包装,相关的宏如下:

// sysdeps/unix/sysdep.h
/* Two-level token pasting: the outer macro lets its arguments be
   macro-expanded before the inner macro joins them with ##.  */
#define __SYSCALL_CONCAT_X(a,b)     a##b
#define __SYSCALL_CONCAT(a,b)       __SYSCALL_CONCAT_X (a, b)

/* Arity-specific entry for three syscall arguments: inserts the literal
   argument count 3 after the syscall name and forwards to INLINE_SYSCALL.
   NOTE(review): only the 3-argument variant is quoted here; the dispatch
   below needs one such macro for every count it can produce.  */
#define __INLINE_SYSCALL3(name, a1, a2, a3) \
  INLINE_SYSCALL (name, 3, a1, a2, a3)

/* Argument counter.  __INLINE_SYSCALL_NARGS appends 7,6,...,0 to its
   arguments and __INLINE_SYSCALL_NARGS_X yields whatever lands in the
   ninth position.  The syscall name occupies the first slot, so the value
   selected is the number of arguments that follow the name.  */
#define __INLINE_SYSCALL_NARGS_X(a,b,c,d,e,f,g,h,n,...) n
#define __INLINE_SYSCALL_NARGS(...) \
  __INLINE_SYSCALL_NARGS_X (__VA_ARGS__,7,6,5,4,3,2,1,0,)
/* Paste prefix b with the computed count and invoke the resulting macro
   with the original arguments.  */
#define __INLINE_SYSCALL_DISP(b,...) \
  __SYSCALL_CONCAT (b,__INLINE_SYSCALL_NARGS(__VA_ARGS__))(__VA_ARGS__)

/* Public entry: takes (name, args...) and dispatches to
   __INLINE_SYSCALL<N>, where N is the number of args after the name.  */
#define INLINE_SYSCALL_CALL(...) \
  __INLINE_SYSCALL_DISP (__INLINE_SYSCALL, __VA_ARGS__)

INLINE_SYSCALL_CALL(write, fd, buf, count)等价于
__INLINE_SYSCALL_DISP(__INLINE_SYSCALL, write, fd, buf, count) 
为了展开它,需要先求出 __INLINE_SYSCALL_NARGS(write, fd, buf, count),
它展开为(注意宏定义里末尾还带有一个逗号)
__INLINE_SYSCALL_NARGS_X(write, fd, buf, count,7,6,5,4,3,2,1,0,)
#define __INLINE_SYSCALL_NARGS_X(a,b,c,d,e,f,g,h,n,...) n
实参与形参一一对应(系统调用名 write 也占用一个位置):
a --> write
b --> fd
c --> buf
d --> count
e --> 7
f --> 6
g --> 5
h --> 4
n --> 3(第 9 个实参,恰好等于 write 之后的参数个数)
所以
__INLINE_SYSCALL_NARGS(write, fd, buf, count) → 3
代入 __INLINE_SYSCALL_DISP,得到
__SYSCALL_CONCAT(__INLINE_SYSCALL, 3)(write, fd, buf, count)
其中 __SYSCALL_CONCAT 把两个记号拼接为 __INLINE_SYSCALL3,即
__INLINE_SYSCALL3(write, fd, buf, count)
根据
#define __INLINE_SYSCALL3(name, a1, a2, a3) \
  INLINE_SYSCALL(name, 3, a1, a2, a3)
得到
__INLINE_SYSCALL3(write, fd, buf, count)
↓
INLINE_SYSCALL(write, 3, fd, buf, count)

// sysdeps/unix/sysv/linux/sysdep.h
/* Perform the system call via INTERNAL_SYSCALL and post-process its raw
   result.

   name  - syscall name (e.g. write)
   nr    - number of syscall arguments
   args  - the syscall arguments

   If INTERNAL_SYSCALL_ERROR_P reports the raw result as an error (marked
   unlikely for branch prediction), the expression evaluates to
   SYSCALL_ERROR_LABEL applied to the error number extracted by
   INTERNAL_SYSCALL_ERRNO; otherwise it evaluates to the raw result.

   NOTE(review): SYSCALL_ERROR_LABEL is not shown here; presumably it
   stores the error number in errno and yields -1 -- confirm.  */
#undef INLINE_SYSCALL
#define INLINE_SYSCALL(name, nr, args...)				\
  ({									\
    long int sc_ret = INTERNAL_SYSCALL (name, nr, args);		\
    __glibc_unlikely (INTERNAL_SYSCALL_ERROR_P (sc_ret))		\
    ? SYSCALL_ERROR_LABEL (INTERNAL_SYSCALL_ERRNO (sc_ret))		\
    : sc_ret;								\
  })
因此 INLINE_SYSCALL(write, 3, fd, buf, count) 会进一步调用 INTERNAL_SYSCALL(write, 3, fd, buf, count),并在其返回后检查结果是否表示出错。

// sysdeps/unix/sysv/linux/x86_64/sysdep.h
/* Dispatch to the arity-specific helper: `nr` is token-pasted onto
   `internal_syscall` to pick the macro, and the syscall name is passed
   through SYS_ify to form the first argument.
   NOTE(review): SYS_ify is not shown here; presumably it maps `name` to
   its __NR_<name> syscall-number constant -- confirm in the header.  */
#undef INTERNAL_SYSCALL
#define INTERNAL_SYSCALL(name, nr, args...)				\
	internal_syscall##nr (SYS_ify (name), args)
其中 nr(即 3)被拼接到宏名上,而 SYS_ify(write) 展开为系统调用号 __NR_write(x86_64 上为 1),于是变为
internal_syscall3(__NR_write, fd, buf, count)

/* Three-argument system call using the x86-64 `syscall` instruction.

   number           - syscall number
   arg1, arg2, arg3 - syscall arguments

   Each argument is first converted with ARGIFY into a plain temporary and
   only then copied into a register-pinned local: arg1 -> rdi, arg2 -> rsi,
   arg3 -> rdx.  The "0" input constraint places `number` in the same
   register as output operand 0, which the "=a" constraint binds to the
   a register (rax); the result is read back from that register into
   resultvar.  The asm declares a "memory" clobber plus the registers
   listed by REGISTERS_CLOBBERED_BY_SYSCALL.  The whole expression
   evaluates to resultvar cast to long int.

   NOTE(review): TYPEFY and ARGIFY are not shown here; presumably they
   select a register-sized type and convert the argument to it.  The
   temporaries are presumably evaluated first so that argument expressions
   cannot disturb the pinned registers -- confirm.  */
#undef internal_syscall3
#define internal_syscall3(number, arg1, arg2, arg3)			\
({									\
    unsigned long int resultvar;					\
    TYPEFY (arg3, __arg3) = ARGIFY (arg3);			 	\
    TYPEFY (arg2, __arg2) = ARGIFY (arg2);			 	\
    TYPEFY (arg1, __arg1) = ARGIFY (arg1);			 	\
    register TYPEFY (arg3, _a3) asm ("rdx") = __arg3;			\
    register TYPEFY (arg2, _a2) asm ("rsi") = __arg2;			\
    register TYPEFY (arg1, _a1) asm ("rdi") = __arg1;			\
    asm volatile (							\
    "syscall\n\t"							\
    : "=a" (resultvar)							\
    : "0" (number), "r" (_a1), "r" (_a2), "r" (_a3)			\
    : "memory", REGISTERS_CLOBBERED_BY_SYSCALL);			\
    (long int) resultvar;						\
})
至此,glibc 通过内联汇编执行 syscall 指令:系统调用号放在 rax 中,三个参数依次放在 rdi、rsi、rdx 中,CPU 由此陷入内核执行 write 系统调用,返回值再从 rax 取回。

流程总结

write()        <-- 用户 API
  ↓
__libc_write() <-- glibc 实现
  ↓
SYSCALL_CANCEL(write, fd, buf, nbytes)   <-- 处理线程取消点
  ↓
INLINE_SYSCALL_CALL(write, fd, buf, nbytes)
  ↓
INLINE_SYSCALL(write, 3, fd, buf, nbytes) <-- 根据参数个数自动展开
  ↓
INTERNAL_SYSCALL(write, 3, fd, buf, nbytes) <-- 返回后由 INLINE_SYSCALL 检查是否出错
  ↓
internal_syscall3(__NR_write, fd, buf, nbytes) <-- 内联汇编执行 syscall 指令,系统调用入口
  ↓
内核实现 sys_write()
posted @ 2025-12-09 14:13  只A有缘人  阅读(1)  评论(0)    收藏  举报