编译和链接
还是从C程序“helloWorld”说起。我用的IDE是Xcode,并且安装了Command Line Tools,这主要是为了让我们在控制台下可以使用gcc编译器。用Xcode创建一个最简单的C程序helloWorld。这里我的代码是:
// // main.c // helloWorld // // Created by xiang on 13-4-19. // Copyright (c) 2013年 xiang. All rights reserved. // #include <stdio.h> int main(int argc, const char * argv[]) { // insert code here... printf("Hello, World!\n"); return 0; }
运行输出
看似简单得很,事实上,上面的过程可以分解为4个步骤,分别是预处理(Preprocessing)、编译(Compilation)、汇编(Assembly)、链接(Linking)。接下来我们看看都发生了什么。
重要的工具是上面提到的gcc。
一、首先模拟预编译过程
输入命令
gcc -E main.c -o main.i
可以看到程序目录生成main.i文件 我们看看有什么东东。
# 1 "main.c" # 1 "<built-in>" # 1 "<command-line>" # 1 "main.c" # 9 "main.c" # 1 "/usr/include/stdio.h" 1 3 4 # 64 "/usr/include/stdio.h" 3 4 # 1 "/usr/include/sys/cdefs.h" 1 3 4 # 406 "/usr/include/sys/cdefs.h" 3 4 # 1 "/usr/include/sys/_symbol_aliasing.h" 1 3 4 # 407 "/usr/include/sys/cdefs.h" 2 3 4 # 472 "/usr/include/sys/cdefs.h" 3 4 # 1 "/usr/include/sys/_posix_availability.h" 1 3 4 # 473 "/usr/include/sys/cdefs.h" 2 3 4 # 65 "/usr/include/stdio.h" 2 3 4 # 1 "/usr/include/Availability.h" 1 3 4 # 144 "/usr/include/Availability.h" 3 4 # 1 "/usr/include/AvailabilityInternal.h" 1 3 4 # 145 "/usr/include/Availability.h" 2 3 4 # 66 "/usr/include/stdio.h" 2 3 4 # 1 "/usr/include/_types.h" 1 3 4 # 27 "/usr/include/_types.h" 3 4 # 1 "/usr/include/sys/_types.h" 1 3 4 # 33 "/usr/include/sys/_types.h" 3 4 # 1 "/usr/include/machine/_types.h" 1 3 4 # 32 "/usr/include/machine/_types.h" 3 4 # 1 "/usr/include/i386/_types.h" 1 3 4 # 37 "/usr/include/i386/_types.h" 3 4 typedef signed char __int8_t; typedef unsigned char __uint8_t; typedef short __int16_t; typedef unsigned short __uint16_t; typedef int __int32_t; typedef unsigned int __uint32_t; typedef long long __int64_t; typedef unsigned long long __uint64_t; typedef long __darwin_intptr_t; typedef unsigned int __darwin_natural_t; # 70 "/usr/include/i386/_types.h" 3 4 typedef int __darwin_ct_rune_t; typedef union { char __mbstate8[128]; long long _mbstateL; } __mbstate_t; typedef __mbstate_t __darwin_mbstate_t; typedef long int __darwin_ptrdiff_t; typedef long unsigned int __darwin_size_t; typedef __builtin_va_list __darwin_va_list; typedef int __darwin_wchar_t; typedef __darwin_wchar_t __darwin_rune_t; typedef int __darwin_wint_t; typedef unsigned long __darwin_clock_t; typedef __uint32_t __darwin_socklen_t; typedef long __darwin_ssize_t; typedef long __darwin_time_t; # 33 "/usr/include/machine/_types.h" 2 3 4 # 34 "/usr/include/sys/_types.h" 2 3 4 # 58 "/usr/include/sys/_types.h" 3 4 struct __darwin_pthread_handler_rec { 
void (*__routine)(void *); void *__arg; struct __darwin_pthread_handler_rec *__next; }; struct _opaque_pthread_attr_t { long __sig; char __opaque[56]; }; struct _opaque_pthread_cond_t { long __sig; char __opaque[40]; }; struct _opaque_pthread_condattr_t { long __sig; char __opaque[8]; }; struct _opaque_pthread_mutex_t { long __sig; char __opaque[56]; }; struct _opaque_pthread_mutexattr_t { long __sig; char __opaque[8]; }; struct _opaque_pthread_once_t { long __sig; char __opaque[8]; }; struct _opaque_pthread_rwlock_t { long __sig; char __opaque[192]; }; struct _opaque_pthread_rwlockattr_t { long __sig; char __opaque[16]; }; struct _opaque_pthread_t { long __sig; struct __darwin_pthread_handler_rec *__cleanup_stack; char __opaque[1168]; }; # 94 "/usr/include/sys/_types.h" 3 4 typedef __int64_t __darwin_blkcnt_t; typedef __int32_t __darwin_blksize_t; typedef __int32_t __darwin_dev_t; typedef unsigned int __darwin_fsblkcnt_t; typedef unsigned int __darwin_fsfilcnt_t; typedef __uint32_t __darwin_gid_t; typedef __uint32_t __darwin_id_t; typedef __uint64_t __darwin_ino64_t; typedef __darwin_ino64_t __darwin_ino_t; typedef __darwin_natural_t __darwin_mach_port_name_t; typedef __darwin_mach_port_name_t __darwin_mach_port_t; typedef __uint16_t __darwin_mode_t; typedef __int64_t __darwin_off_t; typedef __int32_t __darwin_pid_t; typedef struct _opaque_pthread_attr_t __darwin_pthread_attr_t; typedef struct _opaque_pthread_cond_t __darwin_pthread_cond_t; typedef struct _opaque_pthread_condattr_t __darwin_pthread_condattr_t; typedef unsigned long __darwin_pthread_key_t; typedef struct _opaque_pthread_mutex_t __darwin_pthread_mutex_t; typedef struct _opaque_pthread_mutexattr_t __darwin_pthread_mutexattr_t; typedef struct _opaque_pthread_once_t __darwin_pthread_once_t; typedef struct _opaque_pthread_rwlock_t __darwin_pthread_rwlock_t; typedef struct _opaque_pthread_rwlockattr_t __darwin_pthread_rwlockattr_t; typedef struct _opaque_pthread_t *__darwin_pthread_t; typedef __uint32_t 
__darwin_sigset_t; typedef __int32_t __darwin_suseconds_t; typedef __uint32_t __darwin_uid_t; typedef __uint32_t __darwin_useconds_t; typedef unsigned char __darwin_uuid_t[16]; typedef char __darwin_uuid_string_t[37]; # 28 "/usr/include/_types.h" 2 3 4 # 39 "/usr/include/_types.h" 3 4 typedef int __darwin_nl_item; typedef int __darwin_wctrans_t; typedef __uint32_t __darwin_wctype_t; # 68 "/usr/include/stdio.h" 2 3 4 typedef __darwin_va_list va_list; typedef __darwin_size_t size_t; typedef __darwin_off_t fpos_t; # 96 "/usr/include/stdio.h" 3 4 struct __sbuf { unsigned char *_base; int _size; }; struct __sFILEX; # 130 "/usr/include/stdio.h" 3 4 typedef struct __sFILE { unsigned char *_p; int _r; int _w; short _flags; short _file; struct __sbuf _bf; int _lbfsize; void *_cookie; int (*_close)(void *); int (*_read) (void *, char *, int); fpos_t (*_seek) (void *, fpos_t, int); int (*_write)(void *, const char *, int); struct __sbuf _ub; struct __sFILEX *_extra; int _ur; unsigned char _ubuf[3]; unsigned char _nbuf[1]; struct __sbuf _lb; int _blksize; fpos_t _offset; } FILE; extern FILE *__stdinp; extern FILE *__stdoutp; extern FILE *__stderrp; # 238 "/usr/include/stdio.h" 3 4 void clearerr(FILE *); int fclose(FILE *); int feof(FILE *); int ferror(FILE *); int fflush(FILE *); int fgetc(FILE *); int fgetpos(FILE * , fpos_t *); char *fgets(char * , int, FILE *); FILE *fopen(const char * , const char * ) __asm("_" "fopen" ); int fprintf(FILE * , const char * , ...) __attribute__((__format__ (__printf__, 2, 3))); int fputc(int, FILE *); int fputs(const char * , FILE * ) __asm("_" "fputs" ); size_t fread(void * , size_t, size_t, FILE * ); FILE *freopen(const char * , const char * , FILE * ) __asm("_" "freopen" ); int fscanf(FILE * , const char * , ...) 
__attribute__((__format__ (__scanf__, 2, 3))); int fseek(FILE *, long, int); int fsetpos(FILE *, const fpos_t *); long ftell(FILE *); size_t fwrite(const void * , size_t, size_t, FILE * ) __asm("_" "fwrite" ); int getc(FILE *); int getchar(void); char *gets(char *); void perror(const char *); int printf(const char * , ...) __attribute__((__format__ (__printf__, 1, 2))); int putc(int, FILE *); int putchar(int); int puts(const char *); int remove(const char *); int rename (const char *, const char *); void rewind(FILE *); int scanf(const char * , ...) __attribute__((__format__ (__scanf__, 1, 2))); void setbuf(FILE * , char * ); int setvbuf(FILE * , char * , int, size_t); int sprintf(char * , const char * , ...) __attribute__((__format__ (__printf__, 2, 3))); int sscanf(const char * , const char * , ...) __attribute__((__format__ (__scanf__, 2, 3))); FILE *tmpfile(void); char *tmpnam(char *); int ungetc(int, FILE *); int vfprintf(FILE * , const char * , va_list) __attribute__((__format__ (__printf__, 2, 0))); int vprintf(const char * , va_list) __attribute__((__format__ (__printf__, 1, 0))); int vsprintf(char * , const char * , va_list) __attribute__((__format__ (__printf__, 2, 0))); # 296 "/usr/include/stdio.h" 3 4 char *ctermid(char *); FILE *fdopen(int, const char *) __asm("_" "fdopen" ); int fileno(FILE *); # 318 "/usr/include/stdio.h" 3 4 int pclose(FILE *); FILE *popen(const char *, const char *) __asm("_" "popen" ); # 340 "/usr/include/stdio.h" 3 4 int __srget(FILE *); int __svfscanf(FILE *, const char *, va_list) __attribute__((__format__ (__scanf__, 2, 0))); int __swbuf(int, FILE *); static __inline int __sputc(int _c, FILE *_p) { if (--_p->_w >= 0 || (_p->_w >= _p->_lbfsize && (char)_c != '\n')) return (*_p->_p++ = _c); else return (__swbuf(_c, _p)); } # 377 "/usr/include/stdio.h" 3 4 void flockfile(FILE *); int ftrylockfile(FILE *); void funlockfile(FILE *); int getc_unlocked(FILE *); int getchar_unlocked(void); int putc_unlocked(int, FILE *); int 
putchar_unlocked(int); int getw(FILE *); int putw(int, FILE *); char *tempnam(const char *, const char *) __asm("_" "tempnam" ); # 414 "/usr/include/stdio.h" 3 4 typedef __darwin_off_t off_t; int fseeko(FILE *, off_t, int); off_t ftello(FILE *); int snprintf(char * , size_t, const char * , ...) __attribute__((__format__ (__printf__, 3, 4))); int vfscanf(FILE * , const char * , va_list) __attribute__((__format__ (__scanf__, 2, 0))); int vscanf(const char * , va_list) __attribute__((__format__ (__scanf__, 1, 0))); int vsnprintf(char * , size_t, const char * , va_list) __attribute__((__format__ (__printf__, 3, 0))); int vsscanf(const char * , const char * , va_list) __attribute__((__format__ (__scanf__, 2, 0))); # 442 "/usr/include/stdio.h" 3 4 typedef __darwin_ssize_t ssize_t; int dprintf(int, const char * , ...) __attribute__((__format__ (__printf__, 2, 3))) __attribute__((visibility("default"))); int vdprintf(int, const char * , va_list) __attribute__((__format__ (__printf__, 2, 0))) __attribute__((visibility("default"))); ssize_t getdelim(char ** , size_t * , int, FILE * ) __attribute__((visibility("default"))); ssize_t getline(char ** , size_t * , FILE * ) __attribute__((visibility("default"))); extern const int sys_nerr; extern const char *const sys_errlist[]; int asprintf(char **, const char *, ...) 
__attribute__((__format__ (__printf__, 2, 3))); char *ctermid_r(char *); char *fgetln(FILE *, size_t *); const char *fmtcheck(const char *, const char *); int fpurge(FILE *); void setbuffer(FILE *, char *, int); int setlinebuf(FILE *); int vasprintf(char **, const char *, va_list) __attribute__((__format__ (__printf__, 2, 0))); FILE *zopen(const char *, const char *, int); FILE *funopen(const void *, int (*)(void *, char *, int), int (*)(void *, const char *, int), fpos_t (*)(void *, fpos_t, int), int (*)(void *)); # 499 "/usr/include/stdio.h" 3 4 # 1 "/usr/include/secure/_stdio.h" 1 3 4 # 31 "/usr/include/secure/_stdio.h" 3 4 # 1 "/usr/include/secure/_common.h" 1 3 4 # 32 "/usr/include/secure/_stdio.h" 2 3 4 # 45 "/usr/include/secure/_stdio.h" 3 4 extern int __sprintf_chk (char * , int, size_t, const char * , ...); extern int __snprintf_chk (char * , size_t, int, size_t, const char * , ...); extern int __vsprintf_chk (char * , int, size_t, const char * , va_list); extern int __vsnprintf_chk (char * , size_t, int, size_t, const char * , va_list); # 500 "/usr/include/stdio.h" 2 3 4 # 10 "main.c" 2 int main(int argc, const char * argv[]) { printf("Hello, World!\n"); return 0; }
一堆乱七八糟的东西暂且不管。对照这个程序和源程序,首先可以知道main.i中多了一堆typedef、结构体定义、函数声明等,以及它们所属的文件(包含路径)。少了源代码前面的注释和一句很重要的预编译指令
#include <stdio.h>
事实上那堆乱七八糟的东西就是预编译后的#include <stdio.h>的展开文件。预编译过程主要处理规则如下:
1.将所有的“#define”删除,并且展开所有宏定义。
2.处理所有条件预编译指令。
3.处理“#include”预编译指令,将被包含的文件插入到该预编译指令的位置。注意,这个过程是递归进行,就是包含的文件可能还包含其他文件。
4.删除所有的注释,包括“//”行注释和“/* */”块注释。
5.添加行号和文件名标识,比如 # 2 "main.c" 2,以便编译时编译器产生调试用的行号信息,并在产生编译错误或警告时能够显示行号。
6.保留所有#pragma编译器指令,因为编译器要使用他们。
经预编译后的.i文件不再包含任何宏定义,因为所有的宏都已经被展开。
二、模拟编译过程
编译过程就是对预处理完的文件进行一系列的词法分析、语法分析、语义分析及优化,最后产生相应的汇编代码。
输入如下命令
gcc -S main.i -o main.s
生成main.s,贴上代码
.section __TEXT,__text,regular,pure_instructions .globl _main .align 4, 0x90 _main: Leh_func_begin1: pushq %rbp Ltmp0: movq %rsp, %rbp Ltmp1: subq $32, %rsp Ltmp2: movl %edi, %eax movl %eax, -4(%rbp) movq %rsi, -16(%rbp) leaq L_.str(%rip), %rax movq %rax, %rdi callq _puts movl $0, -24(%rbp) movl -24(%rbp), %eax movl %eax, -20(%rbp) movl -20(%rbp), %eax addq $32, %rsp popq %rbp ret Leh_func_end1: .section __TEXT,__cstring,cstring_literals L_.str: .asciz "Hello, World!" .section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support EH_frame0: Lsection_eh_frame: Leh_frame_common: Lset0 = Leh_frame_common_end-Leh_frame_common_begin .long Lset0 Leh_frame_common_begin: .long 0 .byte 1 .asciz "zR" .byte 1 .byte 120 .byte 16 .byte 1 .byte 16 .byte 12 .byte 7 .byte 8 .byte 144 .byte 1 .align 3 Leh_frame_common_end: .globl _main.eh _main.eh: Lset1 = Leh_frame_end1-Leh_frame_begin1 .long Lset1 Leh_frame_begin1: Lset2 = Leh_frame_begin1-Leh_frame_common .long Lset2 Ltmp3: .quad Leh_func_begin1-Ltmp3 Lset3 = Leh_func_end1-Leh_func_begin1 .quad Lset3 .byte 0 .byte 4 Lset4 = Ltmp0-Leh_func_begin1 .long Lset4 .byte 14 .byte 16 .byte 134 .byte 2 .byte 4 Lset5 = Ltmp1-Ltmp0 .long Lset5 .byte 13 .byte 6 .align 3 Leh_frame_end1: .subsections_via_symbols
不同的语言会调用不同的预编译和编译程序:C是cc1,C++是cc1plus,Objective-C是cc1obj(注意是数字1,不是字母l)。所以gcc这个命令只是对这些后台程序的封装。
三、模拟汇编过程
汇编器将汇编代码转换为机器可以执行的机器指令。这个过程是根据汇编指令与机器指令的对照关系一一翻译。
gcc -c main.s -o main.o
到此输出的文件main.o是目标文件。
链接过程主要包括地址空间分配、符号决议、重定位等步骤,这部分以后再讨论。

浙公网安备 33010602011771号