08-SO加载解析过程

1 JAVA层函数调用关系

本文通过分析Android 4.4版本的源码,了解linker是如何加载并链接SO文件。在阅读本文之前,读者最好阅读有关ELF的文件格式,可以阅读《ELF文件结构学习》深入了解ELF的文件结构。

so加载的全局流程函数关系如下图所示:

Java层的函数调用关系图如下:

我们从下述JAVA层中加载so函数开始作为入口点进行追踪。

System.loadLibrary(“libxxx.so”);
/**
 * Loads the native library {@code libName} by forwarding to the current
 * {@link Runtime}, together with the class loader reported by
 * {@code VMStack.getCallingClassLoader()}.
 *
 * @param libName library name, passed through unchanged
 */
public static void loadLibrary(String libName) {
    final Runtime runtime = Runtime.getRuntime();
    // The class loader is queried from this frame, exactly as in the one-line form.
    runtime.loadLibrary(libName, VMStack.getCallingClassLoader());
}

该函数位置:libcore\luni\src\main\java\java\lang\System.java,函数定义如上。

loadLibrary函数内调用了Runtime类里的loadLibrary函数。该函数主要是搜索so库,加载并链接so文件。

该函数位置:libcore\luni\src\main\java\java\lang\Runtime.java。函数关键代码如下:

/*
 * Excerpt of Runtime.loadLibrary(String, ClassLoader); the "……" line marks
 * code omitted by the article (the path taken when loader is null).
 *
 * With a non-null loader: resolve the library name to a file path, load that
 * file, and throw UnsatisfiedLinkError if either step fails.
 */
void loadLibrary(String libraryName, ClassLoader loader) {

        if (loader != null) {
            // 1. Ask the class loader for the path of the .so file.
            String filename = loader.findLibrary(libraryName);
            if (filename == null) {
                throw new UnsatisfiedLinkError("Couldn't load " + libraryName + " from loader " + loader + ": findLibrary returned null");
            }

            // 2. Load the .so file; a non-null return value is an error message.
            String error = doLoad(filename, loader);
            if (error != null) {
                throw new UnsatisfiedLinkError(error);
            }
            return;
        }
    ……

}

我们接着跟进doLoad函数。该函数位置:libcore\luni\src\main\java\java\lang\Runtime.java。函数定义如下:

private String doLoad(String name, ClassLoader loader) {

        String ldLibraryPath = null;

        if (loader != null && loader instanceof BaseDexClassLoader) {

            ldLibraryPath = ((BaseDexClassLoader) loader).getLdLibraryPath();

        }

                synchronized (this) {

            //调用native函数加载SO

            return nativeLoad(name, loader, ldLibraryPath);

        }

    }

可以看到,doLoad调用了native函数nativeLoad加载SO,下一节我们从该函数继续追踪分析。

2 Native层函数调用关系

Native层函数调用关系如上图。

继续上一节的分析,查看Android源码,nativeLoad的函数位置:dalvik\vm\native\java_lang_Runtime.cpp。

其具体函数定义如下:

static void Dalvik_java_lang_Runtime_nativeLoad(const u4* args, JValue* pResult)
{

    StringObject* fileNameObj = (StringObject*) args[0];
    Object* classLoader = (Object*) args[1];
    char* fileName = NULL;
    StringObject* result = NULL;
    char* reason = NULL;
    bool success;
    assert(fileNameObj != NULL);
    fileName = dvmCreateCstrFromString(fileNameObj);
    success = dvmLoadNativeCode(fileName, classLoader, &reason);

    if (!success) {
        const char* msg = (reason != NULL) ? reason : "unknown failure";
        result = dvmCreateStringFromCstr(msg);
        dvmReleaseTrackedAlloc((Object*) result, NULL);
    }

    free(reason);
    free(fileName);
    RETURN_PTR(result);
}

Dalvik_java_lang_Runtime_nativeLoad调用了dvmLoadNativeCode函数来加载so文件,fileName是so文件的路径。dvmLoadNativeCode函数位置:dalvik\vm\Native.cpp,关键代码如下:

/*
 * Excerpt of dvmLoadNativeCode(): loads the shared library at pathName on
 * behalf of classLoader.
 *
 * Lines consisting of "..." / "......" mark source omitted by the article, so
 * some names used here (handle, self, verbose) are declared in the omitted
 * parts and the braces of this excerpt do not balance.
 *
 * Visible flow:
 *   - if the library already has an entry, succeed only when it belongs to
 *     the same class loader and checkOnLoadResult() accepts it;
 *   - otherwise dlopen() the file, register a new SharedLib entry, and call
 *     the library's JNI_OnLoad if it exports one.
 */
bool dvmLoadNativeCode(const char* pathName, Object* classLoader, char** detail)
{

   SharedLib* pEntry;
    ......

   // Check whether this .so has already been loaded.
   pEntry = findSharedLibEntry(pathName);
    if (pEntry != NULL) {
        if (pEntry->classLoader != classLoader) {
            ALOGW("Shared lib '%s' already opened by CL %p; can't open in %p",pathName, pEntry->classLoader, classLoader);
            return false;
        }

        if (verbose) {
            ALOGD("Shared lib '%s' already loaded in same CL %p",pathName, classLoader);
        }

        if (!checkOnLoadResult(pEntry))
            return false;

        return true;
    }

   ...

    // First time this .so is loaded: open it through the dynamic linker.
    handle = dlopen(pathName, RTLD_LAZY);
    ......

    /* create a new entry */
    SharedLib* pNewEntry;
    pNewEntry = (SharedLib*) calloc(1, sizeof(SharedLib));
    pNewEntry->pathName = strdup(pathName);
    pNewEntry->handle = handle;
    pNewEntry->classLoader = classLoader;
    dvmInitMutex(&pNewEntry->onLoadLock);
    pthread_cond_init(&pNewEntry->onLoadCond, NULL);
    pNewEntry->onLoadThreadId = self->threadId;

    // Add the entry to the table of loaded libraries.
    SharedLib* pActualEntry = addSharedLibEntry(pNewEntry);
    if (pNewEntry != pActualEntry) {
        // A different entry came back: drop ours and use the one already registered.
        LOGI("WOW: we lost a race to add a shared lib (%s CL=%p)",pathName, classLoader);
        freeSharedLibEntry(pNewEntry);
        return checkOnLoadResult(pActualEntry);
    } else {
        ......
        bool result = false;
        void* vonLoad;
        int version;
         // Look up the address of the "JNI_OnLoad" symbol.
        vonLoad = dlsym(handle, "JNI_OnLoad");
       if (vonLoad == NULL) {
            ALOGD("No JNI_OnLoad found in %s %p, skipping init", pathName,    classLoader);
            result = true;
        } else {
                // Call JNI_OnLoad.
                OnLoadFunc func = (OnLoadFunc)vonLoad;
                ......
                version = (*func)(gDvmJni.jniVm, NULL);
        }
      ......
 }

        // Record the outcome on the entry and clear the loading-thread id.
        if (result)
            pNewEntry->onLoadResult = kOnLoadOkay;
        else
            pNewEntry->onLoadResult = kOnLoadFailed;

        pNewEntry->onLoadThreadId = 0;

      ......

}
void* dlopen(const char* filename, int flags) {

  ScopedPthreadMutexLocker locker(&gDlMutex);

  soinfo* result = do_dlopen(filename, flags);

  if (result == NULL) {

    __bionic_format_dlerror("dlopen failed", linker_get_error_buffer());

    return NULL;

  }

  return result;

}

上面的dlopen函数位于bionic\linker\dlfcn.c。dlopen调用do_dlopen函数打开so文件并返回soinfo结构指针对象。我们在linker.h文件中查看soinfo结构体的定义:

// Per-library bookkeeping record used by the linker. The loader fills in the
// mapping fields (base, size, load_bias, phdr, phnum); the link step fills in
// the tables taken from the dynamic section.
struct soinfo {

 public:

  char name[SOINFO_NAME_LEN];
  const Elf32_Phdr* phdr; // program header table
  size_t phnum; // number of entries in the program header table
  Elf32_Addr entry;  // program entry point (for executables)
  Elf32_Addr base;  
  unsigned size;
 
  uint32_t unused1;  // DO NOT USE, maintained for compatibility

  Elf32_Dyn* dynamic; // dynamic link table

  uint32_t unused2; // DO NOT USE, maintained for compatibility
  uint32_t unused3; // DO NOT USE, maintained for compatibility

  soinfo* next;
  unsigned flags;


  const char* strtab;  // string table named by DT_STRTAB (normally ".dynstr", not ".shstrtab")
  Elf32_Sym* symtab;  // symbol table named by DT_SYMTAB (the ".dynsym" section)

  // Symbol hash table fields, read from the DT_HASH table (see the ".hash" layout).

  size_t nbucket;
  size_t nchain;
  unsigned* bucket;
  unsigned* chain;

  unsigned* plt_got;

  Elf32_Rel* plt_rel;
  size_t plt_rel_count;

  Elf32_Rel* rel;
  size_t rel_count;

  linker_function_t* preinit_array;
  size_t preinit_array_count;

  linker_function_t* init_array;
  size_t init_array_count;
  linker_function_t* fini_array;
  size_t fini_array_count;

  linker_function_t init_func;
  linker_function_t fini_func;

  unsigned* ARM_exidx;
  size_t ARM_exidx_count;

  size_t ref_count;
  link_map_t link_map;

  bool constructors_called;

  // When you read a virtual address from the ELF file, add this
  // value to get the corresponding address in the process' address space.
  Elf32_Addr load_bias;

  bool has_text_relocations;
  bool has_DT_SYMBOLIC;

  void CallConstructors();
  void CallDestructors();
  void CallPreInitConstructors();

 private:
  void CallArray(const char* array_name, linker_function_t* functions, size_t count, bool reverse);

  void CallFunction(const char* function_name, linker_function_t function);

};
// Second, double-spaced copy of the soinfo definition quoted by the article;
// the members are the same as in the copy directly before it.
struct soinfo {

 public:

  char name[SOINFO_NAME_LEN];

  const Elf32_Phdr* phdr; // program header table

  size_t phnum; // number of entries in the program header table

  Elf32_Addr entry;  // program entry point (for executables)

  Elf32_Addr base;  

  unsigned size;

 

  uint32_t unused1;  // DO NOT USE, maintained for compatibility.

 

  Elf32_Dyn* dynamic; // dynamic link table

 

  uint32_t unused2; // DO NOT USE, maintained for compatibility

  uint32_t unused3; // DO NOT USE, maintained for compatibility

 

  soinfo* next;

  unsigned flags;

 

  const char* strtab;  // string table named by DT_STRTAB (normally ".dynstr", not ".shstrtab")

  Elf32_Sym* symtab;  // symbol table named by DT_SYMTAB (the ".dynsym" section)

 

  // Symbol hash table fields, read from the DT_HASH table (see the ".hash" layout).

  size_t nbucket;

  size_t nchain;

  unsigned* bucket;

  unsigned* chain;

 

  unsigned* plt_got;

 

  Elf32_Rel* plt_rel;

  size_t plt_rel_count;

 

  Elf32_Rel* rel;

  size_t rel_count;

 

  linker_function_t* preinit_array;

  size_t preinit_array_count;

 

  linker_function_t* init_array;

  size_t init_array_count;

  linker_function_t* fini_array;

  size_t fini_array_count;

 

  linker_function_t init_func;

  linker_function_t fini_func;

 

  unsigned* ARM_exidx;

  size_t ARM_exidx_count;

 

  size_t ref_count;

  link_map_t link_map;

 

  bool constructors_called;

 

  // When you read a virtual address from the ELF file, add this

  // value to get the corresponding address in the process' address space.

  Elf32_Addr load_bias;

 

  bool has_text_relocations;

  bool has_DT_SYMBOLIC;

 

  void CallConstructors();

  void CallDestructors();

  void CallPreInitConstructors();

 

 private:

  void CallArray(const char* array_name, linker_function_t* functions, size_t count, bool reverse);

  void CallFunction(const char* function_name, linker_function_t function);

};

soinfo结构体需要结合ELF文件格式来理解。接下来查看do_dlopen函数的定义,其位置:bionic\linker\linker.cpp。

// Validates the dlopen flags, then finds (loading and linking if necessary)
// the named library and runs its constructors.
// Returns the library's soinfo, or NULL when the flags are invalid or
// find_library() fails.
soinfo* do_dlopen(const char* name, int flags) {
  const int kAcceptedFlags = RTLD_NOW | RTLD_LAZY | RTLD_LOCAL | RTLD_GLOBAL;
  if ((flags & ~kAcceptedFlags) != 0) {
    DL_ERR("invalid flags to dlopen: %x", flags);
    return NULL;
  }

  // The soinfo pool is made writable for the duration of the lookup and
  // switched back to read-only afterwards.
  set_soinfo_pool_protection(PROT_READ | PROT_WRITE);
  soinfo* const library = find_library(name);
  if (library != NULL) {
    library->CallConstructors();
  }
  set_soinfo_pool_protection(PROT_READ);

  return library;
}

do_dlopen调用find_library函数加载并链接so文件,然后返回soinfo指针对象;如果加载链接成功,则调用CallConstructors进行初始化工作,跟“.init_array”节区相关。find_library函数位置:bionic\linker\linker.cpp,函数定义如下:

// Looks up (loading and linking if needed) the named library and takes one
// reference on it. Returns NULL when find_library_internal() fails.
static soinfo* find_library(const char* name) {
  soinfo* const library = find_library_internal(name);
  if (library == NULL) {
    return NULL;
  }
  ++library->ref_count;
  return library;
}
// Returns the soinfo for `name`, loading and linking the library when it is
// not already in the loaded list. A NULL name yields `somain`.
// Returns NULL on a recursive link, a load failure, or a link failure.
static soinfo* find_library_internal(const char* name) {
  if (name == NULL) {
    return somain;
  }

  // Already known to the linker?
  soinfo* si = find_loaded_library(name);
  if (si != NULL) {
    // Known but not yet flagged as linked: reported as a recursive link.
    if ((si->flags & FLAG_LINKED) == 0) {
      DL_ERR("OOPS: recursive link to \"%s\"", si->name);
      return NULL;
    }
    return si;
  }

  // Load the file into memory.
  si = load_library(name);
  if (si == NULL) {
    return NULL;
  }

  // Link it. A false/zero result is treated as failure; the mapping and the
  // soinfo are then released.
  if (!soinfo_link_image(si)) {
    munmap(reinterpret_cast<void*>(si->base), si->size);
    soinfo_free(si);
    si = NULL;
  }
  return si;
}

find_library内部调用的是find_library_internal,其位置:bionic\linker\linker.cpp,函数定义如上。

该函数的主要实现加载和链接的过程,是最关键的代码部分,其函数调用关系的全局图如下:

3 Native层-加载so到内存

首先观察加载过程,load_library函数位置:bionic\linker\linker.cpp。函数定义如下:

static soinfo* load_library(const char* name) {

    // 1.打开so文件

    int fd = open_library(name);

    if (fd == -1) {

        DL_ERR("library \"%s\" not found", name);

        return NULL;

    }

//2.读取ELF头文件并加载segment到内存

    ElfReader elf_reader(name, fd);

    if (!elf_reader.Load()) {

        return NULL;

    }

    const char* bname = strrchr(name, '/');

    soinfo* si = soinfo_alloc(bname ? bname + 1 : name);

    if (si == NULL) {

        return NULL;

}

 

//3.对soinfo对象进行赋值

    si->base = elf_reader.load_start();

    si->size = elf_reader.load_size();

    si->load_bias = elf_reader.load_bias();

    si->flags = 0;

    si->entry = 0;

    si->dynamic = NULL;

    si->phnum = elf_reader.phdr_count();

    si->phdr = elf_reader.loaded_phdr();

    return si;

}

3.1 打开so文件

open_library函数位置:bionic\linker\linker.cpp。其函数定义如下:

static int open_library(const char* name) {

  TRACE("[ opening %s ]", name);

 

  // If the name contains a slash, we should attempt to open it directly and not search the paths.

  if (strchr(name, '/') != NULL) {

    int fd = TEMP_FAILURE_RETRY(open(name, O_RDONLY | O_CLOEXEC));

    if (fd != -1) {

      return fd;

    }

    // ...but nvidia binary blobs (at least) rely on this behavior, so fall through for now.

  }

 

  // Otherwise we try LD_LIBRARY_PATH first, and fall back to the built-in well known paths.

  int fd = open_library_on_path(name, gLdPaths);

  if (fd == -1) {

    fd = open_library_on_path(name, gSoPaths);

  }

  return fd;

}

3.2 读取ELF文件头并加载segment到内存

ElfReader::Load函数位置:bionic\linker\linker_phdr.cpp,其函数定义如下:

bool ElfReader::Load() {

  return ReadElfHeader() &&

         VerifyElfHeader() &&

         ReadProgramHeader() &&

         ReserveAddressSpace() &&

         LoadSegments() &&

         FindPhdr();

}

可见Load函数调用了6个子函数,其过程可以分成3部分,如下图所示:

(1) 读取ELF头部

ReadElfHeader函数定义如下:

bool ElfReader::ReadElfHeader() {

  ssize_t rc = TEMP_FAILURE_RETRY(read(fd_, &header_, sizeof(header_)));

  if (rc < 0) {

    DL_ERR("can't read file \"%s\": %s", name_, strerror(errno));

    return false;

  }

  if (rc != sizeof(header_)) {

    DL_ERR("\"%s\" is too small to be an ELF executable", name_);

    return false;

  }

  return true;

}

如上,通过IO操作和文件描述符读取ELF头部。header_是ElfReader类的成员(查看linker_phdr.h头文件)。

(2) 验证ELF头部

VerifyElfHeader函数定义如下:

bool ElfReader::VerifyElfHeader() {

  //检查magic number是否为”\177ELF”

  if (header_.e_ident[EI_MAG0] != ELFMAG0 ||

      header_.e_ident[EI_MAG1] != ELFMAG1 ||

      header_.e_ident[EI_MAG2] != ELFMAG2 ||

      header_.e_ident[EI_MAG3] != ELFMAG3) {

    DL_ERR("\"%s\" has bad ELF magic", name_);

    return false;

  }

  //检查其位数是否为32位

  if (header_.e_ident[EI_CLASS] != ELFCLASS32) {

    DL_ERR("\"%s\" not 32-bit: %d", name_, header_.e_ident[EI_CLASS]);

    return false;

  }

  //检查so文件是否是小段字节序

  if (header_.e_ident[EI_DATA] != ELFDATA2LSB) {

    DL_ERR("\"%s\" not little-endian: %d", name_, header_.e_ident[EI_DATA]);

    return false;

  }

  //检查so文件是否为共享目标文件

  if (header_.e_type != ET_DYN) {

    DL_ERR("\"%s\" has unexpected e_type: %d", name_, header_.e_type);

    return false;

  }

  //检查版本号是否为1

  if (header_.e_version != EV_CURRENT) {

    DL_ERR("\"%s\" has unexpected e_version: %d", name_, header_.e_version);

    return false;

  }

  //检查是否是ARM、MIPS或386平台

  if (header_.e_machine !=

#ifdef ANDROID_ARM_LINKER

      EM_ARM

#elif defined(ANDROID_MIPS_LINKER)

      EM_MIPS

#elif defined(ANDROID_X86_LINKER)

      EM_386

#endif

  ) {

    DL_ERR("\"%s\" has unexpected e_machine: %d", name_, header_.e_machine);

    return false;

  }

 

  return true;

}

该函数主要用于检查ELF头部某些字段是否合法。

(3) 读取程序头部

ReadProgramHeader函数定义如下:

bool ElfReader::ReadProgramHeader() {

  phdr_num_ = header_.e_phnum;

 

  // Like the kernel, we only accept program header tables that

  // are smaller than 64KiB.

  if (phdr_num_ < 1 || phdr_num_ > 65536/sizeof(Elf32_Phdr)) {

    DL_ERR("\"%s\" has invalid e_phnum: %d", name_, phdr_num_);

    return false;

  }

  //获取program header table的大小范围

  Elf32_Addr page_min = PAGE_START(header_.e_phoff);

  Elf32_Addr page_max = PAGE_END(header_.e_phoff + (phdr_num_ * sizeof(Elf32_Phdr)));

  Elf32_Addr page_offset = PAGE_OFFSET(header_.e_phoff);

 

  phdr_size_ = page_max - page_min;

 

  void* mmap_result = mmap(NULL, phdr_size_, PROT_READ, MAP_PRIVATE, fd_, page_min);

  if (mmap_result == MAP_FAILED) {

    DL_ERR("\"%s\" phdr mmap failed: %s", name_, strerror(errno));

    return false;

  }

 

  phdr_mmap_ = mmap_result;

  phdr_table_ = reinterpret_cast<Elf32_Phdr*>(reinterpret_cast<char*>(mmap_result) + page_offset);

  return true;

}

该函数主要作用是通过mmap把文件中program header table所在的页以只读方式映射到内存,并让phdr_table_指向其中的program header table。

(4) 分配内存空间

ReserveAddressSpace函数定义如下:

bool ElfReader::ReserveAddressSpace() {

  Elf32_Addr min_vaddr;

  //获取program header table中所有LOAD属性的segment的大小范围

  load_size_ = phdr_table_get_load_size(phdr_table_, phdr_num_, &min_vaddr);

  if (load_size_ == 0) {

    DL_ERR("\"%s\" has no loadable segments", name_);

    return false;

  }

 

  uint8_t* addr = reinterpret_cast<uint8_t*>(min_vaddr);

  int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS;

  //给所有LOAD属性的segment分配足够大的内存空间

  void* start = mmap(addr, load_size_, PROT_NONE, mmap_flags, -1, 0);

  if (start == MAP_FAILED) {

    DL_ERR("couldn't reserve %d bytes of address space for \"%s\"", load_size_, name_);

    return false;

  }

 

  load_start_ = start;

  load_bias_ = reinterpret_cast<uint8_t*>(start) - addr;

  return true;

}

分配足够大的内存空间用于加载program header table中LOAD属性的segment空间。

(5) 加载segments到内存

LoadSegments函数定义如下:

bool ElfReader::LoadSegments() {

  for (size_t i = 0; i < phdr_num_; ++i) {

    const Elf32_Phdr* phdr = &phdr_table_[i];

    //只映射LOAD属性的segment

    if (phdr->p_type != PT_LOAD) {

      continue;

    }

 

    // Segment addresses in memory.

    Elf32_Addr seg_start = phdr->p_vaddr + load_bias_;

    Elf32_Addr seg_end   = seg_start + phdr->p_memsz;

 

    Elf32_Addr seg_page_start = PAGE_START(seg_start);

    Elf32_Addr seg_page_end   = PAGE_END(seg_end);

 

    Elf32_Addr seg_file_end   = seg_start + phdr->p_filesz;

 

    // File offsets.

    Elf32_Addr file_start = phdr->p_offset;

    Elf32_Addr file_end   = file_start + phdr->p_filesz;

 

    Elf32_Addr file_page_start = PAGE_START(file_start);

    Elf32_Addr file_length = file_end - file_page_start;

 

    if (file_length != 0) {

      void* seg_addr = mmap((void*)seg_page_start,

                            file_length,

                            PFLAGS_TO_PROT(phdr->p_flags),

                            MAP_FIXED|MAP_PRIVATE,

                            fd_,

                            file_page_start);

      if (seg_addr == MAP_FAILED) {

        DL_ERR("couldn't map \"%s\" segment %d: %s", name_, i, strerror(errno));

        return false;

      }

    }

 

    // if the segment is writable, and does not end on a page boundary,

    // zero-fill it until the page limit.

    if ((phdr->p_flags & PF_W) != 0 && PAGE_OFFSET(seg_file_end) > 0) {

      memset((void*)seg_file_end, 0, PAGE_SIZE - PAGE_OFFSET(seg_file_end));

    }

 

    seg_file_end = PAGE_END(seg_file_end);

 

    // seg_file_end is now the first page address after the file

    // content. If seg_end is larger, we need to zero anything

    // between them. This is done by using a private anonymous

    // map for all extra pages.

    if (seg_page_end > seg_file_end) {

      void* zeromap = mmap((void*)seg_file_end,

                           seg_page_end - seg_file_end,

                           PFLAGS_TO_PROT(phdr->p_flags),

                           MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE,

                           -1,

                           0);

      if (zeromap == MAP_FAILED) {

        DL_ERR("couldn't zero fill \"%s\" gap: %s", name_, strerror(errno));

        return false;

      }

    }

  }

  return true;

}

映射program header table中所有LOAD属性的segment到之前分配的内存空间中。

(6) 查找程序头部表

FindPhdr函数定义如下:

bool ElfReader::FindPhdr() {

  const Elf32_Phdr* phdr_limit = phdr_table_ + phdr_num_;

 

  //如果有PT_PHDR属性的segment,直接使用

  for (const Elf32_Phdr* phdr = phdr_table_; phdr < phdr_limit; ++phdr) {

    if (phdr->p_type == PT_PHDR) {

      return CheckPhdr(load_bias_ + phdr->p_vaddr);

    }

  }

 

  // Otherwise, check the first loadable segment. If its file offset

  // is 0, it starts with the ELF header, and we can trivially find the

  // loaded program header from it.

  for (const Elf32_Phdr* phdr = phdr_table_; phdr < phdr_limit; ++phdr) {

    if (phdr->p_type == PT_LOAD) {

      if (phdr->p_offset == 0) {

        Elf32_Addr  elf_addr = load_bias_ + phdr->p_vaddr;

        const Elf32_Ehdr* ehdr = (const Elf32_Ehdr*)(void*)elf_addr;

        Elf32_Addr  offset = ehdr->e_phoff;

        return CheckPhdr((Elf32_Addr)ehdr + offset);

      }

      break;

    }

  }

 

  DL_ERR("can't find loaded phdr for \"%s\"", name_);

  return false;

}

该函数其实主要是检测Program Header是否在LOAD属性的segment范围内。

继续查看CheckPhdr的函数定义:

bool ElfReader::CheckPhdr(Elf32_Addr loaded) {

  const Elf32_Phdr* phdr_limit = phdr_table_ + phdr_num_;

  Elf32_Addr loaded_end = loaded + (phdr_num_ * sizeof(Elf32_Phdr));

  for (Elf32_Phdr* phdr = phdr_table_; phdr < phdr_limit; ++phdr) {

    if (phdr->p_type != PT_LOAD) {

      continue;

    }

    Elf32_Addr seg_start = phdr->p_vaddr + load_bias_;

    Elf32_Addr seg_end = phdr->p_filesz + seg_start;

    if (seg_start <= loaded && loaded_end <= seg_end) {

      loaded_phdr_ = reinterpret_cast<const Elf32_Phdr*>(loaded);

      return true;

    }

  }

  DL_ERR("\"%s\" loaded phdr %x not in loadable segment", name_, loaded);

  return false;

}


至此,加载so的过程就完成了,下面开始进行链接。

4 Native层-链接so文件

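soinfo_link_image函数位置:bionic\linker\linker.cpp,其关键代码如下(注意:下面摘录的代码用unsigned*遍历dynamic、返回int(0表示成功、-1表示失败),与前文find_library_internal中`!soinfo_link_image(si)`的用法以及soinfo中`Elf32_Dyn* dynamic`的声明并不一致,应是摘自另一个版本的linker源码,阅读时请以对应版本的源码为准):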

/*
 * Excerpt of soinfo_link_image(): links an already-mapped library.
 *
 * Lines consisting of "..." mark source omitted by the article, so some names
 * used here (pid, tmp_err_buf, program_is_setuid) are declared elsewhere.
 *
 * Visible flow: locate the dynamic section, copy the table addresses it names
 * into *si, load every DT_NEEDED dependency, apply the PLT and data
 * relocations, restore segment protections, and flag the image as linked.
 *
 * Returns 0 on success and -1 on failure (with FLAG_ERROR set on si).
 * NOTE(review): the caller quoted earlier tests `!soinfo_link_image(si)` for
 * failure, and this excerpt walks si->dynamic as unsigned words and uses
 * `refcount` - it appears to come from a different linker revision than the
 * other snippets; confirm against the matching source.
 */
static int soinfo_link_image(soinfo *si)

{

    unsigned *d;

    /* "base" might wrap around UINT32_MAX. */

    Elf32_Addr base = si->load_bias;

    const Elf32_Phdr *phdr = si->phdr;

    int phnum = si->phnum;

    int relocating_linker = (si->flags & FLAG_LINKER) != 0;

    soinfo **needed, **pneeded;

    size_t dynamic_count;

    /* We can't debug anything until the linker is relocated */

    if (!relocating_linker) {

        INFO("[ %5d linking %s ]\n", pid, si->name);

        DEBUG("%5d si->base = 0x%08x si->flags = 0x%08x\n", pid,

            si->base, si->flags);

    }

    // Find the address and entry count of the dynamic section from the program headers.

    phdr_table_get_dynamic_section(phdr, phnum, base, &si->dynamic,

                                   &dynamic_count);

    if (si->dynamic == NULL) {

        if (!relocating_linker) {

            DL_ERR("missing PT_DYNAMIC?!");

        }

        goto fail;

    } else {

        if (!relocating_linker) {

            DEBUG("%5d dynamic = %p\n", pid, si->dynamic);

        }

    }

#ifdef ANDROID_ARM_LINKER

    (void) phdr_table_get_arm_exidx(phdr, phnum, base,

                                    &si->ARM_exidx, &si->ARM_exidx_count);

#endif

// 1. Extract the useful information from the ".dynamic" section.

for(d = si->dynamic; *d; d++){

        // Copy the information found in the dynamic section into *si.
        // Each entry is a (tag, value) pair: the switch reads the tag and
        // advances d to the value; the loop's d++ then steps to the next tag.

        switch(*d++){

       // (unsigned *) (si->base + *d) points at the table the entry's value refers to.

        case DT_HASH:

            // For DT_HASH the value is the address of the hash table:
            // nbucket, nchain, then the bucket array, then the chain array.

            si->nbucket = ((unsigned *) (si->base + *d))[0];

            si->nchain = ((unsigned *) (si->base + *d))[1];

            si->bucket = (unsigned *) (si->base + *d + 8);

            si->chain = (unsigned *) (si->base + *d + 8 + si->nbucket * 4);

            break;

        case DT_STRTAB:

            // String table used for dynamic linking (normally the ".dynstr" section).

            si->strtab = (const char *) (si->base + *d);

            break;

        case DT_SYMTAB:

             si->symtab = (Elf32_Sym *) (si->base + *d);

            break;

         ...

        }

    }

...

if(si->flags & FLAG_EXE) {

    // Handling specific to executables (omitted by the article).

    ...

}

   /* dynamic_count is an upper bound for the number of needed libs */

    pneeded = needed = (soinfo**) alloca((1 + dynamic_count) * sizeof(soinfo*));

    // For every DT_NEEDED entry of the dynamic section, load and link the
    // library this .so depends on and remember it in the needed[] list.

    for(d = si->dynamic; *d; d += 2) {

        if(d[0] == DT_NEEDED){

            DEBUG("%5d %s needs %s\n", pid, si->name, si->strtab + d[1]);

            soinfo *lsi = find_library(si->strtab + d[1]);

            if(lsi == 0) {

                strlcpy(tmp_err_buf, linker_get_error(), sizeof(tmp_err_buf));

                DL_ERR("could not load library \"%s\" needed by \"%s\"; caused by %s",

                       si->strtab + d[1], si->name, tmp_err_buf);

                goto fail;

            }

            *pneeded++ = lsi;

            lsi->refcount++;

        }

    }

   *pneeded = NULL;

    if (si->has_text_relocations) {

    // With text relocations present, the loadable segments are unprotected
    // first, because relocation has to patch the referencing locations.

        if (phdr_table_unprotect_segments(si->phdr, si->phnum, si->load_bias) < 0) {

            DL_ERR("can't unprotect loadable segments for \"%s\": %s",

                   si->name, strerror(errno));

            goto fail;

        }

    }

   

  // Relocate the PLT entries (corresponds to ".rel.plt").

  if(si->plt_rel) {

        DEBUG("[ %5d relocating %s plt ]\n", pid, si->name );

        if(soinfo_relocate(si, si->plt_rel, si->plt_rel_count, needed))

            goto fail;

    }

    // Relocate the remaining entries (corresponds to ".rel.dyn").

    if(si->rel) {

        DEBUG("[ %5d relocating %s ]\n", pid, si->name );

        if(soinfo_relocate(si, si->rel, si->rel_count, needed))

            goto fail;

    }

   si->flags |= FLAG_LINKED;

    DEBUG("[ %5d finished linking %s ]\n", pid, si->name);

    if (si->has_text_relocations) {

        /* All relocations are done, we can protect our segments back to read-only. */

        if (phdr_table_protect_segments(si->phdr, si->phnum, si->load_bias) < 0) {

            DL_ERR("can't protect segments for \"%s\": %s",

                   si->name, strerror(errno));

            goto fail;

        }

    }

    /* We can also turn on GNU RELRO protection */

    if (phdr_table_protect_gnu_relro(si->phdr, si->phnum, si->load_bias) < 0) {

        DL_ERR("can't enable GNU RELRO protection for \"%s\": %s",

               si->name, strerror(errno));

        goto fail;

    }

    /* If this is a SET?ID program, dup /dev/null to opened stdin,

       stdout and stderr to close a security hole described in:

    ftp://ftp.freebsd.org/pub/FreeBSD/CERT/advisories/FreeBSD-SA-02:23.stdio.asc

     */

    if (program_is_setuid) {

        nullify_closed_stdio();

    }

    notify_gdb_of_load(si);

    return 0;

fail:

    ERROR("failed to link %s\n", si->name);

    si->flags |= FLAG_ERROR;

    return -1;

}

其中phdr_table_get_dynamic_section定义如下:

// Finds the first PT_DYNAMIC entry in a program header table.
//
//   phdr_table / phdr_count  the table to search
//   load_bias                value added to p_vaddr to form the address
//   dynamic                  out: biased address of the dynamic section, or
//                            NULL when the table has no PT_DYNAMIC entry
//   dynamic_count            out, may be NULL: p_memsz / 8 (number of 8-byte
//                            entries), or 0 when nothing was found
void phdr_table_get_dynamic_section(const Elf32_Phdr* phdr_table,
                               int               phdr_count,
                               Elf32_Addr        load_bias,
                               Elf32_Addr**      dynamic,
                               size_t*           dynamic_count)
{
    const Elf32_Phdr* const table_end = phdr_table + phdr_count;

    // Advance to the first PT_DYNAMIC entry, or to the end of the table.
    const Elf32_Phdr* entry = phdr_table;
    while (entry < table_end && entry->p_type != PT_DYNAMIC) {
        entry++;
    }

    const bool found = entry < table_end;
    if (found) {
        *dynamic = (Elf32_Addr*)(load_bias + entry->p_vaddr);
    } else {
        *dynamic = NULL;
    }

    if (dynamic_count) {
        *dynamic_count = found ? (unsigned)(entry->p_memsz / 8) : 0;
    }
}

5 Native层-执行JNI_OnLoad函数

再回到do_dlopen:执行完find_library函数后,还会调用另一个函数:si->CallConstructors()。

CallConstructors函数位置:bionic\linker\linker.cpp。函数定义如下:

void soinfo::CallConstructors() {

  if (constructors_called) {

    return;

  }

  constructors_called = true;

  if ((flags & FLAG_EXE) == 0 && preinit_array != NULL) {

    // The GNU dynamic linker silently ignores these, but we warn the developer.

    PRINT("\"%s\": ignoring %d-entry DT_PREINIT_ARRAY in shared library!",

          name, preinit_array_count);

  }

  //调用当前so所依赖的第三方库的CallConstructors函数

  if (dynamic != NULL) {

    for (Elf32_Dyn* d = dynamic; d->d_tag != DT_NULL; ++d) {

      if (d->d_tag == DT_NEEDED) {

        const char* library_name = strtab + d->d_un.d_val;

        TRACE("\"%s\": calling constructors in DT_NEEDED \"%s\"", name, library_name);

        find_loaded_library(library_name)->CallConstructors();

      }

    }

  }

  TRACE("\"%s\": calling constructors", name);

  // DT_INIT should be called before DT_INIT_ARRAY if both are present.

  CallFunction("DT_INIT", init_func);

  CallArray("DT_INIT_ARRAY", init_array, init_array_count, false);

}

该函数最后调用了两个函数,分别是CallFunction和CallArray。

CallFunction定义如下:

void soinfo::CallFunction(const char* function_name UNUSED, linker_function_t function) {

  if (function == NULL || reinterpret_cast<uintptr_t>(function) == static_cast<uintptr_t>(-1)) {

    return;

  }

  function();

  set_soinfo_pool_protection(PROT_READ | PROT_WRITE);

}

CallArray定义如下:

void soinfo::CallArray(const char* array_name UNUSED, linker_function_t* functions, size_t count, bool reverse) {

  if (functions == NULL) {

    return;

  }

  int begin = reverse ? (count - 1) : 0;

  int end = reverse ? -1 : count;

  int step = reverse ? -1 : 1;

  for (int i = begin; i != end; i += step) {

    CallFunction("function", functions[i]);

  }

}

参考《链接器和加载器》P62 TODO:扩展

posted @ 2023-02-01 09:48  Domefy  阅读(308)  评论(0编辑  收藏  举报