应用安全 --- 安卓加固 之 SO脱壳通用方法
我建议的方法是:分析so的逻辑、还原出代码后重新编译so,而不是重建。
一、整体思路
text
┌─────────────────────────────────────────────────┐
│ SO脱壳方法分类 │
├──────────┬──────────┬──────────┬────────────────┤
│ 动态dump │ 静态分析 │ 模拟执行 │ 内存重建 │
│ (推荐) │ (困难) │ (折中) │ (最完整) │
└──────────┴──────────┴──────────┴────────────────┘
二、动态Dump方法(最推荐)
方法1:Frida Hook dump
JavaScript
// dump_so.js - 最常用的方案
/**
 * Dump the in-memory image of a loaded module to
 * /data/local/tmp/<so_name>.dump.
 *
 * The whole [base, base + size) span is read in one piece. If that read
 * faults, read permission is added to every range of the module that lacks
 * it and the read is retried once.
 *
 * @param {string} so_name - Module name as known to the process, e.g. "libtarget.so".
 */
function dump_so(so_name) {
    var module = Process.findModuleByName(so_name);
    if (!module) {
        console.log("[-] Module not found: " + so_name);
        return;
    }
    console.log("[+] Module: " + module.name);
    console.log("[+] Base: " + module.base);
    console.log("[+] Size: " + module.size);

    var out_path = "/data/local/tmp/" + so_name + ".dump";
    var buf = null;
    try {
        // NativePointer method form, matching the readCString() calls used
        // by the hooks in this script.
        buf = module.base.readByteArray(module.size);
    } catch (e) {
        console.log("[!] Direct read failed (" + e + "), adding read permission and retrying");
        try {
            module.enumerateRanges("---").forEach(function (range) {
                if (range.protection.charAt(0) !== "r") {
                    // Keep the existing w/x bits; only add 'r'.
                    Memory.protect(range.base, range.size,
                        "r" + range.protection.substring(1));
                }
            });
            buf = module.base.readByteArray(module.size);
        } catch (e2) {
            console.log("[-] Failed to read module memory: " + e2);
            return;
        }
    }

    var file;
    try {
        file = new File(out_path, "wb");
    } catch (e) {
        console.log("[-] Cannot open " + out_path + ": " + e);
        return;
    }
    try {
        file.write(buf);
        file.flush();
    } finally {
        // Always release the handle, even if the write throws.
        file.close();
    }
    console.log("[+] Dumped to /data/local/tmp/" + so_name + ".dump");
}
// 时机1:Hook dlopen 在加载完成后dump
/**
 * Dump timing 1: hook android_dlopen_ext and dump the target library a few
 * seconds after it has been loaded.
 *
 * The delay gives the library's init code time to run, since that is where
 * a packer typically decrypts the real code.
 */
function hook_dlopen() {
    var android_dlopen_ext = Module.findExportByName(null, "android_dlopen_ext");
    if (!android_dlopen_ext) {
        console.log("[-] android_dlopen_ext not found");
        return;
    }
    Interceptor.attach(android_dlopen_ext, {
        onEnter: function (args) {
            // The filename argument may be NULL; readCString() on NULL throws.
            this.path = args[0].isNull() ? null : args[0].readCString();
        },
        onLeave: function (retval) {
            // A NULL handle means the load failed, so there is nothing to dump.
            if (retval.isNull()) {
                return;
            }
            if (this.path && this.path.indexOf("libtarget") !== -1) {
                console.log("[+] Target SO loaded: " + this.path);
                // Wait for init to finish before dumping.
                setTimeout(function () {
                    dump_so("libtarget.so");
                }, 3000);
            }
        }
    });
}
// 时机2:Hook JNI_OnLoad(init之后执行,壳已解密)
/**
 * Dump timing 2: hook dlsym, and when "JNI_OnLoad" is resolved, hook the
 * returned function so the target is dumped as soon as JNI_OnLoad is entered
 * (it runs after init, so the packer has already decrypted the code).
 *
 * Only a JNI_OnLoad that is not attributed to a different module is hooked,
 * and each address is hooked at most once.
 */
function hook_jni_onload() {
    var dlsym = Module.findExportByName(null, "dlsym");
    if (!dlsym) {
        console.log("[-] dlsym not found");
        return;
    }
    var hooked = {}; // addresses that already carry our JNI_OnLoad listener
    Interceptor.attach(dlsym, {
        onEnter: function (args) {
            this.sym = args[1].isNull() ? null : args[1].readCString();
        },
        onLeave: function (retval) {
            if (this.sym !== "JNI_OnLoad" || retval.isNull()) {
                return;
            }
            // Copy the value: the retval object is only valid inside this callback.
            var addr = ptr(retval.toString());
            // Skip JNI_OnLoad symbols that clearly belong to another library.
            // If the owner cannot be determined, hook anyway (best effort).
            var owner = Process.findModuleByAddress(addr);
            if (owner && owner.name !== "libtarget.so") {
                return;
            }
            var key = addr.toString();
            if (hooked[key]) {
                return;
            }
            hooked[key] = true;
            console.log("[+] JNI_OnLoad at: " + addr);
            Interceptor.attach(addr, {
                onEnter: function (args) {
                    console.log("[+] JNI_OnLoad called, dumping...");
                    dump_so("libtarget.so");
                }
            });
        }
    });
}
// 时机3:Hook mprotect 监控壳的解密操作
/**
 * Dump timing 3: watch mprotect calls to follow the packer's decryption.
 *
 * Every call is logged. Heuristic: a packer calls mprotect several times and
 * the last call usually restores permissions after decryption, so the target
 * is dumped once, when the third call returns.
 */
function hook_mprotect() {
    var mprotect = Module.findExportByName(null, "mprotect");
    if (!mprotect) {
        console.log("[-] mprotect not found");
        return;
    }
    var count = 0;
    // Explicit one-shot flag: a counter reset to a negative sentinel would
    // climb back to the threshold and trigger a second dump.
    var dumped = false;
    Interceptor.attach(mprotect, {
        onEnter: function (args) {
            var addr = args[0];
            var size = args[1].toInt32();
            var prot = args[2].toInt32();
            console.log("[mprotect] addr=" + addr +
                " size=0x" + size.toString(16) +
                " prot=" + prot);
            count++;
        },
        onLeave: function (retval) {
            if (!dumped && count >= 3) {
                // Set the flag first so a failure inside dump_so cannot
                // cause repeated dump attempts.
                dumped = true;
                dump_so("libtarget.so");
            }
        }
    });
}
// Entry point: install the dlopen and JNI_OnLoad hooks.
// hook_mprotect() is defined above but is not enabled here.
hook_dlopen();
hook_jni_onload();
Bash
# Usage: run dump_so.js against com.example.app.
# NOTE(review): newer frida-tools releases reportedly removed --no-pause
# (not pausing became the default) - confirm against the installed version.
frida -U -f com.example.app -l dump_so.js --no-pause
方法2:基于 /proc/pid/maps 的dump
Python
#!/usr/bin/env python3
"""
通过adb直接从进程内存dump SO
"""
import subprocess
import re
import struct
import sys
def adb_shell(cmd):
    """Run *cmd* as root on the connected device and return its stdout.

    The command is passed to ``su -c`` as one quoted argument, so that
    redirections, pipes and other shell metacharacters in *cmd* are
    interpreted by the root shell and not by the unprivileged adb shell.

    Args:
        cmd: Shell command line to execute on the device.

    Returns:
        The command's standard output as text. The exit status is not checked.
    """
    import shlex  # local import keeps this block self-contained

    result = subprocess.run(
        ["adb", "shell", f"su -c {shlex.quote(cmd)}"],
        capture_output=True, text=True,
    )
    return result.stdout
def find_pid(package_name):
    """Return the PID of *package_name* as a string, or "" if it is not running.

    ``pidof`` prints every matching PID separated by whitespace; only the
    first one is returned so the result is always usable in a
    ``/proc/<pid>/...`` path.
    """
    pids = adb_shell(f"pidof {package_name}").split()
    return pids[0] if pids else ""
def _parse_so_regions(maps_text, so_name):
    """Return the /proc/<pid>/maps entries whose line mentions *so_name*."""
    pattern = re.compile(
        r'([0-9a-f]+)-([0-9a-f]+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s*(.*)'
    )
    regions = []
    for line in maps_text.split('\n'):
        if so_name not in line:
            continue
        match = pattern.match(line)
        if not match:
            continue
        start = int(match.group(1), 16)
        end = int(match.group(2), 16)
        regions.append({
            'start': start,
            'end': end,
            'size': end - start,
            'perms': match.group(3),
            'offset': int(match.group(4), 16),
        })
        print(f" {line.strip()}")
    return regions


def dump_so_from_memory(package_name, so_name, output_path):
    """Dump the mapped image of *so_name* from a running app via adb.

    Every mapping of the library listed in /proc/<pid>/maps is copied from
    /proc/<pid>/mem into one file on the device. Each mapping is written at
    its offset from the first mapping, so file offsets equal memory offsets
    and gaps between mappings stay zero-filled. The file is then pulled to
    *output_path*.

    Args:
        package_name: Process name passed to ``pidof``.
        so_name: Substring identifying the library in the maps listing.
        output_path: Local path the dump is pulled to.

    Returns:
        True when the dump was pulled successfully, False otherwise.
    """
    pid = find_pid(package_name)
    if not pid:
        print(f"[-] Process not found: {package_name}")
        return False
    print(f"[+] PID: {pid}")

    maps = adb_shell(f"cat /proc/{pid}/maps")
    regions = _parse_so_regions(maps, so_name)
    if not regions:
        print(f"[-] SO not found in memory maps: {so_name}")
        return False

    base_addr = regions[0]['start']
    total_size = regions[-1]['end'] - base_addr
    print(f"\n[+] Total size: 0x{total_size:x}")
    print(f"[+] Base address: 0x{base_addr:x}")

    remote_path = f"/data/local/tmp/{so_name}.dump"
    # Start from a clean file so a previous run cannot leak into this dump.
    adb_shell(f"rm -f {remote_path}")

    # Mapping boundaries are page aligned, hence multiples of 4096. Copying in
    # 4 KiB blocks avoids the one-syscall-per-byte cost of bs=1.
    block = 4096
    for region in regions:
        offset = region['start'] - base_addr
        size = region['size']
        # seek + conv=notrunc places the region at its memory offset without
        # truncating what earlier regions already wrote.
        cmd = (f"dd if=/proc/{pid}/mem of={remote_path} bs={block} "
               f"skip={region['start'] // block} "
               f"seek={offset // block} count={size // block} "
               f"conv=notrunc 2>/dev/null")
        adb_shell(cmd)
        print(f" Dumped 0x{offset:x} - 0x{offset+size:x} ({region['perms']})")

    pulled = subprocess.run(["adb", "pull", remote_path, output_path])
    if pulled.returncode != 0:
        print(f"[-] adb pull failed for {remote_path}")
        return False
    print(f"\n[+] Saved to {output_path}")
    return True
if __name__ == '__main__':
    # Expect exactly: <package name> <so name> <output path>.
    if len(sys.argv) != 4:
        print(f"Usage: {sys.argv[0]} <package_name> <so_name> <output_path>")
        sys.exit(1)
    succeeded = dump_so_from_memory(
        sys.argv[1],  # package name
        sys.argv[2],  # so name
        sys.argv[3],  # output path
    )
    # Propagate failure to the calling shell.
    sys.exit(0 if succeeded else 1)
方法3:GDB/LLDB dump
Bash
# Attach gdbserver to the running process.
# The $ is escaped and the command single-quoted so that $(pidof ...) is
# expanded on the device by the root shell, not by the host shell.
adb forward tcp:23456 tcp:23456
adb shell "su -c 'gdbserver64 :23456 --attach \$(pidof com.example.app)'"
# Connect from the host with GDB.
gdb-multiarch
(gdb) target remote :23456
(gdb) info proc mappings
(gdb) dump binary memory /tmp/libtarget.dump 0x7000000000 0x7000100000
三、Dump后的SO修复(关键步骤)
从内存dump出来的SO 不能直接使用,需要修复:
Python
#!/usr/bin/env python3
"""
SO Dump文件修复工具
将内存dump的SO修复为可静态加载的SO文件
"""
import struct
import sys
import os
from collections import OrderedDict
class SOFixer:
"""
修复从内存dump出来的SO文件
主要修复项:
1. 修复ELF头中的节区表
2. 修复Program Header的文件偏移
3. 修复.dynamic段中的地址引用
4. 重建Section Header Table
5. 修复重定位表
"""
PT_NULL = 0
PT_LOAD = 1
PT_DYNAMIC = 2
PT_INTERP = 3
PT_NOTE = 4
PT_PHDR = 6
PT_GNU_EH_FRAME = 0x6474e550
PT_GNU_STACK = 0x6474e551
PT_GNU_RELRO = 0x6474e552
DT_NULL = 0
DT_NEEDED = 1
DT_PLTRELSZ = 2
DT_PLTGOT = 3
DT_HASH = 4
DT_STRTAB = 5
DT_SYMTAB = 6
DT_RELA = 7
DT_RELASZ = 8
DT_RELAENT = 9
DT_STRSZ = 10
DT_SYMENT = 11
DT_INIT = 12
DT_FINI = 13
DT_SONAME = 14
DT_REL = 17
DT_RELSZ = 18
DT_RELENT = 19
DT_PLTREL = 20
DT_JMPREL = 23
DT_INIT_ARRAY = 25
DT_FINI_ARRAY = 26
DT_INIT_ARRAYSZ = 27
DT_FINI_ARRAYSZ = 28
DT_GNU_HASH = 0x6ffffef5
DT_VERSYM = 0x6ffffff0
DT_VERNEED = 0x6ffffffe
DT_VERNEEDNUM = 0x6fffffff
def __init__(self, dump_path, base_addr=0, original_so=None):
self.dump_path = dump_path
self.base_addr = base_addr
self.original_so = original_so
with open(dump_path, 'rb') as f:
self.data = bytearray(f.read())
self.original_data = None
if original_so and os.path.exists(original_so):
with open(original_so, 'rb') as f:
self.original_data = bytearray(f.read())
# 解析基本信息
self.is_64bit = self.data[4] == 2
self.is_le = self.data[5] == 1
self.endian = '<' if self.is_le else '>'
self.ptr_size = 8 if self.is_64bit else 4
self.ptr_fmt = 'Q' if self.is_64bit else 'I'
def read_ptr(self, offset):
    """Return the pointer-sized unsigned integer stored at *offset* in the dump."""
    layout = self.endian + self.ptr_fmt
    (value,) = struct.unpack_from(layout, self.data, offset)
    return value
def write_ptr(self, offset, value):
    """Store *value* as a pointer-sized unsigned integer at *offset* in the dump."""
    layout = self.endian + self.ptr_fmt
    struct.pack_into(layout, self.data, offset, value)
def read_u32(self, offset):
    """Return the unsigned 32-bit integer stored at *offset* in the dump."""
    (value,) = struct.unpack_from(self.endian + 'I', self.data, offset)
    return value
def write_u32(self, offset, value):
    """Store *value* as an unsigned 32-bit integer at *offset* in the dump."""
    layout = self.endian + 'I'
    struct.pack_into(layout, self.data, offset, value)
def read_u16(self, offset):
    """Return the unsigned 16-bit integer stored at *offset* in the dump."""
    (value,) = struct.unpack_from(self.endian + 'H', self.data, offset)
    return value
def parse_elf_header(self):
    """Read the ELF header fields into ``self.e_*`` and print a summary.

    The pointer-sized fields (entry, phoff, shoff) and the block of six
    16-bit fields that follows e_flags sit at different offsets in the
    32-bit and 64-bit header layouts; the two 16-bit fields e_type and
    e_machine are at the same offsets in both.
    """
    if self.is_64bit:
        pointer_fields = (('e_entry', 24), ('e_phoff', 32), ('e_shoff', 40))
        halfword_base = 52
    else:
        pointer_fields = (('e_entry', 24), ('e_phoff', 28), ('e_shoff', 32))
        halfword_base = 40

    self.e_type = self.read_u16(16)
    self.e_machine = self.read_u16(18)
    for attr, position in pointer_fields:
        setattr(self, attr, self.read_ptr(position))

    # Six consecutive 16-bit fields, starting at e_ehsize.
    halfword_fields = ('e_ehsize', 'e_phentsize', 'e_phnum',
                       'e_shentsize', 'e_shnum', 'e_shstrndx')
    for slot, attr in enumerate(halfword_fields):
        setattr(self, attr, self.read_u16(halfword_base + 2 * slot))

    width = '64' if self.is_64bit else '32'
    print(f"[*] ELF {width}-bit")
    print(f"[*] Entry: 0x{self.e_entry:x}")
    print(f"[*] PHDR: off=0x{self.e_phoff:x} num={self.e_phnum} "
          f"entsize={self.e_phentsize}")
    print(f"[*] SHDR: off=0x{self.e_shoff:x} num={self.e_shnum} "
          f"entsize={self.e_shentsize}")
def parse_program_headers(self):
    """Decode every program header entry into a dict and return the list.

    The result is also stored in ``self.phdrs``. Each dict carries the
    decoded fields plus ``hdr_offset``, the position of the entry itself
    inside the dump, so fields can be patched in place later.
    """
    u32 = self.read_u32
    ptr = self.read_ptr
    # (key, reader, offset within the entry). p_flags sits right after
    # p_type in the 64-bit layout but near the end in the 32-bit layout.
    if self.is_64bit:
        layout = (
            ('type', u32, 0), ('flags', u32, 4), ('offset', ptr, 8),
            ('vaddr', ptr, 16), ('paddr', ptr, 24), ('filesz', ptr, 32),
            ('memsz', ptr, 40), ('align', ptr, 48),
        )
    else:
        layout = (
            ('type', u32, 0), ('offset', ptr, 4), ('vaddr', ptr, 8),
            ('paddr', ptr, 12), ('filesz', ptr, 16), ('memsz', ptr, 20),
            ('flags', u32, 24), ('align', ptr, 28),
        )
    key_order = ('type', 'flags', 'offset', 'vaddr', 'paddr',
                 'filesz', 'memsz', 'align')

    self.phdrs = []
    for index in range(self.e_phnum):
        entry_pos = self.e_phoff + index * self.e_phentsize
        decoded = {key: reader(entry_pos + rel) for key, reader, rel in layout}
        entry = {key: decoded[key] for key in key_order}
        entry['hdr_offset'] = entry_pos
        self.phdrs.append(entry)
    return self.phdrs
def fix_program_headers(self):
    """Fix 1: rewrite p_offset of every program header for the dump layout.

    In a memory dump the bytes of a segment sit at ``p_vaddr - base``, not
    at the original file offset, so each non-NULL entry gets
    ``p_offset = p_vaddr - base_addr``. When no positive ``base_addr`` was
    supplied, it is inferred as the lowest PT_LOAD ``p_vaddr`` and stored in
    ``self.base_addr``. The inference happens before any entry is
    rewritten, so entries that precede the first PT_LOAD in the table use
    the same base.

    Entries whose computed offset would be negative are left untouched.
    NOTE(review): that silently skips everything if ``base_addr`` is a
    runtime load address while the p_vaddr values are still link-time
    relative - confirm what callers pass.
    """
    print("\n[*] Fixing Program Headers...")
    if not self.base_addr > 0:
        # Pick the base from the PT_LOAD entries themselves, wherever they
        # sit in the table; the first entry is not necessarily a PT_LOAD.
        load_vaddrs = [p['vaddr'] for p in self.phdrs
                       if p['type'] == self.PT_LOAD]
        if load_vaddrs:
            self.base_addr = min(load_vaddrs)
    for i, phdr in enumerate(self.phdrs):
        if phdr['type'] == self.PT_NULL:
            continue
        old_offset = phdr['offset']
        new_offset = phdr['vaddr'] - self.base_addr
        if new_offset != old_offset and new_offset >= 0:
            phdr['offset'] = new_offset
            self._write_phdr_offset(phdr)
            print(f" [{i}] offset: 0x{old_offset:x} -> 0x{new_offset:x}")
def _write_phdr_offset(self, phdr):
off = phdr['hdr_offset']
if self.is_64bit:
self.write_ptr(off + 8, phdr['offset'])
else:
self.write_ptr(off + 4, phdr['offset'])
def parse_dynamic(self):
"""解析DYNAMIC段"""
self.dynamic = OrderedDict()
self.dynamic_entries = []
dyn_phdr = None
for phdr in self.phdrs:
if phdr['type'] == self.PT_DYNAMIC:
dyn_phdr = phdr
break
if not dyn_phdr:
print("[-] No DYNAMIC segment found")
return
dyn_offset = dyn_phdr['offset']
dyn_size = dyn_phdr['filesz']
entry_size = 2 * self.ptr_size