Python批量处理apk脚本

简述

逆向分析应用时想快速浏览一批样本的信息,人工逐个统计太费时费力,遂想着写个python脚本批量处理,摆脱傻瓜式操作。


实现功能

  • 批量获取apk信息
  • 批量修改apk文件名
  • 批量下载apk

思路

  • androguard可以便捷地获取apk的相关信息(文件md5、签名md5、包名等),再用openpyxl写入excel文件。
  • 通过hashlib对apk文件进行摘要,获取md5值后用os.rename()重命名文件。
  • 将apk的下载链接放到excel表的第一列,用openpyxl获取所有链接,再用requests获取response,然后判断状态码和文件大小,再以二进制写入文件。

遇到的难点

  • 一开始是想着用apktool、aapt解析apk的,搜索后发现androguard更简单方便。写到签名解析代码时发现androguard没提供直接获取签名md5的api,最后在其github项目里发现一个androsign.py文件可以打印签名信息:它提取了META-INF下的证书信息,再使用hashlib.md5摘要获取证书md5。最终整合了相关代码,实现了获取签名md5。
  • xlwt处理excel文件时不能生成xlsx格式,所以换了openpyxl库。另外,自定义字体、填充颜色也挺方便,对应的类是Font、PatternFill。
  • 在下载apk时要判断文件格式是否为zip,但请求头不一定有Content-Type,所以只判断状态码和文件大小。


Show me the code

#!/usr/bin/python3
"""
@description: 批量处理APK文件
- 修复移动文件的bug
- 修复下载非apk文件会出错的bug
@author: yaorc
@date: 2021/4/20 10:35
@version: 1.2
"""
import getopt
import hashlib
import os
import random
import re
import shutil
import sys
import time
from urllib.parse import unquote

import openpyxl
import requests
from androguard.core.bytecodes.apk import APK
from androguard import util
from openpyxl import Workbook
from openpyxl.styles import Font, PatternFill

# Timestamp taken once at import time; it names the output directory and the
# generated report files for this run.
now = time.strftime("%Y%m%d%H%M%S", time.localtime())
# Output directory for this run: a timestamp-named folder under the current
# working directory (created lazily by the functions that write into it).
download_dir = os.path.join(os.getcwd(), now)


def usage():
    """
    Print the command-line help text, one line per entry.
    :return: None
    """
    help_lines = (
        '————————— 使用说明 —————————',
        'apk工具箱 v1.2 build on 2021/04/20',
        '-h, --help:         帮助',
        '-d, --download:     批量下载文件',
        '-r, --rename:       以MD5重命名Apk文件',
        '-i, --info:         获取Apk信息',
        '-m, --move:         移动Apk',
    )
    for line in help_lines:
        print(line)


def save_log(log_path, content):
    """
    Echo a message to stdout and append it, as one line, to a log file.
    :param log_path: path of the log file; opened in append mode as UTF-8
    :param content: text to print and to record
    :return: None
    """
    print(content)
    with open(log_path, mode='a', encoding='utf8') as log_file:
        log_file.write(''.join((content, '\n')))


def get_download_links(filename):
    """
    Collect the download links stored in the first column of the first
    worksheet of an Excel workbook.

    Empty cells are skipped. A value that does not start with a URL scheme
    ("xxx://") gets "http://" prepended.
    :param filename: path of the .xlsx workbook
    :return: list of URL strings, in row order
    """
    # The character class is [a-zA-Z]; the earlier "A-z" range also matched
    # the punctuation characters that sit between 'Z' and 'a' in ASCII.
    scheme_pattern = re.compile(r'[a-zA-Z]+://\S*')

    wb = openpyxl.load_workbook(filename)
    ws = wb.worksheets[0]

    links = []
    for row in range(1, ws.max_row + 1):
        value = ws.cell(row=row, column=1).value
        if value is None:
            # An empty cell would otherwise turn into the bogus link "http://None".
            continue
        url = str(value).strip()
        if not url:
            continue
        if not scheme_pattern.match(url):
            url = "http://" + url
        links.append(url)
    return links


def get_excel_data(src_table, column):
    """
    Read every cell of one column from an Excel workbook.

    The sheet named 'Sheet1' is used when present; otherwise the first
    worksheet is used, so workbooks with a renamed sheet no longer fail
    with KeyError.
    :param src_table: path of the .xlsx workbook
    :param column: 1-based column index to read
    :return: list of cell values from row 1 to the sheet's max_row
             (empty cells are returned as None)
    """
    wb = openpyxl.load_workbook(src_table)
    ws = wb['Sheet1'] if 'Sheet1' in wb.sheetnames else wb.worksheets[0]
    return [ws.cell(row=row, column=column).value
            for row in range(1, ws.max_row + 1)]


class WriteData:
    """
    Build an .xlsx report in memory and save it to disk after every write.

    The file is saved as ``<download_dir>/<filename>.xlsx``.
    """

    def __init__(self, filename, navigation_bar):
        """
        :param filename: report file name without the '.xlsx' extension
        :param navigation_bar: sequence of column titles for the header row
        """
        self.wb = Workbook()
        self.ws = self.wb.active
        self.filename = filename
        self.font = Font(name='等线', size=11)
        self.fill = PatternFill("solid", fgColor="E0EEE0")
        self.title = navigation_bar

    def write_data(self, row=1, data=None, is_init=False):
        """
        Write either the header row or one data row, then save the workbook.

        :param row: for the header, the sheet row to write to; for data, a
                    0-based record offset (the record lands on sheet row
                    ``row + 2``, i.e. offset 0 is the row right below a
                    header written on row 1)
        :param data: sequence of cell values for a data row; None is treated
                     as an empty row instead of raising TypeError
        :param is_init: True to write the header (titles, font and fill),
                        False to write ``data``
        :return: None
        """
        if is_init:
            # Header row: titles with font and background fill.
            for col, heading in enumerate(self.title, start=1):
                cell = self.ws.cell(row=row, column=col, value=heading)
                cell.font = self.font
                cell.fill = self.fill
        else:
            # Data row: values with font only.
            for col, value in enumerate(data or (), start=1):
                cell = self.ws.cell(row=row + 2, column=col, value=value)
                cell.font = self.font

        # Saved on every call so a partial report survives an interrupted run.
        self.wb.save(os.path.join(download_dir, self.filename + '.xlsx'))


def get_file_name(url, headers):
    """
    Choose a local file name for a downloaded resource.

    Candidates are tried in this order:
    1. the ``filename*`` / ``filename`` parameter of the Content-Disposition
       response header (percent-decoded, surrounding quotes removed);
    2. the last path segment of the URL (query string ignored);
    3. a random 5-digit number.

    Earlier, the random number was assigned up front, so step 2 could never
    run and a plain ``.../app.apk`` link lost its ``.apk`` suffix.
    :param url: the requested URL
    :param headers: mapping of response headers
    :return: a file name (always a str, never containing directory parts)
    """
    # Function-scope import: the module only imports `unquote` from urllib.parse.
    from urllib.parse import urlparse

    def strip_dirs(name):
        # The name comes from the remote side; keep only the final component
        # so it cannot point outside the download directory.
        name = os.path.basename(name.replace('\\', '/'))
        return '' if name in ('.', '..') else name

    filename = ''

    disposition = headers.get('Content-Disposition') if headers else None
    if disposition:
        plain, extended = '', ''
        for part in disposition.split(';')[1:]:
            key, _, value = part.strip().partition('=')
            key = key.strip().lower()
            value = value.strip().strip('"')
            if key == 'filename*':
                # Form: charset'lang'percent-encoded-name
                extended = unquote(value.split("''", 1)[-1])
            elif key == 'filename':
                plain = unquote(value)
        filename = strip_dirs(extended or plain)

    if not filename:
        filename = strip_dirs(unquote(urlparse(url).path))
    if not filename:
        filename = str(random.randint(10000, 99999))
    return filename


def get_apk_info(filepath):
    """
    Extract descriptive fields from an APK file.

    Fields, in order: app name, package name, certificate MD5, version name,
    target SDK, min SDK, issuer, reinforcement vendor.
    :param filepath: path of the APK file
    :return: list of the fields gathered; empty when the file is not a valid
             APK, and possibly shorter when an error interrupts extraction
             (the error is printed, not raised)
    """
    collected = []

    try:
        parsed = APK(filepath)
        if parsed.is_valid_APK():
            # Each reader is called and appended one at a time, so fields
            # obtained before a failure are still part of the result.
            readers = (
                lambda: parsed.get_app_name(),
                lambda: parsed.get_package(),
                lambda: get_cert_md5(parsed),
                lambda: parsed.get_androidversion_name(),
                lambda: str(parsed.get_target_sdk_version()),
                lambda: str(parsed.get_min_sdk_version()),
                lambda: get_apk_issuer(parsed),
                lambda: get_reinforce(parsed),
            )
            for read in readers:
                collected.append(read())
    except Exception as err:
        print(filepath + ' ->>', err)
    return collected


def get_file_md5(filepath):
    """
    Compute the MD5 digest of a file.

    The file is hashed in 1 MiB chunks so large APKs are not loaded into
    memory in one piece.
    :param filepath: path of the file to hash
    :return: lowercase hexadecimal MD5 string
    """
    digest = hashlib.md5()
    with open(filepath, 'rb') as f:
        # iter() with a sentinel stops at the first empty read (end of file).
        for chunk in iter(lambda: f.read(1024 * 1024), b''):
            digest.update(chunk)
    return digest.hexdigest()


def get_file_size(filepath):
    """
    Report the size of a file in mebibytes.
    :param filepath: path of the file
    :return: size rounded to two decimals, followed by ' MB' (e.g. '0.5 MB')
    """
    size_mb = os.path.getsize(filepath) / 1048576.0
    return '{} MB'.format(round(float(size_mb), 2))


def get_cert_md5(apk):
    """
    Compute the MD5 of the APK's signing certificate (DER bytes).

    Certificates are taken from ``get_certificates_der_v2()`` first, then
    from each name in ``get_signature_names()``. When several certificates
    are present, the first one in that order is used; iterating a set, as
    was done before, picked an arbitrary one.
    :param apk: parsed APK object
    :return: hexadecimal MD5 string, or '' when no certificate is found
    """
    certs = list(apk.get_certificates_der_v2())
    certs += [apk.get_certificate_der(name) for name in apk.get_signature_names()]
    if not certs:
        return ''
    return hashlib.md5(certs[0]).hexdigest()


def get_apk_issuer(apk):
    """
    Get the issuer common name of the APK's signing certificate.

    Every signature returned by ``get_signature_names()`` is inspected; the
    last 'commonName' field found wins.
    :param apk: parsed APK object
    :return: the common name, or '' when none is found
    """
    issuer = ''
    for signature in apk.get_signature_names():
        cert = apk.get_certificate(signature)
        name_string = util.get_certificate_name_string(cert.issuer.native)
        for field in name_string.split(','):
            if 'commonName' in field:
                # partition() splits on the first '=' only, so a common name
                # that itself contains '=' is kept whole, and a field with no
                # '=' yields '' instead of raising IndexError.
                issuer = field.partition('=')[2]
    return issuer


def get_reinforce(apk):
    """
    Detect whether the APK is packed by a known reinforcement vendor.

    The base name of every file in the APK is looked up in the marker table
    returned by ``reinforcement_vendor()``; the first hit decides the result.
    Any error is printed and treated as "no match".
    :param apk: parsed APK object
    :return: vendor name, or the default '未加固/未知加固厂商' text
    """
    discern_result = '未加固/未知加固厂商'
    try:
        # Build the table once and use a dict lookup instead of rebuilding
        # and scanning it for every file in the archive.
        vendors = reinforcement_vendor()
        for apk_file in apk.get_files():
            vendor = vendors.get(os.path.basename(apk_file))
            if vendor is not None:
                return vendor
    except Exception as e:
        print('get_reinforce() Exception ->> ', e)
    return discern_result


def reinforcement_vendor():
    """
    Build the table of marker files used to recognise reinforcement vendors.
    :return: a new dict mapping marker file name -> vendor name
    """
    markers_by_vendor = (
        ('娜迦-企业版', ('libedog.so',)),
        ('娜迦-免费版', ('libchaosvmp.so', 'libddog.so', 'libfdog.so')),
        ('爱加密', ('ijiami.ajm', 'libexec.so', 'libexecmain.so', 'ijiami.dat')),
        ('梆梆-企业版', ('libDexHelper.so', 'libDexHelper-x86.so')),
        ('梆梆-免费版', ('libsecexe.so', 'libsecmain.so', 'libSecShell.so',
                     'libSecShell-x86.so')),
        ('360加固保', ('libprotectClass.so', 'libjiagu.so', 'libjiagu_art.so',
                    'libjiagu_x86.so')),
        ('通付盾', ('libegis.so', 'libNSaferOnly.so')),
        ('网秦', ('libnqshield.so',)),
        ('百度', ('libbaiduprotect.so',)),
        ('阿里聚安全', ('aliprotect.dat', 'libsgmain.so', 'libsgsecuritybody.so',
                    'libmobisec.so')),
        # 'libBugly.so' was commented out of this vendor's markers and stays disabled.
        ('腾讯乐固', ('libshell.so', 'mix.dex', 'mixz.dex')),
        ('腾讯御安全', ('libtosprotection.armeabi.so',
                    'libtosprotection.armeabi-v7a.so',
                    'libtosprotection.x86.so')),
        ('网易易盾', ('libnesec.so',)),
        ('APKProtect', ('libAPKProtect.so',)),
        ('几维安全', ('libkwscmm.so', 'libkwscr.so', 'libkwslinker.so')),
        ('顶象', ('libx3g.so',)),
        ('盛大', ('libapssec.so',)),
        ('瑞星', ('librsprotect.so',)),
    )
    # Invert the grouping into the flat marker -> vendor mapping.
    return {marker: vendor
            for vendor, markers in markers_by_vendor
            for marker in markers}


def download_file(excel_file):
    """
    Download every link listed in an Excel file and write a result report.

    Files are saved into ``download_dir``. For each link the report gets the
    URL, the result, and on success the file name, MD5 and size; files whose
    name ends with '.apk' (any letter case) also get the APK details.

    Requests are made with ``stream=True`` so that ``download_condition``
    inspects the status code and Content-Length before the body is fetched;
    otherwise an oversized file would be downloaded in full before being
    rejected.
    :param excel_file: path of the .xlsx file holding the links (column 1)
    :return: None
    """
    if not os.path.isfile(excel_file):
        print('输入的参数[ %s ]不是文件,请重新运行' % excel_file)
        return

    os.makedirs(download_dir, exist_ok=True)

    # Collect the links to download.
    req_urls = get_download_links(excel_file)
    # Prepare the output report and write its header row.
    wd = WriteData(filename='download_apk_' + now,
                   navigation_bar=['url', 'result', 'filename', 'file_md5', 'size',
                                   'app_name', 'pkg_name', 'cert_md5', 'app_version', 'target_sdk',
                                   'min_sdk', 'issuer', 'reinforce'])
    wd.write_data(is_init=True)

    req_headers = {
        'User-Agent': 'Mozilla/5.0 (Linux; Android 8.0.0; Pixel 2 XL Build/OPD1.170816.004) '
                      'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Mobile Safari/537.36',
        'Connection': 'keep-alive'}

    print('共%d条链接,准备开始下载...\n' % len(req_urls))
    print('序号', '\t下载结果', '\t\t 下载链接')
    for index, url in enumerate(req_urls, start=1):
        try:
            # The context manager releases the connection even when the
            # body is never read (rejected downloads).
            with requests.get(url, headers=req_headers, timeout=10, stream=True) as response:
                # Does the response meet the download conditions?
                if not download_condition(response):
                    print(index, '\t\t失败\t\t\t', url)
                    wd.write_data(row=index - 1, data=[url, '失败'])
                    continue

                # Write the body to disk chunk by chunk.
                file_name = get_file_name(url, response.headers)
                file_path = os.path.join(download_dir, file_name)
                with open(file_path, 'wb') as fw:
                    for chunk in response.iter_content(chunk_size=1024 * 1024):
                        fw.write(chunk)

            # Basic file information: name, MD5, size.
            print(index, '\t\t成功\t\t\t', url)
            excel_data = [url, '成功', file_name, get_file_md5(file_path), get_file_size(file_path)]
            # For APK files, append app name, package, signature, SDK versions, etc.
            if file_name.lower().endswith('.apk'):
                excel_data = excel_data + get_apk_info(file_path)
            wd.write_data(row=index - 1, data=excel_data)
        except Exception as e:
            # Best effort: one failing link must not stop the whole batch.
            print(index, '\t\t失败\t\t\t', url)
            print('请求时发生异常:', e)
            wd.write_data(row=index - 1, data=[url, '失败'])

    print('\n下载操作结束,共处理%d条链接。' % len(req_urls), end='')


def download_condition(response) -> bool:
    """
    Decide whether a response qualifies for download.

    A response is rejected (with a printed warning) when its status code is
    not 200, or when its Content-Length header reports more than 100 MB.
    A response without Content-Length is treated as size 0.
    :param response: object exposing ``status_code`` and ``headers``
    :return: True when the response may be downloaded, False otherwise
    """
    code = response.status_code
    headers = response.headers

    # The size is evaluated before the status check, as it always was.
    if 'Content-Length' in headers.keys():
        size_mb = round(float(headers['Content-Length']) / 1048576.0, 2)
    else:
        size_mb = 0

    if code != 200:
        print('Warning: 状态码非200')
        return False
    if size_mb > 100:
        print('Warning: 文件大于100MB')
        return False
    return True


def rename2md5(dir_path):
    """
    Rename every file under a directory tree to ``<md5><original suffix>``.

    Files without a suffix get '.apk'. Each successful rename is printed and
    appended to 'rename2md5.log' in the current working directory; the entry
    is written only after the rename succeeded. Files already carrying their
    MD5 name, and the log file itself, are left alone. A file that cannot be
    read or renamed is reported and skipped.
    :param dir_path: root directory to process (walked recursively)
    :return: None
    """
    print(f'准备进行重命名操作,路径:{dir_path}\n')
    log_path = os.path.join(os.getcwd(), 'rename2md5.log')

    for root, _dirs, files in os.walk(dir_path):
        for f in files:
            file_path = os.path.join(root, f)
            if os.path.abspath(file_path) == log_path:
                # Do not rename the tool's own log from a previous run.
                continue
            suffix = os.path.splitext(f)[1] or '.apk'
            try:
                new_name = get_file_md5(file_path) + suffix
                if new_name == f:
                    continue
                os.rename(file_path, os.path.join(root, new_name))
                save_log(log_path, f'{f} >> {new_name}')
            except OSError as e:
                # Covers unreadable files and FileExistsError on rename.
                print(f, e)
    print('\n重命名操作完毕,', end='')


def file_info(dir_path):
    """
    Collect information about every file under a directory tree and write
    it to an Excel report in ``download_dir``.

    Each file contributes its name, MD5 and size; files whose name ends with
    '.apk' (any letter case) also contribute the APK details.

    The row counter runs across the whole walk. Previously it restarted at 1
    for every directory, so files in subdirectories overwrote rows already
    written to the report.
    :param dir_path: directory to scan (walked recursively)
    :return: None
    """
    if not os.path.isdir(dir_path):
        print('参数输入有误,不是一个目录...')
        return

    os.makedirs(download_dir, exist_ok=True)

    wd = WriteData(filename='apk_info_' + now,
                   navigation_bar=['filename', 'file_md5', 'size', 'app_name', 'pkg_name', 'cert_md5',
                                   'app_version', 'target_sdk', 'min_sdk', 'issuer', 'reinforce'])
    wd.write_data(is_init=True)

    print('序号', '\t', '文件信息')
    index = 0  # running file number over all directories
    for root, _dirs, files in os.walk(dir_path):
        for f in files:
            index += 1
            file_path = os.path.join(root, f)

            # Basic file information.
            info = [f, get_file_md5(file_path), get_file_size(file_path)]
            # For APK files, append app name, package, signature, etc.
            if f.lower().endswith('.apk'):
                info = info + get_apk_info(file_path)

            print(index, '\t', str(info))
            wd.write_data(data=info, row=index - 1)
    print(f'\n已获取{dir_path}中的文件信息。', end='')


def move_file(excel_path, src_path):
    """
    Move the files named in an Excel sheet into a timestamped subfolder.

    The names are read from column 1 of the workbook; each name that exists
    directly inside ``src_path`` is moved to ``<src_path>/<now>``. A failed
    move is printed and does not stop the others.
    :param excel_path: path of the .xlsx file listing the file names
    :param src_path: directory that holds the files
    :return: None
    """
    print('准备移动文件.')

    destination_dir = os.path.join(src_path, now)
    if not os.path.exists(destination_dir):
        os.mkdir(destination_dir)

    wanted = get_excel_data(excel_path, 1)
    present = os.listdir(src_path)

    for entry in wanted:
        if entry not in present:
            continue
        origin = os.path.join(src_path, entry)
        target = os.path.join(destination_dir, entry)
        try:
            shutil.move(origin, target)
            print(entry, ' >> ', target)
        except Exception as err:
            print(f'移动{origin}时发生异常:', err)

    print(f'\n移动文件完成,共处理{len(wanted)}个文件。', end='')


def format_time(seconds):
    """
    Format a duration as HH:MM:SS.
    :param seconds: duration in seconds (int or float; fractions are dropped)
    :return: string with each part zero-padded to at least two digits
    """
    minutes, secs = divmod(seconds, 60)
    hours, minutes = divmod(minutes, 60)
    return ':'.join('%02d' % part for part in (hours, minutes, secs))


def main(argv):
    """
    Parse the command line and run the requested operations.

    Invalid options, a missing option, or ``-m`` without the source
    directory print the usage text and stop, instead of silently doing
    nothing or failing with IndexError.
    :param argv: full argument vector, program name first (e.g. sys.argv)
    :return: None
    """
    start = time.time()

    try:
        opts, args = getopt.getopt(argv[1:], 'hd:r:i:m:', ['help', 'download=', 'rename=', 'info=', 'move='])
    except getopt.GetoptError as err:
        print('读取参数时发生错误!', err)
        usage()
        return

    if not opts:
        # Nothing to do: show the help instead of only printing the elapsed time.
        usage()
        return

    for name, value in opts:
        if name in ('-h', '--help'):
            usage()
            return
        elif name in ('-d', '--download'):
            # python3 apk_toolbox_v1.2.py -d test.xlsx
            download_file(value)
        elif name in ('-r', '--rename'):
            # python3 apk_toolbox_v1.2.py -r <apk directory>
            rename2md5(value)
        elif name in ('-i', '--info'):
            # python3 apk_toolbox_v1.2.py -i <apk directory>
            file_info(value)
        elif name in ('-m', '--move'):
            # python3 apk_toolbox_v1.2.py -m test.xlsx <apk directory>
            if not args:
                # The directory is a positional argument after the option value.
                print('参数输入有误,-m 需要同时指定apk所在目录')
                usage()
                return
            move_file(value, args[0])

    print('耗时:%s' % format_time(time.time() - start))


# Run the command-line interface only when the file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main(sys.argv)

使用说明

查看help:
image

下载样本:
image

批量获取apk信息:
image

批量改名:
image
image

posted @ 2021-04-20 17:31  yaorc  阅读(756)  评论(0)    收藏  举报