ctf reverse 一道虚拟机风格的逆向题目/qsnctf/reverse/一起做杯下午茶吧

题目下载来源：qsnctf/reverse/一起做杯下午茶吧
题目直接下载：上传的文件

简单运行尝试：

Would you like to have a cup of tea with xianyuuuan ?
 s s s
 \___/     1

 \___/     2
1 or 2 >>1
Can you put some sugar into the tea?
012345678901234
length wrong!!
Oh!you slip the tea!Get out!!

Would you like to have a cup of tea with xianyuuuan ?
 s s s
 \___/     1

 \___/     2
1 or 2 >>2
xianyuuuan says ,Why not make the tea by yourself?
Put in the sugar you got in tea 1
012345678901234
Put in the tea leaves:
01234567890123456789
preparing...
NO.the sugar or leaves may be wrong

1.混淆去除

IDA中，进入loc_401620处，可以看到有花指令：

.text:00401741                 jnz     short loc_401745    //patch为jmp
.text:00401756                 jnz     short loc_40175A    //patch为jmp
.text:00401771                 jnz     short loc_401775    //patch为jmp

patch完毕之后于.text:00401620处按P来create function。

2. 去除反调试

.text:00401745                 call    ds:IsDebuggerPresent
.text:0040174B                 test    eax, eax
.text:0040174D                 jz      short loc_401763    //patch为jmp可去除反调试

3. 第一个加密函数

在004016FB可看到针对用户的第一部分的输入，做了字符串长度测量，必须为16字节长度

.text:004016FB                 lea     ecx, [ebp-30h]
.text:004016FE                 add     esp, 8
.text:00401701                 lea     edx, [ecx+1]
.text:00401704
.text:00401704 loc_401704:                             ; CODE XREF: sub_401620+E9↓j
.text:00401704                 mov     al, [ecx]
.text:00401706                 inc     ecx
.text:00401707                 test    al, al
.text:00401709                 jnz     short loc_401704
.text:0040170B                 sub     ecx, edx
.text:0040170D                 cmp     ecx, 10h
.text:00401710                 jz      short loc_401734
.text:00401712                 push    offset aLengthWrong ; "length wrong!!\n"
.text:00401717                 call    sub_401020
.text:0040171C                 add     esp, 4

loc_401775处到.text:00401803是一个加密函数，还原（以及解密函数）：

#define _CRT_SECURE_NO_WARNINGS
#include<stdio.h>

typedef unsigned int dword;
typedef unsigned char byte;

void keyfunc_encrypt()
{
	dword ebp3c_counter = 0;
	byte user_input[17] = "0123456789012345"; //测试输入
	dword* duser_input = (dword*)user_input;
	for (;ebp3c_counter < 4; ebp3c_counter+=2)
	{
		dword edi = 0;
		for (int ebp38 = 0x20; ebp38 > 0; ebp38--)
		{
			edi -= 0x61c88647;
			dword temp_n = duser_input[ebp3c_counter];
			dword temp_nPLUS1 = duser_input[ebp3c_counter + 1];
			temp_n += ((temp_nPLUS1 >> 5) + 0x696D616E) ^ ((temp_nPLUS1 << 4) + (0x67626463^0xa)) ^ ((edi) + temp_nPLUS1);
			duser_input[ebp3c_counter] = temp_n;
			temp_nPLUS1 += ((temp_n >> 5) + (0x6B696C69^0xa)) ^ ((temp_n << 4) + 0x79645F65) ^ ((edi) + temp_n);
			duser_input[ebp3c_counter + 1] = temp_nPLUS1;
		}
	}
        printf("加密结果为：%s", user_input);
	return;
}

//对应的解密函数
//在加密函数之后
//.text:0040180E                 cmp     eax, dword_404174
//此处可以看到针对用户第一部分输入的加密结果的比对。dword_404174开始的16字节即为要解密的目标。
void keyfunc_decrypt()
{
	byte target[16] = { 0x12, 0xd1, 0x82, 0xe8, 0x40, 0xea, 0x66, 0xea, 0xde, 0x1f, 0x17, 0xff, 0x08, 0x0d, 0x51, 0x2a };
	dword ebp3c_counter = 2;
	dword* duser_input = (dword*)target;
	for (; ebp3c_counter != dword(-2); ebp3c_counter -= 2)
	{
		dword edi = (dword)(0 - 0x20 * 0x61c88647);
		//dword edi = 0xC6EF3720;
		for (int ebp38 = 0x20; ebp38 > 0; ebp38--)
		{
			
			dword temp_n = duser_input[ebp3c_counter];
			dword temp_nPLUS1 = duser_input[ebp3c_counter + 1];
			temp_nPLUS1 -= ((temp_n >> 5) + (0x6B696C69 ^ 0xa)) ^ ((temp_n << 4) + 0x79645F65) ^ ((edi) + temp_n);
			duser_input[ebp3c_counter + 1] = temp_nPLUS1;
			temp_n -= ((temp_nPLUS1 >> 5) + 0x696D616E) ^ ((temp_nPLUS1 << 4) + (0x67626463 ^ 0xa)) ^ ((edi) + temp_nPLUS1);
			duser_input[ebp3c_counter] = temp_n;
			edi += 0x61c88647;
		}
	}
        printf("解密结果为：%s", target); //解密结果只有前16个字节
	return;
}

4. 第二部分加密函数

在第一部分的加密结果比对成功之后，最终会调用sub_401570函数。

.text:00401570                 push    ecx
.text:00401571                 push    offset aXianyuuuanSays ; "xianyuuuan says ,Why not make the tea b"...
.text:00401576                 call    sub_401020
.text:0040157B                 add     esp, 4
.text:0040157E                 push    offset aPutInTheSugarY ; "Put in the sugar you got in tea 1\n"
.text:00401583                 call    sub_401020
.text:00401588                 add     esp, 4
.text:0040158B                 push    offset unk_404544
.text:00401590                 push    offset aS       ; "%s"
.text:00401595                 call    sub_401060             ;先输入第一部分的解密结果
.text:0040159A                 push    offset aPutInTheTeaLea ; "Put in the tea leaves:\n"
.text:0040159F                 call    sub_401020
.text:004015A4                 add     esp, 0Ch
.text:004015A7                 push    (offset xmmword_40451C+4) ;可看到用户第二部分的输入存储于404520处（即imagebase+4520）。
.text:004015AC                 push    offset aS       ; "%s"
.text:004015B1                 call    sub_401060             ;再输入“leaves”
.text:004015B6                 call    sub_4013B0             ;第二个加密函数
.text:004015BB                 push    offset aPreparing ; "preparing...\n"
.text:004015C0                 call    sub_401020             ;打印字符串
.text:004015C5                 add     esp, 0Ch
.text:004015C8                 push    7D0h            ; dwMilliseconds
.text:004015CD                 call    ds:Sleep
.text:004015D3                 mov     eax, dword_4044EC
.text:004015D8                 cmp     eax, dword_404058      ;对第二部分输入的加密结果进行比对, 对比加密结果的第一个dword
.text:004015DE                 jnz     short loc_401602
.text:004015E0                 mov     eax, dword_4044F0
.text:004015E5                 cmp     eax, dword_40405C      ;对第二部分输入的加密结果进行比对, 对比加密结果的第二个dword。
                                                              ;前两个dword相同则对比成功
.text:004015EB                 jnz     short loc_401602
.text:004015ED                 push    offset aWowYouAreTheTe ; "wow!you are the tea master!!!\n"
.text:004015F2                 call    sub_401020

确定004013B0为第二部分的加密函数。
第二个加密函数类似虚拟机，先malloc(0x7c)

.text:004013CB                 push    7Ch ; '|'       ; Size
.text:004013CD                 call    ds:malloc

此0x7C空间结构为：

dword s0;
dword s1; 
dword s2; 
dword s3;
dword s4; 
dword s5; 
dword s6; 
dword s7; //前8个双字是加密函数的中间运算结果的存放处，是操作台
dword eip; //类似指令指针，它在每轮加密开始时初始化为0x00404078(imagebase+0x4078)
dword 0xF0; //给每个函数的编号
dword sub_4010A0    //编号为0xF0的函数的地址
dword 0xF1;
dword sub_401130
dword 0xF2;
dword sub_401160
dword 0xF3;
dword sub_4012B0
dword 0xF4;
dword sub_401310
dword 0xF5;
dword sub_401190
dword 0xF6;
dword sub_401200
dword 0xF7;
dword sub_401100
dword 0xF8;
dword sub_401280
dword 0xF9;
dword sub_401340
dword 0xFA;
dword sub_4012D0

之后初始化0x7c空间，即将前8个双字（s0~s7)清零，eip重新指向虚拟机指令开始处404078。
404518地址处开始的8个dword中存放着s0~s7的八个地址。
加密函数粗略还原：


// VM instruction stream stored at 0x404078 in the binary.
// Each instruction starts with an opcode byte 0xF0..0xFA; 0xFB ends the program.
byte byte4078_encrypt_key_map[] = {
	 0xF0, 0xE0, 0x05, 0x4D, 0xF0, 0xE0, 0x04, 0x00, 0xF0, 0xE0, 0x03, 0x00, 0xF3, 0xCC, 0xF0, 0xE0,
	 0x06, 0x01, 0xF9, 0xE0, 0x00, 0x00, 0xF0, 0xE0, 0x06, 0x05, 0xF5, 0x00, 0x06, 0xF0, 0xE1, 0x01,
	 0x00, 0xF0, 0xE0, 0x06, 0x01, 0xF9, 0xE0, 0x00, 0x00, 0xF0, 0xE0, 0x06, 0x06, 0xF6, 0x00, 0x06,
	 0xF7, 0x01, 0x00, 0xF0, 0xE0, 0x06, 0x01, 0xF9, 0xE0, 0x00, 0x00, 0xF1, 0x00, 0x01, 0xF0, 0xE1,
	 0x07, 0x00, 0xF0, 0xE1, 0x00, 0x04, 0xF0, 0xE0, 0x06, 0x03, 0xF8, 0x06, 0x00, 0xF9, 0xE0, 0x00,
	 0x01, 0xF1, 0x00, 0x04, 0xF7, 0x07, 0x00, 0xF0, 0xE0, 0x06, 0x00, 0xF9, 0xE0, 0x00, 0x00, 0xF1,
	 0x00, 0x07, 0xF0, 0xE0, 0x06, 0x00, 0xF9, 0xE1, 0x00, 0x00, 0xF0, 0xE0, 0x06, 0x00, 0xF9, 0xE0,
	 0x02, 0x02, 0xF2, 0x04, 0x02, 0xF0, 0xE0, 0x06, 0x00, 0xF9, 0xE0, 0x00, 0x00, 0xF0, 0xE0, 0x06,
	 0x05, 0xF5, 0x00, 0x06, 0xF0, 0xE1, 0x01, 0x00, 0xF0, 0xE0, 0x06, 0x00, 0xF9, 0xE0, 0x00, 0x00,
	 0xF0, 0xE0, 0x06, 0x06, 0xF6, 0x00, 0x06, 0xF7, 0x01, 0x00, 0xF0, 0xE0, 0x06, 0x00, 0xF9, 0xE0,
	 0x00, 0x00, 0xF1, 0x00, 0x01, 0xF0, 0xE1, 0x07, 0x00, 0xF0, 0xE1, 0x00, 0x04, 0xF0, 0xE0, 0x06,
	 0x07, 0xF6, 0x00, 0x06, 0xF0, 0xE0, 0x06, 0x03, 0xF8, 0x06, 0x00, 0xF9, 0xE0, 0x00, 0x01, 0xF1,
	 0x00, 0x04, 0xF7, 0x07, 0x00, 0xF0, 0xE0, 0x06, 0x01, 0xF9, 0xE0, 0x00, 0x00, 0xF1, 0x00, 0x07,
	 0xF0, 0xE0, 0x06, 0x01, 0xF9, 0xE1, 0x00, 0x00, 0xF0, 0xE0, 0x06, 0x01, 0xF1, 0x03, 0x06, 0xFA,
	 0x03, 0x05, 0xF4, 0xD4, 0xFB, 0x00, 0x00, 0x00
};
// Stand-in for the dword table at 0x404518 in the binary. keyfunc2_encrypt stores
// the addresses of the VM registers s0..s7 in slots 0..7 at the start of every round.
dword heap_addr_dword4518[11] = { 0 };
// View of the same storage starting 4 bytes in (0x40451C), i.e. slot 1 of the table.
dword* xmmword451c = (dword*)((byte*)&heap_addr_dword4518[0] + 4);
// The user's second input is stored at address 0x404520, which is slot 2 of the table above.
dword* user_input = &xmmword451c[1];

// Two dwords modelling 0x404510/0x404514: the 8 bytes the VM works on in the current round.
dword input_0__dword_404510[2] = { 0 };
dword* input_1__dword_404514 = &input_0__dword_404510[1];
byte unk_404544[16 + 1] = "put_some_sugar!!"; // decrypted result of part one, which is entered first
dword dword_404170 = 0x114514;
// Base addresses for the memory operands of sub_401340; the instruction's last byte selects the entry.
dword* off_403300[4] = { input_0__dword_404510, (dword*)&unk_404544, &dword_404170, 0 };
// Correction added after an int -> double conversion: entry 1 (sign bit set) adds 2^32,
// so the 32-bit value is effectively treated as unsigned.
double qword_403330[2] = { 0, 4294967296 };
// Result area modelling 0x4044EC/0x4044F0; keyfunc2_encrypt writes the encrypted dwords here.
dword d4044EC[9] = { 0 }; dword* d4044F0 = &d4044EC[1];
// Base passed to pow() by sub_401190 and sub_401200.
double xmm403310 = 2.0L;

// Opcode 0xF0: mov. Instruction layout: F0 <mode> <dst> <src>, 4 bytes.
//   mode 0xE0: register[dst] = immediate byte <src>
//   mode 0xE1: register[dst] = register[src]
// Any other mode byte only advances the VM instruction pointer.
// heap: the 0x7C-byte VM context; the instruction pointer is the dword at +0x20.
void sub_4010A0(byte* heap)
{
	dword* vm_eip = (dword*)&heap[0x20];
	const byte* insn = (const byte*)(*vm_eip);

	switch (insn[1])
	{
	case 0xE0:
		*(dword*)(heap_addr_dword4518[insn[2]]) = insn[3];
		break;
	case 0xE1:
		*(dword*)(heap_addr_dword4518[insn[2]]) = *(dword*)(heap_addr_dword4518[insn[3]]);
		break;
	default:
		break;
	}

	*vm_eip += 4;
}

// Opcode 0xF1: add. Instruction layout: F1 <dst> <src>, 3 bytes.
// register[dst] += register[src]
// heap: the VM context; the instruction pointer is the dword at +0x20.
void sub_401130(byte* heap)
{
	dword* vm_eip = (dword*)&heap[0x20];
	const byte* insn = (const byte*)(*vm_eip);
	dword* dst = (dword*)heap_addr_dword4518[insn[1]];
	const dword* src = (const dword*)heap_addr_dword4518[insn[2]];

	*dst = *dst + *src;
	*vm_eip += 3;
}

// Opcode 0xF2: sub. Instruction layout: F2 <dst> <src>, 3 bytes.
// register[dst] -= register[src]
// heap: the VM context; the instruction pointer is the dword at +0x20.
void sub_401160(byte* heap)
{
	dword* vm_eip = (dword*)&heap[0x20];
	const byte* insn = (const byte*)(*vm_eip);
	dword* dst = (dword*)heap_addr_dword4518[insn[1]];
	const dword* src = (const dword*)heap_addr_dword4518[insn[2]];

	*dst = *dst - *src;
	*vm_eip += 3;
}

// Opcode 0xF3: unconditional forward jump. Instruction layout: F3 <offset>.
// The unsigned offset byte is added to the address of the instruction itself.
// heap: the VM context; the instruction pointer is the dword at +0x20.
void sub_4012B0(byte* heap)
{
	dword* vm_eip = (dword*)&heap[0x20];
	byte* insn = (byte*)(*vm_eip);

	*vm_eip = (dword)(insn + insn[1]);
}

// Opcode 0xF4: conditional backward jump. Instruction layout: F4 <offset>.
// It occurs once in the program at 0x404078, right after the 0xFA instruction,
// and decides whether the encryption loop ends.
//   flag at 0x40451C == 1: fall through (eip += 2), which lands on the 0xFB terminator
//   otherwise:             eip = address of this instruction - <offset>
// heap: the VM context; the instruction pointer is the dword at +0x20.
void sub_401310(byte* heap)
{
	dword* vm_eip = (dword*)&heap[0x20];
	byte* insn = (byte*)(*vm_eip);

	// Both branches store a dword, like every other handler, so the write never
	// extends past the 4-byte eip slot.
	if (xmmword451c[0] == 1)
	{
		*vm_eip = (dword)(insn + 2);
	}
	else
	{
		*vm_eip = (dword)(insn - insn[1]);
	}
}

// Opcode 0xF5: floating-point multiply. Instruction layout: F5 <dst> <src>, 3 bytes.
// register[dst] = (int)(__int64)( pow(2.0, u(register[src])) * u(register[dst]) )
// where u(x) is the int converted to double plus the qword_403330 correction
// (2^32 when the sign bit is set).
// heap: the VM context; the instruction pointer is the dword at +0x20.
void sub_401190(byte* heap)
{
	dword* vm_eip = (dword*)&heap[0x20];
	const byte* insn = (const byte*)(*vm_eip);

	int exponent_raw = *(int*)heap_addr_dword4518[insn[2]];
	double exponent = (double)exponent_raw + qword_403330[(dword)exponent_raw >> 0x1f];
	double factor = pow(xmm403310, exponent);

	int* dst = (int*)heap_addr_dword4518[insn[1]];
	int value_raw = *dst;
	double value = (double)value_raw + qword_403330[(dword)value_raw >> 0x1f];

	// Going through __int64 first keeps the wide product before narrowing to 32 bits.
	*dst = (int)(__int64)(factor * value);
	*vm_eip += 3;
}

// Opcode 0xF6: floating-point divide. Instruction layout: F6 <dst> <src>, 3 bytes.
// register[dst] = (int)(__int64)( u(register[dst]) / pow(2.0, u(register[src])) )
// where u(x) is the int converted to double plus the qword_403330 correction
// (2^32 when the sign bit is set).
// heap: the VM context; the instruction pointer is the dword at +0x20.
void sub_401200(byte* heap)
{
	dword* vm_eip = (dword*)&heap[0x20];
	const byte* insn = (const byte*)(*vm_eip);

	int* dst = (int*)(heap_addr_dword4518[insn[1]]);
	int dividend_raw = *dst;
	double dividend = (double)dividend_raw + qword_403330[(dword)dividend_raw >> 0x1f];

	int exponent_raw = (int)*(dword*)heap_addr_dword4518[insn[2]];
	double divisor = pow(xmm403310, (double)exponent_raw + qword_403330[(dword)exponent_raw >> 0x1f]);

	*dst = (int)(__int64)(dividend / divisor);
	*vm_eip += 3;
}

// Opcode 0xF7: xor. Instruction layout: F7 <dst> <src>, 3 bytes.
// register[dst] ^= register[src]
// heap: the VM context; the instruction pointer is the dword at +0x20.
void sub_401100(byte* heap)
{
	dword* vm_eip = (dword*)&heap[0x20];
	const byte* insn = (const byte*)(*vm_eip);
	dword* dst = (dword*)heap_addr_dword4518[insn[1]];
	const dword* src = (const dword*)heap_addr_dword4518[insn[2]];

	*dst = *dst ^ *src;
	*vm_eip += 3;
}

// Opcode 0xF8: and. Instruction layout: F8 <dst> <src>, 3 bytes.
// register[dst] &= register[src]
// heap: the VM context; the instruction pointer is the dword at +0x20.
void sub_401280(byte* heap)
{
	dword* vm_eip = (dword*)&heap[0x20];
	const byte* insn = (const byte*)(*vm_eip);
	dword* dst = (dword*)heap_addr_dword4518[insn[1]];
	const dword* src = (const dword*)heap_addr_dword4518[insn[2]];

	*dst = *dst & *src;
	*vm_eip += 3;
}

// Opcode 0xF9: memory access. Instruction layout: F9 <mode> <reg> <table>, 4 bytes.
// The memory cell is off_403300[<table>] + s6 * 4, where s6 is the dword at heap+0x18.
//   mode 0xE0: register[reg] = cell   (fetch user input, key material or the constant)
//   mode 0xE1: cell = register[reg]   (store an intermediate result)
// Any other mode byte only advances the VM instruction pointer.
// heap: the VM context; the instruction pointer is the dword at +0x20.
void sub_401340(byte* heap)
{
	dword* vm_eip = (dword*)&heap[0x20];
	const byte* insn = (const byte*)(*vm_eip);

	switch (insn[1])
	{
	case 0xE0:
	{
		dword index = *(dword*)&heap[0x18];
		dword base = (dword)(off_403300[insn[3]]);
		*(dword*)heap_addr_dword4518[insn[2]] = *(dword*)(base + index * 4);
		break;
	}
	case 0xE1:
	{
		dword index = *(dword*)&heap[0x18];
		dword base = (dword)(off_403300[insn[3]]);
		*(dword*)(base + index * 4) = *(dword*)heap_addr_dword4518[insn[2]];
		break;
	}
	default:
		break;
	}

	*vm_eip += 4;
}

// Opcode 0xFA: loop-exit test. Instruction layout: FA <a> <b>, 3 bytes.
// In the program at 0x404078 it compares s3 (the round counter) with s5 (constant 0x4D);
// once register[a] > register[b] it writes 1 to 0x40451C, which makes the following
// 0xF4 instruction leave the loop.
// heap: the VM context; the instruction pointer is the dword at +0x20.
void sub_4012D0(byte* heap)
{
	dword* vm_eip = (dword*)&heap[0x20];
	const byte* insn = (const byte*)(*vm_eip);

	// The instruction pointer is advanced before the operands are compared.
	*vm_eip = (dword)(insn + 3);

	dword lhs = *(dword*)heap_addr_dword4518[insn[1]];
	dword rhs = *(dword*)heap_addr_dword4518[insn[2]];
	if (rhs < lhs)
	{
		xmmword451c[0] = 1;
	}
}
void keyfunc2_encrypt()
{
	printf("第二部分输入：");
	scanf("%s", (byte*)user_input);
	byte* heap = (byte*)malloc(0x7c);
	dword* ebp20[4] = { 
		(dword*)((byte*)heap + 0x4), 
		(dword*)((byte*)heap + 0x8),
		(dword*)((byte*)heap + 0xc), 
		(dword*)((byte*)heap + 0x10) };

	for (int outer_counter_edi = 0; outer_counter_edi < 0x20; outer_counter_edi += 8)
	{
		input_0__dword_404510[0] = user_input[(dword)(outer_counter_edi) >> 2];
		*input_1__dword_404514 = user_input[((dword)(outer_counter_edi) >> 2) + 1];
		*(dword*)(&heap[0x14]) = 0;
		heap_addr_dword4518[5] = (dword)&heap[0x14];
		heap[0x24] = 0xF0;
		*(dword*)(&heap[0x28]) = (dword)sub_4010A0;
		heap[0x2c] = 0xF1;
		*(dword*)(&heap[0x30]) = (dword)sub_401130;
		heap[0x34] = 0xF2;
		*(dword*)(&heap[0x38]) = (dword)sub_401160;
		heap[0x3c] = 0xF3;
		*(dword*)(&heap[0x40]) = (dword)sub_4012B0;
		heap[0x44] = 0xF4;
		*(dword*)(&heap[0x48]) = (dword)sub_401310;
		heap[0x4C] = 0xF5;
		*(dword*)(&heap[0x50]) = (dword)sub_401190;
		heap[0x54] = 0xF6;
		*(dword*)(&heap[0x58]) = (dword)sub_401200;
		heap[0x5C] = 0xF7;
		*(dword*)(&heap[0x60]) = (dword)sub_401100;
		heap[0x64] = 0xF8;
		*(dword*)(&heap[0x68]) = (dword)sub_401280;
		heap[0x6C] = 0xF9;
		*(dword*)(&heap[0x70]) = (dword)sub_401340;
		heap[0x74] = 0xFA;
		*(dword*)(&heap[0x78]) = (dword)sub_4012D0;
		//
		*(dword*)(&heap[0]) = 0;
		*(dword*)(&heap[0x4]) = 0;
		*(dword*)(&heap[0x8]) = 0;
		*(dword*)(&heap[0xC]) = 0;
		*(dword*)(&heap[0x10]) = 0;
		*(dword*)(&heap[0x18]) = 0;
		*(dword*)(&heap[0x1C]) = 0;
		//
		heap_addr_dword4518[6] = (dword)&heap[0x18];
		heap_addr_dword4518[7] = (dword)&heap[0x1C];
		*(dword*)(&heap[0x20]) = (dword)&byte4078_encrypt_key_map[0];
		heap_addr_dword4518[0] = (dword)heap;
		{//movups  xmmword_40451C, xmm0
			xmmword451c[0] = (dword)ebp20[0];
			xmmword451c[1] = (dword)ebp20[1];
			xmmword451c[2] = (dword)ebp20[2];
			xmmword451c[3] = (dword)ebp20[3];
		}
		if (byte4078_encrypt_key_map[0] != 0xFB)
		{
			for (dword for_ecx = 0; **(byte**)&heap[0x20] != 0xFB;)
			{
				for_ecx = 0;
				for (byte* for_eax = &heap[0x24]; for_ecx < 0xB; for_eax += 8)
				{
					if (**(byte**)&heap[0x20] == for_eax[0])
					{
						((void (*)(byte*))(*(dword*)&heap[0x28 + for_ecx * 8]))(heap);
						break;
					}
					for_ecx++;
				}
			}
		}
		d4044EC[(dword)(outer_counter_edi) >> 2] = input_0__dword_404510[0];
		d4044F0[(dword)(outer_counter_edi) >> 2] = *input_1__dword_404514;
	}
	
	//这个结果比对是在text:004015D3处的
	if ((d4044EC[0] == 0x0D5BF7CB6) && (d4044F0[0] == 0x1CC08FA5))
	{
		printf("success\n");
		return;
	}
	printf("contrast failed.\n");
	return;
}

应当注意的是，在217行

        input_0__dword_404510[0] = user_input[(dword)(outer_counter_edi) >> 2];
        *input_1__dword_404514 = user_input[((dword)(outer_counter_edi) >> 2) + 1]

第一轮主循环开始时，用户输入的前8个字节会被转移到404510处的8字节空间中。
由于user_input的地址为404520，而404518处开始的8个dword，在主循环开始时初始化为s0到s7的八个地址（例如252、253、255、257行处），故当开始第二轮循环时，user_input，即404520处存放的应该是s2、s3的地址，这个地址由于每次malloc的堆的地址不同而变化，因此第二、三、四轮的加密是没有意义的，这也是004015D8处只比对了8个字节（也就是只比较第一轮的加密结果）的原因。
因此之后写的程序只加密用户输入的前8个字节，加密程序只写一轮。

5. 404078处虚拟机指令序列分析

右侧对应的只是指令含义分析，缺少类型转换。

0xF0, 0xE0, 0x05, 0x4D, 				s5 = 0x4d
0xF0, 0xE0, 0x04, 0x00, 				s4 = 0x00
0xF0, 0xE0, 0x03, 0x00, 				s3 = 0x00
0xF3, 0xCC,								goto label2;
label1: 
0xF0, 0xE0, 0x06, 0x01, 				s6 = 0x01
0xF9, 0xE0, 0x00, 0x00, 				s0 = input[1]
0xF0, 0xE0, 0x06, 0x05, 				s6 = 0x05
0xF5, 0x00, 0x06, 				        s0 = pow(2.0, 5)*(s0+3330[s0>>1f])
0xF0, 0xE1, 0x01, 0x00, 				s1 = s0;
0xF0, 0xE0, 0x06, 0x01,					s6 = 0x01;
0xF9, 0xE0, 0x00, 0x00, 				s0 = input[1]
0xF0, 0xE0, 0x06, 0x06, 				s6 = 0x6
0xF6, 0x00, 0x06,						s0 = (s0 + 3330[s0>>1f])/(pow(2.0, 6))
0xF7, 0x01, 0x00,						s1 ^= s0
0xF0, 0xE0, 0x06, 0x01,					s6 = 0x01
0xF9, 0xE0, 0x00, 0x00,					s0 = input[1]
0xF1, 0x00, 0x01,						s0 += s1
0xF0, 0xE1, 0x07, 0x00,					s7 = s0
0xF0, 0xE1, 0x00, 0x04,					s0 = s4
0xF0, 0xE0, 0x06, 0x03,					s6 = 0x3
0xF8, 0x06, 0x00,						s6 &= s0
0xF9, 0xE0, 0x00, 0x01,					s0 = a_putsomesugar[s6]
0xF1, 0x00, 0x04,						s0 += s4
0xF7, 0x07, 0x00,						s7 ^= s0
0xF0, 0xE0, 0x06, 0x00,					s6 = 0x00
0xF9, 0xE0, 0x00, 0x00,					s0 = input[0]
0xF1, 0x00, 0x07,						s0 += s7
0xF0, 0xE0, 0x06, 0x00,					s6 = 0
0xF9, 0xE1, 0x00, 0x00,					input[0] = s0
0xF0, 0xE0, 0x06, 0x00,					s6 = 0x00	//与下面相配合
0xF9, 0xE0, 0x02, 0x02,					s2 = 0x114514
0xF2, 0x04, 0x02,						s4 -= s2
0xF0, 0xE0, 0x06, 0x00,					s6 = 0x00
0xF9, 0xE0, 0x00, 0x00,					s0 = input[0]
0xF0, 0xE0, 0x06, 0x05,					s6 = 0x5
0xF5, 0x00, 0x06,						s0 = pow(2.0, 5) * (s0 + 3330[s0>>1f])
0xF0, 0xE1, 0x01, 0x00, 				s1 = s0
0xF0, 0xE0, 0x06, 0x00,					s6 = 0x00
0xF9, 0xE0, 0x00, 0x00,					s0 = input[0]
0xF0, 0xE0, 0x06, 0x06, 				s6 = 0x6
0xF6, 0x00, 0x06,						s0 = (s0 + 3330[s0>>1f]) / (pow(2.0, 6))
0xF7, 0x01, 0x00,						s1 ^= s0
0xF0, 0xE0, 0x06, 0x00,					s6 = 0x00
0xF9, 0xE0, 0x00, 0x00,					s0 = input[0]
0xF1, 0x00, 0x01,						s0 += s1
0xF0, 0xE1, 0x07, 0x00,					s7 = s0
0xF0, 0xE1, 0x00, 0x04,					s0 = s4
0xF0, 0xE0, 0x06, 0x07,					s6 = 0x7
0xF6, 0x00, 0x06,	 					s0 = (s0 + 3330[s0>>1f]) / (pow(2.0, 7))
0xF0, 0xE0, 0x06, 0x03,					s6 = 0x03
0xF8, 0x06, 0x00,						s6 &= s0
0xF9, 0xE0, 0x00, 0x01,					s0 = a_put_some_sugar[s6]
0xF1, 0x00, 0x04,						s0 += s4
0xF7, 0x07, 0x00,						s7 ^= s0
0xF0, 0xE0, 0x06, 0x01,					s6 = 0x01
0xF9, 0xE0, 0x00, 0x00,					s0 = input[1]
0xF1, 0x00, 0x07,						s0 += s7
0xF0, 0xE0, 0x06, 0x01,					s6 = 0x01
0xF9, 0xE1, 0x00, 0x00,					input[1] = s0
label2:
0xF0, 0xE0, 0x06, 0x01,					s6 = 0x01
0xF1, 0x03, 0x06,						s3 += s6
0xFA, 0x03, 0x05,						if (s3 > s5) 向40451c处写入1
0xF4, 0xD4,		若40451c处为1，则goto label3;否则goto label1
label3:
0xFB, 0x00, 0x00, 0x00

在进行指令分析的过程中，可以知道例如上述第六行处的s6=0x01只是为了配合下面的s0=input[1]的，故可以进行指令的简化。不过第21行和第41行处给s6赋值之后有对s6的运算，不是为了配合其他虚拟机指令，不可省略。

#define _CRT_SECURE_NO_WARNINGS
#include<math.h>
#include<stdio.h>
//#include<xmmintrin.h>
//#include<intrin.h>

typedef unsigned int dword;
typedef unsigned char byte;

// Working buffer modelling 0x404510: the two dwords being encrypted (one spare dword follows).
dword input_0__dword_404510[3] = { 0 };
dword* input_1__dword_404514 = &input_0__dword_404510[1];
// Key material: the 16 bytes are read as four dwords.
byte unk_404544[16 + 1] = "put_some_sugar!!";
// Constant subtracted from s4 in every round.
dword dword_404170 = 0x114514;
dword* off_403300[4] = { input_0__dword_404510, (dword*)&unk_404544, &dword_404170, 0 };

dword* input = &input_0__dword_404510[0];
dword* dword_put_some_sugar = (dword*)unk_404544;

// Correction added after an int -> double conversion: entry 1 (sign bit set) adds 2^32.
double qword_403330[2] = { 0, 4294967296 };
// Entry 0 is the loop-exit flag that the VM keeps at 0x40451C.
dword xmmword451c[2] = { 0 };
// Initialised by pow() calls at start-up; this needs a C++ compiler, since C requires
// constant initialisers at file scope.
double pow_2_5 = pow(2.0, 5.0);
double pow_2_6 = pow(2.0, 6.0);
double pow_2_7 = pow(2.0, 7.0);

// Straight-line transcription of the VM program at 0x404078 with the register
// "set s6, then access memory" pairs folded away. label1/label2 correspond to the
// labels in the instruction listing, so the gotos are kept on purpose.
// Reads the input from stdin, encrypts its first 8 bytes in place in
// input_0__dword_404510 and prints the two resulting dwords in hex.
void keyfunc2_encrypt_simplify()
{
	dword s0; dword s1; dword s2; dword s3;
	dword s4; dword s5; dword s6; dword s7;
	s0 = s1 = s2 = s3 = s4 = s5 = s6 = s7 = 0;
	dword* input = &input_0__dword_404510[0];
	double var8 = 0;

	// The exit flag is global; clear it so a second call does not stop immediately.
	xmmword451c[0] = 0;

	printf("第二部分输入：");
	// The buffer holds 3 dwords = 12 bytes: at most 11 characters plus the NUL.
	if (scanf("%11s", (char*)(input)) != 1)
	{
		return;
	}

	s5 = 0x4d;
	s4 = 0;
	s3 = 0;
	goto label2;
label1:
	// First half-round: input[0] += F(input[1]) ^ (key[s4 & 3] + s4)
	s0 = input[1];
	s0 = (int)(__int64)(pow_2_5 * ((double)(int)(s0)+qword_403330[s0 >> 0x1f]));
	s1 = s0;
	s0 = input[1];
	s0 = (int)(__int64)(((double)(int)s0 + qword_403330[s0 >> 0x1f]) / pow_2_6);
	s1 ^= s0;
	s0 = input[1];
	s0 += s1;
	s7 = s0;
	s0 = s4;
	s6 = 3;
	s6 &= s0;
	s0 = *(dword*)(&unk_404544[s6 << 2]);
	s0 += s4;
	s7 ^= s0;

	s0 = input[0];
	s0 += s7;
	input[0] = s0;
	s2 = dword_404170;
	s4 -= s2;
	// Second half-round: input[1] += F(input[0]) ^ (key[(s4 / 2^7) & 3] + s4)
	s0 = input[0];
	s0 = (int)(__int64)(pow_2_5 * ((double)(int)s0 + qword_403330[s0 >> 0x1f]));
	s1 = s0;
	s0 = input[0];
	s0 = (int)(__int64)(((double)(int)s0 + qword_403330[s0 >> 0x1f]) / pow_2_6);
	s1 ^= s0;
	s0 = input[0];
	s0 += s1;
	s7 = s0;
	s0 = s4;
	var8 = (double)(int)s0 + qword_403330[s0 >> 0x1f];
	s0 = (int)(__int64)(var8 / pow_2_7);
	s6 = 0x03;
	s6 &= s0;
	s0 = *(dword*)(&unk_404544[s6 << 2]);
	s0 += s4;
	s7 ^= s0;
	s0 = input[1];
	s0 += s7;
	input[1] = s0;
label2:
	// s3 counts rounds; the loop ends once s3 exceeds s5 (0x4d).
	s6 = 0x01;
	s3 += s6;
	if (s3 > s5)
	{
		xmmword451c[0] = 1;
	}
	if (xmmword451c[0] == 1)
	{
		printf("%x, %x\n", input[0], input[1]);
		printf("here is the function end.\n");
		return;
	}
	else
	{
		goto label1;
	}
}

进一步简化为

#define _CRT_SECURE_NO_WARNINGS
#include<math.h>
#include<stdio.h>
//#include<xmmintrin.h>
//#include<intrin.h>

typedef unsigned int dword;
typedef unsigned char byte;

// Working buffer modelling 0x404510: the two dwords being encrypted (one spare dword follows).
dword input_0__dword_404510[3] = { 0 };
dword* input_1__dword_404514 = &input_0__dword_404510[1];
// Key material: the 16 bytes are read as four dwords.
byte unk_404544[16 + 1] = "put_some_sugar!!";
// Same value as the constant subtracted from s4 in every round.
dword dword_404170 = 0x114514;
dword* off_403300[4] = { input_0__dword_404510, (dword*)&unk_404544, &dword_404170, 0 };

dword* input = &input_0__dword_404510[0];
dword* dword_put_some_sugar = (dword*)unk_404544;

// Correction added after an int -> double conversion: entry 1 (sign bit set) adds 2^32.
double qword_403330[2] = { 0, 4294967296 };
dword xmmword451c[2] = { 0 };
// Initialised by pow() calls at start-up; this needs a C++ compiler, since C requires
// constant initialisers at file scope.
double pow_2_5 = pow(2.0, 5.0);
double pow_2_6 = pow(2.0, 6.0);
double pow_2_7 = pow(2.0, 7.0);

// VM opcode 0xF5 with exponent 5: the value, taken as unsigned, multiplied by 2^5
// and truncated to 32 bits. That is exactly a left shift by 5, so no floating
// point, no __int64 and no run-time-initialised global is needed.
// s0 is the 32-bit input (same type as dword); the result is returned as int.
static inline int f5(unsigned int s0)
{
	return (int)(s0 << 5);
}
// VM opcode 0xF6 with exponent 6: the value, taken as unsigned, divided by 2^6
// and truncated toward zero. That is exactly a logical right shift by 6.
// s0 is the 32-bit input (same type as dword); the result always fits in an int.
static inline int f6(unsigned int s0)
{
	return (int)(s0 >> 6);
}
// VM opcode 0xF6 with exponent 7: the value, taken as unsigned, divided by 2^7
// and truncated toward zero. That is exactly a logical right shift by 7.
// s0 is the 32-bit input (same type as dword); the result always fits in an int.
static inline int f6_27(unsigned int s0)
{
	return (int)(s0 >> 7);
}
void keyfunc2_encrypt_simplify_optimize()
{
	dword s0; dword s1; dword s2; dword s3;
	dword s4; dword s5; dword s6; dword s7;
	s0 = s1 = s2 = s3 = s4 = s5 = s6 = s7 = 0;
	dword* input = &input_0__dword_404510[0];
	input[0] = 0x33323130;
	input[1] = 0x37363534;

	dword* dword_put_some_sugar = (dword*)unk_404544;
	
	for (char counter = 1; counter <= 0x4d; counter++)
	{
		s7 = input[1] + (f5(input[1]) ^ f6(input[1]));
		s7 = s7 ^ (dword_put_some_sugar[s4 & 3] + s4);
		input[0] = input[0] + s7;

		s4 -= 0x114514;

		s7 = input[0] + (f5(input[0]) ^ f6(input[0]));
		s7 = s7 ^ (dword_put_some_sugar[f6_27(s4) & 3] + s4);
		input[1] = input[1] + s7;
		
	}
	printf("%x, %x, %x\n", input[0], input[1], s4);
	return;
}

由此写解密函数，并解密：

#define _CRT_SECURE_NO_WARNINGS
#include<math.h>
#include<stdio.h>
//#include<xmmintrin.h>
//#include<intrin.h>

typedef unsigned int dword;
typedef unsigned char byte;

// Working buffer modelling 0x404510: main() loads a ciphertext pair here and
// keyfunc2_decrypt decrypts it in place (one spare dword follows).
dword input_0__dword_404510[3] = { 0 };
dword* input_1__dword_404514 = &input_0__dword_404510[1];
// Key material: the 16 bytes are read as four dwords.
byte unk_404544[16 + 1] = "put_some_sugar!!";
// Same value as the per-round step of the running sum.
dword dword_404170 = 0x114514;
dword* off_403300[4] = { input_0__dword_404510, (dword*)&unk_404544, &dword_404170, 0 };

dword* input = &input_0__dword_404510[0];
dword* dword_put_some_sugar = (dword*)unk_404544;

// Correction added after an int -> double conversion: entry 1 (sign bit set) adds 2^32.
double qword_403330[2] = { 0, 4294967296 };
dword xmmword451c[2] = { 0 };
// Initialised by pow() calls at start-up; this needs a C++ compiler, since C requires
// constant initialisers at file scope.
double pow_2_5 = pow(2.0, 5.0);
double pow_2_6 = pow(2.0, 6.0);
double pow_2_7 = pow(2.0, 7.0);

void keyfunc2_decrypt()
{
	dword s0; dword s1; dword s2; dword s3;
	dword s4; dword s5; dword s6; dword s7;
	s0 = s1 = s2 = s3 = s5 = s6 = s7 = 0;
	
	
	s4 = 0xface38fc;


	for (char counter = 1; counter <= 0x4d; counter++)
	{
		s7 = input[0] + (f5(input[0]) ^ f6(input[0]));
		s7 = s7 ^ (dword_put_some_sugar[f6_27(s4) & 3] + s4);
		input[1] -= s7;

		s4 += 0x114514;

		s7 = input[1] + (f5(input[1]) ^ f6(input[1]));
		s7 = s7 ^ (dword_put_some_sugar[s4 & 3] + s4);
		input[0] -= s7;
	}
	printf("%x, %x\n", input[0], input[1]);
	return;
}

int main()
{
	input[0] = 0xD5BF7CB6;
	input[1] = 0x1CC08FA5;
	keyfunc2_decrypt();
	input[0] = 0x80D48DE8;
	input[1] = 0x06C3F5F0;
	keyfunc2_decrypt();
	input[0] = 0x7E484457;
	input[1] = 0x0BFAEB3A6;
	keyfunc2_decrypt();
	input[0] = 0x0B44A2A23;
	input[1] = 0x3EBB5B15;
	keyfunc2_decrypt();
	return 0;
}

6. 其他

对于编号为0xF5和0xF6的两个函数，在还原浮点运算时由于是初次接触，费了不少力气。也得知要编译出那样的指令需要在vs2019中开启增强指令选项：C/C++ -> 代码生成 -> 启用增强指令集：流式处理 SIMD 扩展 2 (/arch:SSE2)。另外也开启了/O2 /Oi（都在C/C++ -> 优化下）。
类似0xF6函数中

// Handler for VM opcode 0xF6 (instruction layout: F6 <dst> <src>), repeated here
// to discuss which helper the final double -> integer conversion compiles to.
void sub_401200(byte* heap)
{
	// VM instruction pointer is the dword at heap+0x20.
	byte* ecx = (byte*)(*(dword*)(&heap[0x20]));
	int eax = *(int*)(heap_addr_dword4518[ecx[1]]);
	// int -> double plus the table correction (2^32 when the sign bit is set).
	double var8 = (double)eax + qword_403330[(dword)eax >> 0x1f];
	eax = *(dword*)heap_addr_dword4518[ecx[2]];
	double temp = pow(xmm403310, (double)eax + qword_403330[(dword)eax >> 0x1f]);
	// The cast chain double -> __int64 -> int is the form under discussion.
	*(int*)heap_addr_dword4518[ecx[1]] = (int)(__int64)(var8 / temp);
	// The instruction is 3 bytes long.
	*(dword*)(&heap[0x20]) += 3;
	return;
}

的*(int*)heap_addr_dword4518[ecx[1]] = (int)(__int64)(var8 / temp);是为了编译出调用_dtoi3函数的代码。而之所以要先转换为__int64，个人猜测是因为除法所得结果会大于0xffff_ffff。

.text:00401267                 divsd   xmm1, xmm0
.text:0040126B                 movaps  xmm0, xmm1
.text:0040126E                 call    sub_4024D0 //在高版本的ida中可以看到这里调用_dtoi3函数

*(int*)heap_addr_dword4518[ecx[1]] = (unsigned int)(var8 / temp);会编译出：

00412F95  divsd       xmm1,xmm0  
00412F99  movaps      xmm0,xmm1  
00412F9C  call        __dtoui3 (04112C6h)

*(int*)heap_addr_dword4518[ecx[1]] = (int)(var8 / temp);会编译出：

00412F95  divsd       xmm1,xmm0  
00412F99  cvttsd2si   eax,xmm1

都无法编译出跟题目程序中类似的代码。

posted on 2025-05-30 15:45 NJyO 阅读(59) 评论(0) 收藏举报