Cython二进制逆向系列(三)运算符
Cython二进制逆向系列(三)运算符
在开始前,先给出本文用到的py源代码
def test1(x, y):
# 数学运算符
a = x + y
b = x - y
c = x * y
d = x / y
e = x // y
f = x % y
g = x ** y
# 位运算符
h = x & y
i = x | y
j = x ^ y
k = ~x
l = x >> 4
m = x << 2
print(a, b, c, d, e, f, g, h, i, j, k, l, m)
def test2(x, y):
# in/not in 运算符
if x in y:
x = y
elif x not in y:
y = x
print(x, y)
def test3(x, y):
# ==运算符与逻辑运算符
print(x == 0 and y == 0)
print(y == 0 or x == 0)
print(not x==0)
# Script entry point: run each sample function once.
if __name__ == '__main__':
    test1(1, 2)
    # test2 needs a container as its second argument: `1 in 2` raises
    # TypeError and would stop the script before test3 runs.
    test2(1, [1, 2])
    test3(1, 2)
在这篇文章里,我们会讨论Cython是如何处理运算符的(数学运算符、位运算符、in/not in 运算符、==运算符与逻辑运算符)。总的来说,其中大部分是调用虚拟机api来实现的。
数学运算符与位运算符

可以看得出来全是调用虚拟机的api
下面给出运算符与api的对应表(其实看名字大概都能猜出来):
| 符号 | 含义 | 函数名 |
|---|---|---|
| + | 加 | PyNumber_Add |
| - | 减 | PyNumber_Subtract |
| * | 乘 | PyNumber_Multiply |
| / | 除 | __Pyx_PyNumber_Divide(Cython 的宏,Python 3 下展开为 PyNumber_TrueDivide) |
| // | 整除 | PyNumber_FloorDivide |
| % | 取模 | PyNumber_Remainder |
| ** | 乘方 | PyNumber_Power |
| & | 按位与 | PyNumber_And |
| \| | 按位或 | PyNumber_Or |
| ^ | 按位异或 | PyNumber_Xor |
| ~ | 按位取非 | PyNumber_Invert |
| >> | 右移 | PyNumber_Rshift |
| << | 左移 | PyNumber_Lshift |
这里单独看一下位移在ida中的体现
v24 = off_1800095B8[32];
if ( *(_QWORD *)(v4 + 8) != PyLong_Type[0] )
{
v27 = PyNumber_Rshift(v4, off_1800095B8[32]);
LABEL_35:
v4 = v27;
goto LABEL_36;
}
v25 = *(_QWORD *)(v4 + 16);
if ( v25 )
{
if ( ((v25 + 1) & 0xFFFFFFFFFFFFFFFDui64) != 0 )
{
v26 = v25 + 4;
switch ( v26 )
{
case 2i64:
v27 = PyLong_FromLongLong(
-(__int64)(*(unsigned int *)(v4 + 24) | ((unsigned __int64)*(unsigned int *)(v4 + 28) << 30)) >> 4,
v26,
v24,
0x180000000ui64);
break;
case 6i64:
v27 = PyLong_FromLongLong(
(__int64)(*(unsigned int *)(v4 + 24) | ((unsigned __int64)*(unsigned int *)(v4 + 28) << 30)) >> 4,
v26,
v24,
0x180000000ui64);
break;
default:
v27 = (*(__int64 (__fastcall **)(__int64, _QWORD *))(PyLong_Type[12] + 96i64))(v4, off_1800095B8[32]);
break;
}
}
else
{
v28 = -*(_DWORD *)(v4 + 24);
if ( v25 >= 0 )
v28 = *(_DWORD *)(v4 + 24);
v27 = PyLong_FromLong((unsigned int)(v28 >> 4), v25, v24, 0x180000000ui64);
}
goto LABEL_35;
}
++*(_QWORD *)v4;
LABEL_36:
if ( !v4 )
{
v12 = 2534i64;
v13 = 13i64;
goto LABEL_58;
}
v10 = (_QWORD *)v4;
off_1800095B8[32]中储存的就是4。这里生成的代码出于安全性考虑,对整数做了分情况处理:先判断对象类型是否为PyLong,再根据整数的大小走不同的分支。我们可以看到在else后面PyLong_FromLong((unsigned int)(v28 >> 4), v25, v24, 0x180000000ui64);从这里也可以看出右移了多少位。
问题是,表格中的PyNumber_Rshift在这里只出现在最开头的回退分支里(对象类型不是PyLong时才会调用),真正处理整数的路径上并没有它。这是因为py源代码中位移的位数是立即数,对整数直接转换为c语言的位移运算符就好了。但是如果是x>>y这样两个操作数都是变量,就会直接调用api PyNumber_Rshift。

in/not in 运算符
/* "test.py":21
* def test2(x, y):
* # in/not in
* if x in y: # <<<<<<<<<<<<<<
* x = y
* elif x not in y:
*/
__pyx_t_1 = (__Pyx_PySequence_ContainsTF(__pyx_v_x, __pyx_v_y, Py_EQ)); if (unlikely((__pyx_t_1 < 0))) __PYX_ERR(0, 21, __pyx_L1_error)
。。。。。。
/* "test.py":23
* if x in y:
* x = y
* elif x not in y: # <<<<<<<<<<<<<<
* y = x
* print(x, y)
*/
__pyx_t_1 = (__Pyx_PySequence_ContainsTF(__pyx_v_x, __pyx_v_y, Py_NE)); if (unlikely((__pyx_t_1 < 0))) __PYX_ERR(0, 23, __pyx_L1_error)
这里涉及到一些条件语句的转换,不过没关系,照样能看懂
在c代码中可以看到无论是in还是 not in 调用的都是函数__Pyx_PySequence_ContainsTF。其前两个参数是前后两个参与运算的变量,而第三个参数Py_EQ/Py_NE则决定当前运算到底是in还是 not in

不幸的是,无论是in还是not in ,在ida中都是PySequence_Contains,具体是哪个要结合上下文分析。比如这里v5 = PySequence_Contains(a3) 判断的是 a3 中是否包含 a2。如果 v5 == 1,表示 a2 在 a3 中,则进入接下来的操作(++*v3 和调整 v4 和 v3 的指向)。
而下面那个v9 = PySequence_Contains(v3) 判断的是 v3 中是否包含 v4:如果 v9 == 0,表示 v4 不在 v3 中,对应的正是 v4 not in v3 的语义。
说人话就是看后续是对PySequence_Contains的返回值和谁比较(1或者0)。
==运算符与逻辑运算符
逻辑与运算符的处理
/* "test.py":30
* def test3(x, y):
* # ==
* print(x == 0 and y == 0) # <<<<<<<<<<<<<<
* print(y == 0 or x == 0)
* print(not x==0)
*/
__pyx_t_2 = __Pyx_PyInt_EqObjC(__pyx_v_x, __pyx_int_0, 0, 0); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 30, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_2);
__pyx_t_3 = __Pyx_PyObject_IsTrue(__pyx_t_2); if (unlikely((__pyx_t_3 < 0))) __PYX_ERR(0, 30, __pyx_L1_error)
if (__pyx_t_3) {
__Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
} else {
__Pyx_INCREF(__pyx_t_2);
__pyx_t_1 = __pyx_t_2;
__Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
goto __pyx_L3_bool_binop_done;
}
__pyx_t_2 = __Pyx_PyInt_EqObjC(__pyx_v_y, __pyx_int_0, 0, 0); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 30, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_2);
__Pyx_INCREF(__pyx_t_2);
__pyx_t_1 = __pyx_t_2;
__Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
__pyx_L3_bool_binop_done:;
__Pyx_PyInt_EqObjC(__pyx_v_x, __pyx_int_0, 0, 0): 这行代码将 x == 0 的比较操作转换为 C 语言函数。它检查 x 是否等于 0。(猜测不同类型的==有对应的函数,暂未验证)。

ida中比较==0的部分,看得出来它把变量分为int float 和其他三种情况,除了整数和浮点,一概用PyObject_RichCompare比较。
and 逻辑运算符是短路求值的:如果第一个条件为 False,那么第二个条件不会被计算。对应到生成的 C 代码里,当 x == 0 为 False 时,直接把这个结果赋给 __pyx_t_1 并跳转到 __pyx_L3_bool_binop_done;只有在 x == 0 为 True 时才会继续检查 y == 0。
然后__Pyx_PyInt_EqObjC(__pyx_v_y, __pyx_int_0, 0, 0) 检查 y == 0,并根据结果将 __pyx_t_2 设置为布尔值。

ida中对and的处理也差不多类似。看着有点恶心,全是if else条件分支和各种goto
逻辑或运算符的处理
/* "test.py":31
* # ==
* print(x == 0 and y == 0)
* print(y == 0 or x == 0) # <<<<<<<<<<<<<<
* print(not x==0)
*
*/
__pyx_t_1 = __Pyx_PyInt_EqObjC(__pyx_v_y, __pyx_int_0, 0, 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 31, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_1);
__pyx_t_3 = __Pyx_PyObject_IsTrue(__pyx_t_1); if (unlikely((__pyx_t_3 < 0))) __PYX_ERR(0, 31, __pyx_L1_error)
if (!__pyx_t_3) {
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
} else {
__Pyx_INCREF(__pyx_t_1);
__pyx_t_2 = __pyx_t_1;
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
goto __pyx_L5_bool_binop_done;
}
__pyx_t_1 = __Pyx_PyInt_EqObjC(__pyx_v_x, __pyx_int_0, 0, 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 31, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_1);
__Pyx_INCREF(__pyx_t_1);
__pyx_t_2 = __pyx_t_1;
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
__pyx_L5_bool_binop_done:;
前面都是在处理== :__Pyx_PyInt_EqObjC(__pyx_v_y, __pyx_int_0, 0, 0): 检查 y == 0,即比较 y 是否等于 0。__Pyx_PyObject_IsTrue(__pyx_t_1): 将 __pyx_t_1 转换为布尔值,存入 __pyx_t_3。如果 y == 0(即 __pyx_t_3 为 True),就把 __pyx_t_1(存储 y == 0 结果)赋给 __pyx_t_2,然后直接跳到 __pyx_L5_bool_binop_done。
在执行 or 运算时,短路操作符同样会起作用:如果 y == 0 为 True,则 x == 0 的比较不会被执行,结果会直接为 True。__pyx_t_2 保存了 y == 0 或 x == 0 的结果,它将作为最终的结果传递给 print 函数。
逻辑非运算符的处理
/* "test.py":32
* print(x == 0 and y == 0)
* print(y == 0 or x == 0)
* print(not x==0) # <<<<<<<<<<<<<<
*
*
*/
__pyx_t_3 = (__Pyx_PyInt_BoolEqObjC(__pyx_v_x, __pyx_int_0, 0, 0)); if (unlikely((__pyx_t_3 < 0))) __PYX_ERR(0, 32, __pyx_L1_error)
__pyx_t_1 = __Pyx_PyBool_FromLong((!__pyx_t_3)); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 32, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_1);
__pyx_t_2 = __Pyx_PyObject_CallOneArg(__pyx_builtin_print, __pyx_t_1); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 32, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_2);
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
__Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
!__pyx_t_3:这行代码计算 not x == 0。由于 __pyx_t_3 是 x == 0 的布尔值,!__pyx_t_3 就是其逻辑取反。__Pyx_PyBool_FromLong((!__pyx_t_3))将 !__pyx_t_3 转换为 Python 的布尔对象。如果 !__pyx_t_3 为 0,则返回 False;如果 !__pyx_t_3 为 1,则返回 True。

如果以后逆向在这里出题,考察逻辑运算符,那就认命吧,这里反编译出的代码很绕。
下面粘上test3函数的反编译代码。
// write access to const memory has been detected, the output may be wrong!
__int64 __fastcall sub_180001E30(__int64 a1, __int64 a2, __int64 a3)
{
v5 = *((_QWORD *)off_18000B688 + 35);
if ( a2 == v5 )
goto LABEL_2;
v7 = *(_QWORD *)(a2 + 8);
if ( v7 == PyLong_Type )
{
if ( *(_QWORD *)(a2 + 16) )
{
LABEL_5:
v6 = (_QWORD *)++Py_FalseStruct;
goto LABEL_10;
}
LABEL_2:
v6 = (_QWORD *)++Py_TrueStruct;
goto LABEL_10;
}
if ( v7 == PyFloat_Type )
{
if ( *(double *)(a2 + 16) != 0.0 )
goto LABEL_5;
goto LABEL_2;
}
v6 = (_QWORD *)PyObject_RichCompare(a2, v5, 2LL);
LABEL_10:
if ( !v6 )
{
v8 = 30;
v9 = 3136;
LABEL_75:
sub_180005F50("test.test3", v9, v8, (__int64)"test.py");
return 0LL;
}
IsTrue = v6 == (_QWORD *)Py_TrueStruct;
v11 = v6 == (_QWORD *)Py_NoneStruct;
v12 = IsTrue | v11 | (unsigned int)(v6 == (_QWORD *)Py_FalseStruct);
if ( !(IsTrue | (v11 || v6 == (_QWORD *)Py_FalseStruct)) )
IsTrue = PyObject_IsTrue(v6);
if ( IsTrue < 0 )
{
v8 = 30;
v9 = 3138;
goto LABEL_73;
}
v13 = *v6;
if ( !IsTrue )
{
*v6 = v13;
v16 = v6;
if ( v13 )
goto LABEL_26;
v18 = v6;
goto LABEL_25;
}
v14 = v13 - 1;
*v6 = v14;
if ( !v14 )
Py_Dealloc(v6);
v15 = (_QWORD *)sub_180004780(a3, *((_QWORD *)off_18000B688 + 35));
v16 = v15;
if ( !v15 )
{
v8 = 30;
v9 = 3147;
goto LABEL_75;
}
v17 = *v15;
*v16 = v17;
if ( !v17 )
{
v18 = v16;
LABEL_25:
Py_Dealloc(v18);
}
LABEL_26:
v6 = v16;
v19 = (_QWORD *)sub_1800048D0(v12, v16);
if ( !v19 )
{
v8 = 30;
v9 = 3153;
if ( !v6 )
goto LABEL_75;
LABEL_73:
v20 = (*v6)-- == 1LL;
if ( v20 )
Py_Dealloc(v6);
goto LABEL_75;
}
v20 = (*v16)-- == 1LL;
if ( v20 )
Py_Dealloc(v16);
v20 = (*v19)-- == 1LL;
if ( v20 )
Py_Dealloc(v19);
v21 = sub_180004780(a3, *((_QWORD *)off_18000B688 + 35));
v6 = (_QWORD *)v21;
if ( !v21 )
{
v8 = 31;
v9 = 3165;
goto LABEL_75;
}
v22 = sub_180006570(v21);
v23 = (unsigned int)v22;
if ( v22 < 0 )
{
v8 = 31;
v9 = 3167;
goto LABEL_73;
}
v24 = *v6;
if ( !(_DWORD)v23 )
{
v25 = v24 - 1;
*v6 = v25;
if ( !v25 )
Py_Dealloc(v6);
v26 = (_QWORD *)sub_180004780(a2, *((_QWORD *)off_18000B688 + 35));
v6 = v26;
if ( !v26 )
{
v8 = 31;
v9 = 3176;
goto LABEL_75;
}
v24 = *v26;
}
*v6 = v24;
if ( !v24 )
Py_Dealloc(v6);
v28 = (_QWORD *)sub_1800048D0(v23, v6);
if ( !v28 )
{
v8 = 31;
v9 = 3182;
if ( !v6 )
goto LABEL_75;
goto LABEL_73;
}
v20 = (*v6)-- == 1LL;
if ( v20 )
Py_Dealloc(v6);
v20 = (*v28)-- == 1LL;
if ( v20 )
Py_Dealloc(v28);
v29 = *((_QWORD *)off_18000B688 + 35);
if ( a2 == v29 )
goto LABEL_68;
v30 = *(_QWORD *)(a2 + 8);
if ( v30 == PyLong_Type )
{
v31 = *(_QWORD *)(a2 + 16) == 0LL;
}
else if ( v30 == PyFloat_Type )
{
if ( *(double *)(a2 + 16) == 0.0 )
goto LABEL_68;
v31 = 0;
}
else
{
v32 = PyObject_RichCompare(a2, v29, 2LL);
v33 = (_QWORD *)v32;
if ( v32 )
{
v31 = v32 == Py_TrueStruct;
v34 = v32 == Py_NoneStruct;
v27 = v31 | v34 | (unsigned int)(v33 == (_QWORD *)Py_FalseStruct);
if ( !(v31 | (v34 || v33 == (_QWORD *)Py_FalseStruct)) )
v31 = PyObject_IsTrue(v33);
v20 = (*v33)-- == 1LL;
if ( v20 )
Py_Dealloc(v33);
}
else
{
v31 = -1;
}
}
if ( v31 < 0 )
{
v8 = 32;
v9 = 3194;
goto LABEL_75;
}
if ( !v31 )
{
v6 = (_QWORD *)++Py_TrueStruct;
goto LABEL_69;
}
LABEL_68:
v6 = (_QWORD *)++Py_FalseStruct;
LABEL_69:
if ( !v6 )
{
v8 = 32;
v9 = 3195;
goto LABEL_75;
}
v35 = (_QWORD *)sub_1800048D0(v27, v6);
if ( !v35 )
{
v8 = 32;
v9 = 3197;
goto LABEL_73;
}
v20 = (*v6)-- == 1LL;
if ( v20 )
Py_Dealloc(v6);
v20 = (*v35)-- == 1LL;
if ( v20 )
Py_Dealloc(v35);
return Py_NoneStruct++;
}

在这篇文章里,我们会讨论Cython是如何处理运算符的(数学运算符、位运算符、in/not in 运算符、==运算符与逻辑运算符)。总的来说,其中大部分是调用虚拟机api来实现的。
浙公网安备 33010602011771号