Fork me on GitHub
函数链调用

在编程中有时会把多个函数串在一起依次调用,以达到特定的目的,在这里我们把这样的调用方式形象地称为函数链调用。函数链中的函数有些是独立的,有些则只用在函数组合中,不会单独调用。对象的连续配置和组合是函数链调用比较常用的场合;去除语法糖之后,LINQ 也是函数链的一种运用。下面通过即时编译中的一个例子,来看看函数链的用法。

  几种热门的语言(C#、Java、JavaScript)都采用了即时编译的方式。即时编译需要有相应的汇编类库供调用,以便把 VM 代码转换成本地机器代码。

  Apple的汇编类库是比较常用的,被firefox,webkit用于js的即时编译,下面的代码是Apple汇编类库的一般调用方式:

复制代码
// Example of the MacroAssembler-style API: one method call on `masm` per
// instruction, with operands passed as ordinary function arguments.
// NOTE(review): `masm`, `r` and the register names are declared outside this
// fragment; the per-line remarks below describe only the calls as written.
masm.push(ecx);
masm.move(ebp, edx);
masm.add32(0x12 ,edx);
masm.push(edx);
// Memory operands have to be wrapped in MacroAssembler::Address(...).
masm.load32(MacroAssembler::Address(edx),edx);
masm.push(edx);
masm.load32(MacroAssembler::Address(edx),edx);
masm.add32(edx,r);
// Indirect call through the address held in `r`.
masm.call(MacroAssembler::Address(r));
// Matches the push(ecx) at the top of the sequence.
masm.pop(ecx);
复制代码

 下面再看看chrome中v8的调用习惯:

复制代码
// Shorthand: `__ op(...)` expands to `masm()->op(...)`, so every line below
// is a method call on the assembler returned by masm().
#define __ masm()->

__ mov(ebx, Operand(esp, kSavedRegistersAreaSize));
__ Set(ecx, Immediate(0));
__ lea(edx, Operand(esp, kSavedRegistersAreaSize + 1 * kPointerSize));
__ sub(edx, Operand(ebp));
__ neg(edx);
// A label is declared as a normal object and then bound at this position.
Label pop_loop;
__ bind(&pop_loop);
__ pop(Operand(edx, 0));

// Remove the shorthand as soon as the sequence is finished so the macro does
// not leak into later code.
#undef __
复制代码

    与前面的调用方式差别不大,通过宏代换使得汇编调用看得更直观,并遵循了宏定义用过即取消定义的习惯。

   从上面的代码可以看出,采用普通的函数调用方式时,大部分的汇编码调用还是比较整洁,但涉及到内存访问的部分显得有些不太直观,对于 mov [ebx + 2 * ecx + 0x1000], eax 这样的语句写起来会有些复杂。下面我们试着看看有没有更直观的方式来表现。

   在这里我们可以看到,函数与真实汇编之间存在一定程度的失配:汇编语言本身是描述性的,具有较强的组合能力,而用单个函数去模拟这样的能力,往往有点力不从心,这样失配的结果就引起功能的简化和简洁性的减弱。利用多个函数一起协同的能力,函数链可以用于解决这样的失配问题,使得调用代码书写得像汇编一样简洁。

   下面的代码是一些准备工作,定义了汇编要用到的一些结构,如寄存器、地址、操作还有卷标。寄存器和卷标的代码都非常简单,操作和地址的代码复杂一些,主要是定义了一些操作符的重载,这些函数体现了函数链中函数的特点:要么返回自身,要么返回新对象,以备后续调用。另外还有一些宏定义,这些宏都比较简单。为简单起见,在这里程序并不作实际的本地代码转化工作,只保证书写的代码能编译通过。具体的代码如下:

View Code
// Empty common base of the operand types (TInt, TReg, TAdr), so that TOp can
// refer to any of them through a TNode pointer.
struct TNode {};

struct TOp;

// A jump label. Writing `label (instruction ...)` in front of an instruction
// invokes the call operator below.
struct TLabel
{
    // Pass-through: hands back the very same TOp it received; the label
    // itself records nothing.
    TOp & operator () (TOp & r)
    {
        return r;
    }
};

// Immediate (literal integer) operand.
struct TInt : TNode
{
    int val;  // the literal value

    // Wraps the integer `v` as an operand node.
    TInt(int v)
        : val(v)
    {
    }
};

// Register operand, identified by its numeric index.
struct TReg : TNode
{
    int reg;  // register number

    TReg(int r):reg(r){}
    TReg():reg(0){}  // defaults to register 0

    // Comparisons look only at the register number. They are const members
    // taking a const reference, so const registers and temporaries (such as
    // the ones produced by `TReg(n)`) can be compared as well as plain lvalues.
    inline bool  operator != (const TReg & l) const { return reg != l.reg; }
    inline bool  operator == (const TReg & l) const { return reg == l.reg; }
    inline bool  operator > (const TReg & l) const { return reg > l.reg; }
    inline bool  operator < (const TReg & l) const { return reg < l.reg; }
};

// Memory-address operand assembled from the pieces of an expression such as
// `base + scale * index + direct`.
struct TAdr : TNode
{
    int   typ;     // NOTE(review): never assigned outside the constructor in the visible code
    TReg* base;    // base register, or NULL when absent (not owned)
    int   scale;   // multiplier applied to `index`
    TReg* index;   // index register, or NULL when absent (not owned)
    int   direct;  // constant displacement

    // Starts out as an empty address: no registers, zero scale and displacement.
    TAdr()
        : typ(0)
        , base(NULL)
        , scale(0)
        , index(NULL)
        , direct(0)
    {
    }
};

// Central place for creating and destroying operand nodes on the heap.
struct TAlloc
{
    // Returns a new default-constructed TAdr; release it with free(TAdr*).
    static TAdr* allocAdr() { return new TAdr; }

    // Returns a new default-constructed TReg; release it with free(TReg*).
    static TReg* allocReg() { return new TReg(); }

    // Destroys an address node obtained from allocAdr().
    static void free(TAdr* p) { delete p; }

    // Destroys a register node obtained from allocReg().
    static void free(TReg* p) { delete p; }
};

// reg + reg: builds an address with `l` as base and `r` as index.
// The result stores the addresses of both registers, so they must stay alive
// for as long as the address is used. The node comes from TAlloc::allocAdr()
// and is not released here.
inline TAdr & operator + (TReg & l,TReg & r)
{
    TAdr & result = *TAlloc::allocAdr();
    result.base  = &l;
    result.index = &r;
    return result;
}

// scale * reg: builds an address whose index is `r`, multiplied by `l`.
// The result stores the address of `r`, so the register must stay alive for
// as long as the address is used. The node comes from TAlloc::allocAdr() and
// is not released here.
inline TAdr & operator * (int l,TReg & r)
{
    TAdr & result = *TAlloc::allocAdr();
    result.scale = l;
    result.index = &r;
    return result;
}

// reg + constant: builds an address with `r` as base and `l` as displacement.
// The result stores the address of `r`, so the register must stay alive for
// as long as the address is used. The node comes from TAlloc::allocAdr() and
// is not released here.
inline TAdr & operator + (TReg & r,int l)
{
    TAdr & result = *TAlloc::allocAdr();
    result.base   = &r;
    result.direct = l;
    return result;
}

// address + constant: adds `l` to the displacement of an existing address.
// The address is modified in place and returned, so the chain can continue
// without allocating another node. The displacement is accumulated rather
// than overwritten, so `reg + 4 + 8` ends up with a displacement of 12.
inline TAdr & operator + (TAdr & adr,int l)
{
    adr.direct += l;
    return adr;
}

// reg + address: builds a new address with `l` as base, taking the index,
// scale and displacement from `r`. Any base already set in `r` is not carried
// over, and `r` itself is left untouched and is not released.
// The result stores the address of `l`, so the register must stay alive for
// as long as the address is used.
inline TAdr & operator + (TReg & l,TAdr & r)
{
    TAdr & result = *TAlloc::allocAdr();
    result.base   = &l;
    result.index  = r.index;
    result.scale  = r.scale;
    result.direct = result.direct + r.direct;  // a fresh node starts at 0
    return result;
}

struct TOp
{
    int op;
    TNode* left;
    TNode* right;

    TOp(int _op):op(_op),left(NULL),right(NULL){}
    inline TOp & operator () (TReg & r)
    {
        if(left)
            right = &r;
        else
            left = &r;
        return *this;
    };

    inline TOp & operator () (TAdr & r)
    {
        if(left)
            right = &r;
        else
            left = &r;
        return *this;
    };

    inline TOp & operator () (TInt & r)
    {
        if(left)
            right = &r;
        else
            left = &r;
        return *this;
    };

    inline TOp & operator () (int  r)
    {
        if(left)
            right = &TInt(r);
        else
            left = &TInt(r);
        return *this;
    };

    inline TOp & operator [] (TAdr & r)
    {
        if(left)
            right = &r;
        else
            left = &r;
        return *this;
    };
    inline TOp & operator [] (TReg & r)
    {
        if(left)
            right = &r;
        else
            left = &r;
        return *this;
    };
    inline TOp & operator [] (int r)
    {
        if(left)
            right = &TInt(r);
        else
            left = &TInt(r);
        return *this;
    };
    inline TOp & operator + (TLabel r)
    {
        return *this;
    }
};

// Numeric opcode values passed to the TOp constructor.
struct TOpcode
{
    // data movement
    static const unsigned char mov  = 1;
    static const unsigned char push = 7;
    static const unsigned char pop  = 8;

    // arithmetic
    static const unsigned char add  = 2;
    static const unsigned char sub  = 3;
    static const unsigned char mul  = 4;
    static const unsigned char div  = 5;

    // control flow
    static const unsigned char jmp  = 6;
    static const unsigned char call = 9;
    static const unsigned char ret  = 10;
};

// Mnemonic shorthands: each expands to a fresh TOp temporary for that opcode,
// ready to receive operands through chained ( ) / [ ] calls.
#define ncode_mov  (TOp(TOpcode::mov)) 
#define ncode_add  (TOp(TOpcode::add))  
#define ncode_sub  (TOp(TOpcode::sub))  
#define ncode_mul  (TOp(TOpcode::mul))  
#define ncode_div  (TOp(TOpcode::div))  
#define ncode_push (TOp(TOpcode::push))  
#define ncode_pop  (TOp(TOpcode::pop))  
// jmp ends in a binary '+', so the label written after it becomes the
// argument of TOp::operator+(TLabel).
#define ncode_jmp  (TOp(TOpcode::jmp)) +  
#define ncode_call (TOp(TOpcode::call))
#define ncode_ret  (TOp(TOpcode::ret))

// _(mnemonic op1, op2): "ncode_" is pasted onto the first token of the first
// argument (the mnemonic); the rest of that argument and everything after the
// first comma follow unchanged, and the comma itself disappears. For example
// `_(mov ebp,esp)` becomes `ncode_mov ebp esp`.
#define _(x,...) ncode_##x __VA_ARGS__

// Register names: each use expands to a new TReg temporary carrying the
// register number.
#define eax (TReg(0))
#define ecx (TReg(1))
#define edx (TReg(2))
#define ebx (TReg(3))
#define esp (TReg(4))
#define ebp (TReg(5))
#define esi (TReg(6))
#define edi (TReg(7))

 通过上面的准备,现在可以书写汇编调用代码了:

复制代码
// Demo driver: writes a short instruction sequence in the assembly-like macro
// notation. No machine code is produced; each statement only builds a TOp
// temporary, so the point is that the notation compiles.
// NOTE(review): the statements bind temporaries to non-const references,
// which relies on a compiler extension (accepted by MSVC).
int _tmain(int argc, _TCHAR* argv[])
{
    TLabel L1,L2;

    // Register-to-register forms.
    _(push ebp);
    _(mov ebp,esp);
    _(push esi);
    _(push edi);
    _(mov ebx, eax);

    // Memory operands are written in brackets, as a source and as a destination.
    _(mov eax,[ebx + 2 * ecx]);
    _(mov [ebx + 2 * ecx + 0x1000],eax);

    // A label in front of an instruction goes through TLabel::operator().
 L1 _(mov eax,[eax]);
 L2 _(mov eax,[0x1234]);
    _(call eax);

    // Jumps take a label operand.
    _(jmp L1);
    _(jmp L2);

    _(pop edi);
    _(pop esi);
    _(mov esp,ebp);
    _(pop ebp);
    _(ret );

    // Report success; the previous exit(1) signalled failure to the caller
    // even though nothing had gone wrong.
    return 0;
}
复制代码

    是不是看起来像嵌入式汇编代码,但只是形似而已,这里是函数调用,而嵌入式汇编码是执行码。现在看起来是否更直观,YY一下。

 现在再看看宏展开后的实际代码,是不是都是一些函数链调用?

复制代码
// The demo driver after macro expansion: every statement is a chain of
// operator calls on a TOp temporary. No machine code is produced.
// NOTE(review): the statements bind temporaries to non-const references,
// which relies on a compiler extension (accepted by MSVC).
int wmain(int argc, _TCHAR* argv[])
{
    TLabel L1,L2;

    // Register operands are attached through TOp::operator().
    (TOp(TOpcode::push)) (TReg(5)) ;
    (TOp(TOpcode::mov)) (TReg(5)) (TReg(4));
    (TOp(TOpcode::push)) (TReg(6)) ;
    (TOp(TOpcode::push)) (TReg(7)) ;
    (TOp(TOpcode::mov)) (TReg(3)) (TReg(0));

    // Bracketed operands go through TOp::operator[]; the address inside the
    // brackets is built first by the overloaded + and * operators.
    (TOp(TOpcode::mov)) (TReg(0)) [(TReg(3)) + 2 * (TReg(1))];
    (TOp(TOpcode::mov)) [(TReg(3)) + 2 * (TReg(1)) + 0x1000] (TReg(0));

    // A leading label calls TLabel::operator() on the TOp, then the chain continues.
 L1 (TOp(TOpcode::mov)) (TReg(0)) [(TReg(0))];
 L2 (TOp(TOpcode::mov)) (TReg(0)) [0x1234];
    (TOp(TOpcode::call)) (TReg(0)) ;

    // Jumps use TOp::operator+(TLabel).
    (TOp(TOpcode::jmp)) + L1 ;
    (TOp(TOpcode::jmp)) + L2 ;

    (TOp(TOpcode::pop)) (TReg(7)) ;
    (TOp(TOpcode::pop)) (TReg(6)) ;
    (TOp(TOpcode::mov)) (TReg(4)) (TReg(5));
    (TOp(TOpcode::pop)) (TReg(5)) ;
    (TOp(TOpcode::ret)) ;

    // Report success; the previous exit(1) signalled failure to the caller
    // even though nothing had gone wrong.
    return 0;
}
复制代码

    有头晕的感觉吧?正好应了一点,简单的背后是复杂。

 下面是完整的示例代码:

View Code
#include <stdio.h>
#include <stdlib.h>
#include <tchar.h>

// Empty common base of the operand types (TInt, TReg, TAdr), so that TOp can
// refer to any of them through a TNode pointer.
struct TNode {};

struct TOp;

// A jump label. Writing `label (instruction ...)` in front of an instruction
// invokes the call operator below.
struct TLabel
{
    // Pass-through: hands back the very same TOp it received; the label
    // itself records nothing.
    TOp & operator () (TOp & r)
    {
        return r;
    }
};

// Immediate (literal integer) operand.
struct TInt : TNode
{
    int val;  // the literal value

    // Wraps the integer `v` as an operand node.
    TInt(int v)
        : val(v)
    {
    }
};

// Register operand, identified by its numeric index.
struct TReg : TNode
{
    int reg;  // register number

    TReg(int r):reg(r){}
    TReg():reg(0){}  // defaults to register 0

    // Comparisons look only at the register number. They are const members
    // taking a const reference, so const registers and temporaries (such as
    // the ones produced by `TReg(n)`) can be compared as well as plain lvalues.
    inline bool  operator != (const TReg & l) const { return reg != l.reg; }
    inline bool  operator == (const TReg & l) const { return reg == l.reg; }
    inline bool  operator > (const TReg & l) const { return reg > l.reg; }
    inline bool  operator < (const TReg & l) const { return reg < l.reg; }
};

// Memory-address operand assembled from the pieces of an expression such as
// `base + scale * index + direct`.
struct TAdr : TNode
{
    int   typ;     // NOTE(review): never assigned outside the constructor in the visible code
    TReg* base;    // base register, or NULL when absent (not owned)
    int   scale;   // multiplier applied to `index`
    TReg* index;   // index register, or NULL when absent (not owned)
    int   direct;  // constant displacement

    // Starts out as an empty address: no registers, zero scale and displacement.
    TAdr()
        : typ(0)
        , base(NULL)
        , scale(0)
        , index(NULL)
        , direct(0)
    {
    }
};

// Central place for creating and destroying operand nodes on the heap.
struct TAlloc
{
    // Returns a new default-constructed TAdr; release it with free(TAdr*).
    static TAdr* allocAdr() { return new TAdr; }

    // Returns a new default-constructed TReg; release it with free(TReg*).
    static TReg* allocReg() { return new TReg(); }

    // Destroys an address node obtained from allocAdr().
    static void free(TAdr* p) { delete p; }

    // Destroys a register node obtained from allocReg().
    static void free(TReg* p) { delete p; }
};

// reg + reg: builds an address with `l` as base and `r` as index.
// The result stores the addresses of both registers, so they must stay alive
// for as long as the address is used. The node comes from TAlloc::allocAdr()
// and is not released here.
inline TAdr & operator + (TReg & l,TReg & r)
{
    TAdr & result = *TAlloc::allocAdr();
    result.base  = &l;
    result.index = &r;
    return result;
}

// scale * reg: builds an address whose index is `r`, multiplied by `l`.
// The result stores the address of `r`, so the register must stay alive for
// as long as the address is used. The node comes from TAlloc::allocAdr() and
// is not released here.
inline TAdr & operator * (int l,TReg & r)
{
    TAdr & result = *TAlloc::allocAdr();
    result.scale = l;
    result.index = &r;
    return result;
}

// reg + constant: builds an address with `r` as base and `l` as displacement.
// The result stores the address of `r`, so the register must stay alive for
// as long as the address is used. The node comes from TAlloc::allocAdr() and
// is not released here.
inline TAdr & operator + (TReg & r,int l)
{
    TAdr & result = *TAlloc::allocAdr();
    result.base   = &r;
    result.direct = l;
    return result;
}

// address + constant: adds `l` to the displacement of an existing address.
// The address is modified in place and returned, so the chain can continue
// without allocating another node. The displacement is accumulated rather
// than overwritten, so `reg + 4 + 8` ends up with a displacement of 12.
inline TAdr & operator + (TAdr & adr,int l)
{
    adr.direct += l;
    return adr;
}

// reg + address: builds a new address with `l` as base, taking the index,
// scale and displacement from `r`. Any base already set in `r` is not carried
// over, and `r` itself is left untouched and is not released.
// The result stores the address of `l`, so the register must stay alive for
// as long as the address is used.
inline TAdr & operator + (TReg & l,TAdr & r)
{
    TAdr & result = *TAlloc::allocAdr();
    result.base   = &l;
    result.index  = r.index;
    result.scale  = r.scale;
    result.direct = result.direct + r.direct;  // a fresh node starts at 0
    return result;
}

struct TOp
{
    int op;
    TNode* left;
    TNode* right;

    TOp(int _op):op(_op),left(NULL),right(NULL){}
    inline TOp & operator () (TReg & r)
    {
        if(left)
            right = &r;
        else
            left = &r;
        return *this;
    };

    inline TOp & operator () (TAdr & r)
    {
        if(left)
            right = &r;
        else
            left = &r;
        return *this;
    };

    inline TOp & operator () (TInt & r)
    {
        if(left)
            right = &r;
        else
            left = &r;
        return *this;
    };

    inline TOp & operator () (int  r)
    {
        if(left)
            right = &TInt(r);
        else
            left = &TInt(r);
        return *this;
    };

    inline TOp & operator [] (TAdr & r)
    {
        if(left)
            right = &r;
        else
            left = &r;
        return *this;
    };
    inline TOp & operator [] (TReg & r)
    {
        if(left)
            right = &r;
        else
            left = &r;
        return *this;
    };
    inline TOp & operator [] (int r)
    {
        if(left)
            right = &TInt(r);
        else
            left = &TInt(r);
        return *this;
    };
    inline TOp & operator + (TLabel r)
    {
        return *this;
    }
};

// Numeric opcode values passed to the TOp constructor.
struct TOpcode
{
    // data movement
    static const unsigned char mov  = 1;
    static const unsigned char push = 7;
    static const unsigned char pop  = 8;

    // arithmetic
    static const unsigned char add  = 2;
    static const unsigned char sub  = 3;
    static const unsigned char mul  = 4;
    static const unsigned char div  = 5;

    // control flow
    static const unsigned char jmp  = 6;
    static const unsigned char call = 9;
    static const unsigned char ret  = 10;
};

// Mnemonic shorthands: each expands to a fresh TOp temporary for that opcode,
// ready to receive operands through chained ( ) / [ ] calls.
#define ncode_mov  (TOp(TOpcode::mov)) 
#define ncode_add  (TOp(TOpcode::add))  
#define ncode_sub  (TOp(TOpcode::sub))  
#define ncode_mul  (TOp(TOpcode::mul))  
#define ncode_div  (TOp(TOpcode::div))  
#define ncode_push (TOp(TOpcode::push))  
#define ncode_pop  (TOp(TOpcode::pop))  
// jmp ends in a binary '+', so the label written after it becomes the
// argument of TOp::operator+(TLabel).
#define ncode_jmp  (TOp(TOpcode::jmp)) +  
#define ncode_call (TOp(TOpcode::call))
#define ncode_ret  (TOp(TOpcode::ret))

// _(mnemonic op1, op2): "ncode_" is pasted onto the first token of the first
// argument (the mnemonic); the rest of that argument and everything after the
// first comma follow unchanged, and the comma itself disappears. For example
// `_(mov ebp,esp)` becomes `ncode_mov ebp esp`.
#define _(x,...) ncode_##x __VA_ARGS__

// Register names: each use expands to a new TReg temporary carrying the
// register number.
#define eax (TReg(0))
#define ecx (TReg(1))
#define edx (TReg(2))
#define ebx (TReg(3))
#define esp (TReg(4))
#define ebp (TReg(5))
#define esi (TReg(6))
#define edi (TReg(7))

// Demo driver: writes a short instruction sequence in the assembly-like macro
// notation. No machine code is produced; each statement only builds a TOp
// temporary, so the point is that the notation compiles.
// NOTE(review): the statements bind temporaries to non-const references,
// which relies on a compiler extension (accepted by MSVC).
int _tmain(int argc, _TCHAR* argv[])
{
    TLabel L1,L2;

    // Register-to-register forms.
    _(push ebp);
    _(mov ebp,esp);
    _(push esi);
    _(push edi);
    _(mov ebx, eax);

    // Memory operands are written in brackets, as a source and as a destination.
    _(mov eax,[ebx + 2 * ecx]);
    _(mov [ebx + 2 * ecx + 0x1000],eax);

    // A label in front of an instruction goes through TLabel::operator().
 L1 _(mov eax,[eax]);
 L2 _(mov eax,[0x1234]);
    _(call eax);

    // Jumps take a label operand.
    _(jmp L1);
    _(jmp L2);

    _(pop edi);
    _(pop esi);
    _(mov esp,ebp);
    _(pop ebp);
    _(ret );

    // Report success; the previous exit(1) signalled failure to the caller
    // even though nothing had gone wrong.
    return 0;
}

 

-----复杂,并不会因奥卡姆剃刀而减少。

 
 
标签: 即时编译
posted on 2013-01-11 14:06  HackerVirus  阅读(502)  评论(0)  编辑  收藏  举报