表达式模板

介绍Expression template,本文是一个学习笔记,主要参考 mshadow项目关于表达式模板的教程

https://github.com/dmlc/mshadow/tree/master/guide/exp-template

   

  • 类似matlab形式的向量操作如 VEC0 = VEC1 + VEC2 + VEC3存在效率问题

// Naive integer vector used to observe construction, destruction and
// allocation: the default constructor and the destructor log, so the number
// of objects created by an expression can be read off the output.
class Vec {
public:
    // Element storage; public so tests and helpers can inspect it directly.
    vector<int> data;

    // Logs so that default constructions are visible in the output.
    Vec() { LOG(INFO) << "Default construct"; }

    // Copies the listed values into the vector.
    Vec(const initializer_list<int>& il) : data(il) {}

    // Resizes the storage to `len` zeros, then passes the address of the
    // first element to the Pval debug macro (presumably prints it -- the
    // macro is defined elsewhere). len == 0 is not handled here.
    Vec(int len) {
        data.resize(len, 0);
        Pval(&data[0]);
    }

    // Logs every destruction so temporaries can be counted.
    ~Vec() { LOG(INFO) << "Destruct"; }
};

   

   

// Returns a default-constructed Vec by value through a named local, so the
// constructor/destructor logs show what returning by value actually costs.
Vec test_return() {
    Vec result;
    return result;
}

   

// Initialises a Vec from a function's return value; the Vec constructor and
// destructor logs reveal whether any extra object is created.
TEST(simple1, func) {
    Vec returned = test_return();
}

   

   

I0512 11:51:37.124034 22030 test_simple.cc:37] Default construct

I0512 11:51:37.124111 22030 test_simple.cc:41] Destruct

这证明了 C++ 会对返回值做优化(返回值优化,RVO/NRVO):按值返回局部变量没有copy的代价,也不会产生多余的临时变量。

   

对于 vec = vec1 + vec2,类似地也会有返回值优化,不会有冗余的copy代价:只有一次resize,3次destruct。

   

// Adds two vectors and initialises a third from the result. The operator+
// for this Vec is not shown in this excerpt.
TEST(simple2, func) {
    Vec b{ 3, 2, 1 };
    Vec c{ 2, 3, 4 };
    Vec a = b + c;
}

   

I0512 11:57:51.699890 16547 test_simple.cc:33] &data[0] --- [0x8b0340]

I0512 11:57:51.699898 16547 test_simple.cc:58] vec.data --- 3

I0512 11:57:51.699903 16547 test_simple.cc:58] 0 5

I0512 11:57:51.699906 16547 test_simple.cc:58] 1 5

I0512 11:57:51.699911 16547 test_simple.cc:58] 2 5

I0512 11:57:51.699914 16547 test_simple.cc:41] Destruct

I0512 11:57:51.699919 16547 test_simple.cc:41] Destruct

I0512 11:57:51.699923 16547 test_simple.cc:41] Destruct

   

但是expression template还是有必要的,比如

vec = vec1 + vec2 + vec3

这个时候临时变量的空间开辟和释放是逃不掉的:2次resize,4 + 1次destruct,其中有一次多余的resize,对应一次多余的destruct。

   

// Chains two additions; `b + c` yields an intermediate Vec before `d` is
// added. The operator+ for this Vec is not shown in this excerpt.
TEST(simple3, func) {
    Vec b{ 3, 2, 1 };
    Vec c{ 2, 3, 4 };
    Vec d{ 123, 45, 30 };
    Vec a = b + c + d;
}

   

I0512 11:59:40.398268 25704 test_simple.cc:33] &data[0] --- [0x8b1330]

I0512 11:59:40.398275 25704 test_simple.cc:58] vec.data --- 3

I0512 11:59:40.398279 25704 test_simple.cc:58] 0 5

I0512 11:59:40.398284 25704 test_simple.cc:58] 1 5

I0512 11:59:40.398288 25704 test_simple.cc:58] 2 5

I0512 11:59:40.398293 25704 test_simple.cc:33] &data[0] --- [0x8b1320]

I0512 11:59:40.398298 25704 test_simple.cc:58] vec.data --- 3

I0512 11:59:40.398304 25704 test_simple.cc:58] 0 128

I0512 11:59:40.398310 25704 test_simple.cc:58] 1 50

I0512 11:59:40.398318 25704 test_simple.cc:58] 2 35

I0512 11:59:40.398321 25704 test_simple.cc:41] Destruct

I0512 11:59:40.398325 25704 test_simple.cc:41] Destruct

I0512 11:59:40.398329 25704 test_simple.cc:41] Destruct

I0512 11:59:40.398334 25704 test_simple.cc:41] Destruct

I0512 11:59:40.398339 25704 test_simple.cc:41] Destruct

   

  • 延迟计算( lazy evaluation)

   

当看到+的时候记忆下来不立即处理,当看到=的时候再分配一次空间 统一处理

A = B + C

教程里面给出了示例

exp_lazy.cpp 来自 <https://github.com/dmlc/mshadow/tree/master/guide/exp-template>

// Forward declaration: BinaryAddExp stores references to Vec, which is only
// defined further down.
struct Vec;

// Records the two operands of a pending `lhs + rhs`; nothing is computed here.
// Only references are stored, so both operands must outlive this object.
struct BinaryAddExp {
  const Vec &lhs;
  const Vec &rhs;
  BinaryAddExp(const Vec &lhs, const Vec &rhs)
      : lhs(lhs), rhs(rhs) {}
};

// Non-owning view of `len` floats starting at `dptr`.
// No constructor or destructor allocates or de-allocates memory;
// allocation is done by the user.
struct Vec {
  int len = 0;            // number of elements reachable through dptr
  float *dptr = nullptr;  // caller-owned storage
  // Empty view: len is 0, so assigning an expression to it writes nothing.
  Vec(void) {}
  // Wraps an existing buffer of `len` floats.
  Vec(float *dptr, int len)
      : len(len), dptr(dptr) {}
  // Evaluation happens here: writes lhs[i] + rhs[i] into this view for every
  // i in [0, len). Both operands must provide at least `len` elements.
  inline Vec &operator=(const BinaryAddExp &src) {
    for (int i = 0; i < len; ++i) {
      dptr[i] = src.lhs.dptr[i] + src.rhs.dptr[i];
    }
    return *this;
  }
};

// No evaluation happens here: the operands are only captured by reference.
inline BinaryAddExp operator+(const Vec &lhs, const Vec &rhs) {
  return BinaryAddExp(lhs, rhs);
}
但是

A = B + C + D

多个操作的时候如何传递这种操作记忆?如果只是延迟计算,那么对于只有一次+的操作,上面已经看到C++11本身就可以避免多余的空间分配了。

  • 表达式模板(expression template)

   

类似教程中的 稍作改写 其实我们在执行=的时候 对应的是这样的类型

Exp<BinaryAddExp<BinaryAddExp<Vec, Vec>, Vec> >

这个是编译器在编译期间确定好的

   

// Base of every expression. Each expression type inherits from Exp and passes
// its own type as SubType, which lets generic code recover the concrete type
// without virtual calls.
template<typename SubType>
struct Exp {
  // Returns a const reference to the concrete expression object.
  // Only valid when this object really is the Exp base of a SubType.
  inline const SubType& self(void) const {
    return *static_cast<const SubType*>(this);
  }
};

// Lazy element-wise sum of two expressions.
// Inherits from Exp and puts its own type into the template argument.
// Operands are held by reference, so they must outlive this node. Each
// operand type must provide const members `Eval(int)` and `size()`.
template<typename TLhs, typename TRhs>
struct BinaryAddExp : public Exp<BinaryAddExp<TLhs, TRhs> > {
  const TLhs &lhs;
  const TRhs &rhs;
  BinaryAddExp(const TLhs& lhs, const TRhs& rhs)
      : lhs(lhs), rhs(rhs) {}
  // Evaluates the sum at position i. The result type follows the operands
  // (int + int stays int), so integer leaves are not rounded through float.
  inline auto Eval(int i) const -> decltype(lhs.Eval(i) + rhs.Eval(i)) {
    return lhs.Eval(i) + rhs.Eval(i);
  }

  // Length of the expression, taken from the left operand. Const so it can be
  // called through the const reference an assignment receives.
  inline size_t size() const
  {
    return lhs.size();
  }
};

   

// Integer vector that takes part in expression templates: it is a leaf
// expression (Eval/size) and also the place where a whole expression tree is
// finally evaluated (operator=).
class Vec : public Exp<Vec>
{
public:
    // Resizes the storage to `len` zeros, then passes the address of the
    // first element to the Pval debug macro (defined elsewhere).
    // len == 0 is not handled here.
    Vec(int len)
    {
        data.resize(len, 0);
        Pval(&data[0]);
    }

    // Logged so default constructions can be counted in the output.
    Vec()
    {
        LOG(INFO) << "Default construct";
    }

    // Logged so destructions can be counted in the output.
    ~Vec()
    {
        LOG(INFO) << "Destruct";
    }

    // Copies the listed values into the vector.
    Vec(const initializer_list<int>& il)
        : data(il)
    {
    }

    // Number of stored elements. Const so it can be called through the const
    // references that expression nodes keep to their operands.
    inline size_t size() const
    {
        return data.size();
    }

    // Evaluation happens here: resizes this vector to the length of the
    // source expression and fills it element by element in a single pass.
    template<typename EType>
    inline Vec& operator= (const Exp<EType>& src_) {
        const EType &src = src_.self();
        Pval(src.size());
        data.resize(src.size());
        for (size_t i = 0; i < src.size(); ++i) {
            data[i] = src.Eval(i);
        }
        return *this;
    }

    // Evaluates this leaf at position i (no bounds check).
    inline int Eval(int i) const {
        return data[i];
    }

    // Element storage; public so helpers such as Pvec can read it.
    vector<int> data;
};

   

   

// Generic addition: accepts any two expressions and returns a node that
// refers to both concrete operands. No element is computed here.
template<typename TLhs, typename TRhs>
inline BinaryAddExp<TLhs, TRhs> operator+(const Exp<TLhs> &lhs, const Exp<TRhs> &rhs)
{
    typedef BinaryAddExp<TLhs, TRhs> Sum;
    const TLhs &left = lhs.self();
    const TRhs &right = rhs.self();
    return Sum(left, right);
}

   

   

void run()

{

Vec b{ 3, 2, 1 }, c{ 2, 3, 4 }, d{ 123, 45, 30 };

Vec a;

a = b + c + d;

Pvec(a.data);

}

   

   

4次析构 没有多余的构造

I0512 15:15:55.864331 16127 exp_template.cc:70] Default construct

I0512 15:15:55.864430 16127 exp_template.cc:90] src.size() --- [3]

I0512 15:15:55.864437 16127 exp_template.cc:119] a.data --- 3

I0512 15:15:55.864444 16127 exp_template.cc:119] 0 128

I0512 15:15:55.864449 16127 exp_template.cc:119] 1 50

I0512 15:15:55.864452 16127 exp_template.cc:119] 2 35

I0512 15:15:55.864456 16127 exp_template.cc:74] Destruct

I0512 15:15:55.864460 16127 exp_template.cc:74] Destruct

I0512 15:15:55.864464 16127 exp_template.cc:74] Destruct

I0512 15:15:55.864470 16127 exp_template.cc:74] Destruct

   

如果要支持

Vec a = b + c + d; // 拷贝初始化:需要一个接受表达式的构造函数

需要增加

// Converting constructor (member of Vec): builds a Vec directly from any
// expression by forwarding to the expression-assignment operator.
template<typename EType>
Vec(const Exp<EType>& src_)
{
    this->operator=(src_);
}

   

   

  • 支持用户自定义的二元操作

类似下面的计算

A = B * F<maximum>(C, B);

   

   

// Generic binary expression: applies OP::Map element-wise to two operands.
// Inherits from Exp and puts its own type into the template argument.
// Operands are held by reference, so they must outlive this node.
template<typename OP, typename TLhs, typename TRhs>
struct BinaryMapExp : public Exp<BinaryMapExp<OP, TLhs, TRhs> > {
  const TLhs& lhs;
  const TRhs& rhs;

  BinaryMapExp(const TLhs& lhs, const TRhs& rhs) : lhs(lhs), rhs(rhs) {}

  // Evaluates the expression at position i by combining both operands'
  // values with OP::Map.
  inline float Eval(int i) const {
    return OP::Map(lhs.Eval(i),
                   rhs.Eval(i));
  }
};
// Non-owning view of `len` floats starting at `dptr`, usable as a leaf in an
// expression tree. No constructor or destructor allocates or de-allocates
// memory; allocation is done by the user.
struct Vec: public Exp<Vec>{
  int len = 0;            // number of elements reachable through dptr
  float* dptr = nullptr;  // caller-owned storage
  // Empty view: len is 0, so assigning an expression to it writes nothing.
  Vec(void) {}
  // Wraps an existing buffer of `len` floats.
  Vec(float *dptr, int len)
      : len(len), dptr(dptr) {}
  // Evaluation happens here: stores src.Eval(i) for every i in [0, len).
  // The source expression must be valid for all of those positions.
  template<typename EType>
  inline Vec& operator=(const Exp<EType>& src_) {
    const EType &src = src_.self();
    for (int i = 0; i < len; ++i) {
      dptr[i] = src.Eval(i);
    }
    return *this;
  }
  // Evaluates this leaf at position i (no bounds check).
  inline float Eval(int i) const {
    return dptr[i];
  }
};
// Builds a lazy binary-map node for a caller-chosen operation OP; works for
// any two expressions. OP is given explicitly, e.g. F<maximum>(a, b).
template<typename OP, typename TLhs, typename TRhs>
inline BinaryMapExp<OP, TLhs, TRhs>
F(const Exp<TLhs>& lhs, const Exp<TRhs>& rhs) {
  typedef BinaryMapExp<OP, TLhs, TRhs> Node;
  const TLhs& left = lhs.self();
  const TRhs& right = rhs.self();
  return Node(left, right);
}

// Element-wise product functor used by operator* below. It has the same
// shape as any operation accepted by BinaryMapExp: a static Map(a, b).
struct mul {
  inline static float Map(float a, float b) {
    return a * b;
  }
};

// Multiplication of any two expressions, built on the generic map node.
// Nothing is computed until the result is assigned to a Vec.
template<typename TLhs, typename TRhs>
inline BinaryMapExp<mul, TLhs, TRhs>
operator*(const Exp<TLhs>& lhs, const Exp<TRhs>& rhs) {
  return F<mul>(lhs, rhs);
}

// User-defined operation: element-wise maximum, for use as F<maximum>(x, y).
struct maximum {
  // Returns a when a > b, otherwise b.
  inline static float Map(float a, float b) {
    if (a > b) {
      return a;
    }
    return b;
  }
};

   

   

posted @ 2015-05-12 15:40  阁子  阅读(677)  评论(0编辑  收藏  举报