// MyMakv.cpp : 定义控制台应用程序的入口点。
//
#include "stdafx.h"
// One entry of the policy table: maps a state label to the action chosen for it.
struct strategy
{
    int state;   // state label this entry belongs to
    int action;  // action label selected for that state
};
// Policy table and final result of the program: one state -> action entry per state.
strategy Pai[6];
// Label codes of the available actions.
int ActionSet[4] = { 0, 1, 2, 3 };

// Label codes of the states. States 0..5 are assumed to be the classes obtained
// from a preliminary processing of camera images; they do not denote spatial
// positions.
int stateSet[6] = { 0, 1, 2, 3, 4, 5 };
// Evaluation factor attached to a single state.
struct StaFactor
{
    int s;             // state label
    double Character;  // some important factor of the state, e.g. a feature of the
                       // currently captured image (placeholder for now)
};
// Per-state evaluation factors; Rs() reads them to compute the reward of a state.
StaFactor RealStaFa[6] = { 0 };

// Value of every state as computed by Fps() under the current policy; it is
// overwritten on every sweep while the iteration moves towards an optimum.
double Vps[6] = { 0 };

// Copy of Vps taken at the start of a sweep, compared against the new values by
// the stopping test.
double Vbackup[6] = { 0 };
// Immediate reward of state s: the state's Character factor plus a constant 0.9.
// (Placeholder formula according to the original author.)
// s must be a valid index into RealStaFa; no range check is performed.
double Rs(int s)
{
    const double factor = RealStaFa[s].Character;
    return factor + 0.9;
}
// State transition table, indexed as Ps[a][s][i]: a is the action label (0..3),
// s the current state and i the next state. Fpsa() weights Vps[i] with these
// entries.
// NOTE(review): row Ps[2][4] sums to 1.1 and row Ps[3][5] sums to 0.9, so they
// are not valid probability distributions -- confirm the intended values.
double Ps[4][6][6] = {
    {   // action 0
        { 0.1, 0.2, 0.3, 0.1, 0.1, 0.2 },
        { 0.4, 0.1, 0.1, 0.1, 0.1, 0.2 },
        { 0.1, 0.3, 0.2, 0.1, 0.1, 0.2 },
        { 0.0, 0.1, 0.9, 0.0, 0.0, 0.0 },
        { 0.1, 0.2, 0.3, 0.1, 0.1, 0.2 },
        { 0.1, 0.2, 0.3, 0.1, 0.1, 0.2 },
    },
    {   // action 1
        { 0.5, 0.1, 0.1, 0.1, 0.1, 0.1 },
        { 0.1, 0.2, 0.4, 0.1, 0.1, 0.1 },
        { 0.1, 0.2, 0.3, 0.1, 0.1, 0.2 },
        { 0.1, 0.2, 0.3, 0.1, 0.1, 0.2 },
        { 0.1, 0.2, 0.3, 0.1, 0.1, 0.2 },
        { 0.1, 0.2, 0.3, 0.1, 0.1, 0.2 },
    },
    {   // action 2
        { 0.1, 0.2, 0.3, 0.1, 0.1, 0.2 },
        { 0.7, 0.0, 0.3, 0.0, 0.0, 0.0 },
        { 0.1, 0.2, 0.3, 0.1, 0.1, 0.2 },
        { 0.1, 0.2, 0.3, 0.1, 0.1, 0.2 },
        { 0.1, 0.7, 0.0, 0.0, 0.0, 0.3 },
        { 0.0, 0.0, 0.0, 0.0, 0.1, 0.9 },
    },
    {   // action 3
        { 0.1, 0.2, 0.3, 0.1, 0.1, 0.2 },
        { 0.4, 0.0, 0.1, 0.2, 0.1, 0.2 },
        { 0.8, 0.0, 0.0, 0.0, 0.0, 0.2 },
        { 0.5, 0.0, 0.0, 0.0, 0.5, 0.0 },
        { 0.0, 0.7, 0.3, 0.0, 0.0, 0.0 },
        { 0.5, 0.0, 0.0, 0.1, 0.1, 0.2 },
    },
};
// Expected value of the next state when action a is taken in state s:
// the sum over all six next states i of Ps[a][s][i] * Vps[i].
// The original comment calls this the "Bernoulli" part of value iteration;
// presumably the Bellman equation is meant.
// s and a must be valid indices into Ps; no range check is performed.
double Fpsa(int s, int a)
{
    double expected = 0;
    int next = 0;
    while (next < 6)
    {
        expected = expected + (Ps[a][s][next] * Vps[next]);
        ++next;
    }
    return expected;
}
// Total value of state s (including the future) under the current policy Pai.
// Unlike Rs(), which only scores the current state, this adds the expected
// next-state value discounted by 0.9 (provisional discount factor).
// The action stored in Pai[s] selects which transition matrix is used, which
// is why the result is a value "under Pai".
double Fps(int s)
{
    const int chosen = Pai[s].action;
    const double immediate = Rs(s);
    const double future = Fpsa(s, chosen);
    return immediate + 0.9 * future;
}
// Record action a as the action the policy table Pai selects for state s.
// s must be a valid index into Pai; no range check is performed.
void Paiupdate(int s, int a)
{
    int *slot = &Pai[s].action;
    *slot = a;
}
// Program entry point: runs value iteration over the 6 states and 4 actions.
//
// Each sweep picks, for every state, the action with the largest expected
// next-state value (Fpsa), stores it in Pai and immediately refreshes Vps for
// that state (in-place / asynchronous update). Sweeps repeat until the squared
// change of Vps relative to its squared magnitude is at most 0.1.
//
// argc and argv are not used. Always returns 0.
int _tmain(int argc, _TCHAR* argv[])
{
    // Seed the policy, the value estimates and the per-state reward factors.
    for (int s = 0; s <= 5; s++)
    {
        Pai[s].state = s;
        // Value iteration does not need an initial policy; it is set only so
        // that changes are easy to observe.
        Pai[s].action = 0;
        Vps[s] = 0;
        RealStaFa[s].s = s;
        RealStaFa[s].Character = (double)s;  // placeholder experimental data
    }

    while (1)
    {
        // Remember the values from before this sweep for the stopping test.
        for (int s = 0; s <= 5; s++)
        {
            Vbackup[s] = Vps[s];
        }

        for (int s = 0; s <= 5; s++)
        {
            // Find the action with the largest expected next-state value.
            // The running maximum must be updated together with the index,
            // otherwise the last action that merely beats action 0 would win.
            int bestAction = 0;
            double bestValue = Fpsa(s, 0);
            for (int a = 1; a <= 3; a++)
            {
                const double candidate = Fpsa(s, a);
                if (candidate > bestValue)
                {
                    bestValue = candidate;
                    bestAction = a;
                }
            }

            // Pai must be updated first: Fps() reads Pai[s].action.
            Paiupdate(s, bestAction);
            Vps[s] = Fps(s);  // in-place (asynchronous) update
        }

        // Provisional stopping rule: relative squared change of Vps <= 0.1.
        double changeSq = 0;
        double normSq = 0;
        for (int s = 0; s <= 5; s++)
        {
            const double delta = Vps[s] - Vbackup[s];
            changeSq = changeSq + delta * delta;
            normSq = normSq + Vps[s] * Vps[s];
        }
        if (changeSq / normSq <= 0.1)
        {
            break;
        }
    }

    printf("结束\n");
    return 0;
}