# 2. 环境

## 2.1 Observation & state

Num Observation Min Max
0 Position -1.2 0.6
1 Velocity -0.07 0.07

## 2.2 Actions

n Action
0 将车推向左侧（负值）或向右侧（正值）

# 3. 代码

## 3.1 导入lib

import math
import gym
from gym import spaces
from gym.utils import seeding
import numpy as np


## 3.2 定义Continuous_MountainCarEnv类

class Continuous_MountainCarEnv(gym.Env):
    # Render modes and video frame rate advertised by this environment.
    # The opening "metadata = {" line was missing, which left the two
    # entries and the closing brace dangling.
    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second': 30
    }


### 3.2.1 定义__init__(self)函数

def __init__(self):
    """Set the physical limits, build the action/observation spaces, then seed and reset."""
    self.min_action = -1.0  # smallest allowed action value
    self.max_action = 1.0   # largest allowed action value
    self.min_position = -1.2  # leftmost position
    self.max_position = 0.6   # rightmost position
    self.max_speed = 0.07  # maximum speed (absolute value)
    self.goal_position = 0.45  # was 0.5 in gym, 0.45 in Arnaud de Broissia's version
    self.power = 0.0015  # multiplier that turns the action into a force

    self.low_state = np.array([self.min_position, -self.max_speed])  # [-1.2, -0.07]
    self.high_state = np.array([self.max_position, self.max_speed])  # [0.6, 0.07]

    self.viewer = None  # created lazily on the first render() call

    # Declare the lower/upper bounds of the action and observation spaces.
    # Action space: low = -1.0, high = 1.0, one-dimensional.
    self.action_space = spaces.Box(low=self.min_action, high=self.max_action, shape=(1,))
    # Observation space: low = [-1.2, -0.07], high = [0.6, 0.07].
    self.observation_space = spaces.Box(low=self.low_state, high=self.high_state)

    self.seed()
    self.reset()


### 3.2.2 定义随机种子函数seed(self, seed=None)

    def seed(self, seed=None):
        """Create this environment's random generator from ``seed``.

        Stores the generator returned by ``seeding.np_random`` on
        ``self.np_random`` and returns the seed actually used, wrapped in a
        one-element list.
        """
        self.np_random, seed = seeding.np_random(seed)
        return [seed]


### 3.2.3 定义step(self, action)函数

step()函数

    def step(self, action):

1.    position = self.state[0]
2.    velocity = self.state[1]
# position, velocity = self.state
3.    force = min(max(action[0], -1.0), 1.0)

4.    velocity += force*self.power - 0.0025 * math.cos(3*position)
5.    if (velocity > self.max_speed): velocity = self.max_speed
6.    if (velocity < -self.max_speed): velocity = -self.max_speed
7.    position += velocity
8.    if (position > self.max_position): position = self.max_position
9.    if (position < self.min_position): position = self.min_position
10.   if (position==self.min_position and velocity<0): velocity = 0

11.   done = bool(position >= self.goal_position)

12.   reward = 0
13.   if done:
14.       reward = 100.0
15.   reward-= math.pow(action[0],2)*0.1

16.   self.state = np.array([position, velocity])
17.   return self.state, reward, done, {}

1. 从当前状态 self.state 中取出位置

2. 从当前状态 self.state 中取出速度

3. 引擎力：内层的max(action[0], -1.0)确保动作值不低于下界，即 - 1.0，
外层的min(max(action[0], -1.0), 1.0)确保动作值不高于上界，即 1.0

4. 计算速度：注意是速度累加的，这是微分的概念，把连续过程离散成很小的片段以进行近似

5. 判断当前速度是否大于最大速度：如果是，将当前速度设定为最大速度

6. 判断当前速度是否小于最小速度：如果是，将当前速度设定为最小速度

7. 计算位置

8. 判断当前位置是否高于最高位置：如果是，将当前位置设定为最高位置

9. 判断当前位置是否低于最低位置：如果是，将当前位置设定为最低位置

10. 如果当前位置已是最低位置且速度小于 0：将速度设为 0（小车撞到左边界后停下）

11. 判断小车是否到达目标位置，并把结果转换为布尔类型的 done（True 或者 False）

12. 初始化 reward = 0

13. 如果当前位置高于目标位置，

14. 给予 agent 值为100的reward

15. 从 reward 中减去本次动作消耗的能量：$0.1 \times action[0]^2$

16. 这是执行动作之后得到的新的状态

17. step() 函数返回下一时刻的观测，回报，是否终止，调试项

MountainCarContinuous-v0

11-15 这几行代码的意思是：每执行一个step，就会检查看自己是否越过了右边的山峰，据此来给done赋值，如果小车没有越过右边的山峰，即 done=False，则在这一个step, reward将会记为 $-0.1 \times action^2$，也就是这一个时间步我们耗费了多少能量，我们当然不希望耗油太多。如果小车越过右边的山峰，即 done=True，这一个step就会马上得到 $100 - 0.1 \times action^2$ 的奖励（即 100 的到达奖励再扣除本步的能量消耗）。

### 3.2.4 定义reset()函数：

    def reset(self):
        """Start a new episode and return the initial observation.

        The position is drawn uniformly from [-0.6, -0.4] using
        ``self.np_random``; the velocity starts at 0. A copy of the state
        array is returned.
        """
        self.state = np.array([self.np_random.uniform(low=-0.6, high=-0.4), 0])
        return np.array(self.state)


### 3.2.5 定义_height(self, xs)函数：

    def _height(self, xs):
return np.sin(3 * xs)*.45+.55


### 3.2.6 定义render(self, mode='human')函数

render()函数是图像引擎,就是人机交互界面，进行动画演示，一个仿真环境必不可少的两部分 是物理引擎和图像引擎。物理引擎模拟环境中物体的运动规律；图像引擎用来显示环境中的物体图像。

    def render(self, mode='human'):
        """Draw the track, the car and the goal flag.

        The viewer and all geometry are built once, on the first call; later
        calls only move and rotate the car. Returns whatever the viewer's
        ``render`` returns, requesting an RGB array when ``mode`` is
        ``'rgb_array'``.
        """
        screen_width = 600
        screen_height = 400

        world_width = self.max_position - self.min_position
        scale = screen_width/world_width  # pixels per world unit
        carwidth=40
        carheight=20

        if self.viewer is None:
            # Imported lazily so the rendering module is only loaded when drawing.
            from gym.envs.classic_control import rendering
            self.viewer = rendering.Viewer(screen_width, screen_height)
            xs = np.linspace(self.min_position, self.max_position, 100)
            ys = self._height(xs)
            xys = list(zip((xs-self.min_position)*scale, ys*scale))

            self.track = rendering.make_polyline(xys)
            self.track.set_linewidth(4)
            # Every shape must be registered with the viewer or it is never drawn.
            self.viewer.add_geom(self.track)

            clearance = 10  # vertical offset that lifts the body above the wheels

            l,r,t,b = -carwidth/2, carwidth/2, carheight, 0
            car = rendering.FilledPolygon([(l,b), (l,t), (r,t), (r,b)])
            car.add_attr(rendering.Transform(translation=(0, clearance)))
            # Shared transform: body and wheels follow it together.
            self.cartrans = rendering.Transform()
            car.add_attr(self.cartrans)
            self.viewer.add_geom(car)
            frontwheel = rendering.make_circle(carheight/2.5)
            frontwheel.set_color(.5, .5, .5)
            frontwheel.add_attr(rendering.Transform(translation=(carwidth/4, clearance)))
            frontwheel.add_attr(self.cartrans)
            self.viewer.add_geom(frontwheel)
            backwheel = rendering.make_circle(carheight/2.5)
            backwheel.add_attr(rendering.Transform(translation=(-carwidth/4, clearance)))
            backwheel.add_attr(self.cartrans)
            backwheel.set_color(.5, .5, .5)
            self.viewer.add_geom(backwheel)
            flagx = (self.goal_position-self.min_position)*scale
            flagy1 = self._height(self.goal_position)*scale
            flagy2 = flagy1 + 50
            flagpole = rendering.Line((flagx, flagy1), (flagx, flagy2))
            self.viewer.add_geom(flagpole)
            flag = rendering.FilledPolygon([(flagx, flagy2), (flagx, flagy2-10),
                                            (flagx+25, flagy2-5)])
            flag.set_color(.8,.8,0)
            self.viewer.add_geom(flag)

        pos = self.state[0]
        self.cartrans.set_translation((pos-self.min_position)*scale, self._height(pos)*scale)
        # cos(3*pos) is proportional to the slope of the sin(3x) track, so the car tilts with the hill.
        self.cartrans.set_rotation(math.cos(3 * pos))

        return self.viewer.render(return_rgb_array = mode=='rgb_array')


### 3.2.7 定义close(self)函数

    def close(self):
        """Close the viewer if one was created and forget it."""
        if self.viewer:
            self.viewer.close()
            self.viewer = None


# 4. 运行

## 4.1 完整代码：continuous_mountain_car.py

"""
MountainCarContinuous-v1
@author: Olivier Sigaud
A merge between two sources:
* Adaptation of the MountainCar Environment from the "FAReinforcement" library
of Jose Antonio Martin H. (version 1.0), adapted by  'Tom Schaul, tom@idsia.ch'
and then modified by Arnaud de Broissia
* the OpenAI/gym MountainCar environment
itself from
http://incompleteideas.net/sutton/MountainCar/MountainCar1.cp
"""

import math

import numpy as np

import gym
from gym import spaces
from gym.utils import seeding

class ContinuousMountainCarEnv(gym.Env):
    """
    Description:
        The agent (a car) is started at the bottom of a valley. For any given
        state the agent may choose to accelerate to the left, right or cease
        any acceleration.
    Observation:
        Type: Box(2)
        Num    Observation               Min            Max
        0      Car Position              -1.2           0.6
        1      Car Velocity              -0.07          0.07
    Actions:
        Type: Box(1)
        Num    Action                    Min            Max
        0      the power coef            -1.0           1.0
        Note: actual driving force is calculated by multiplying the power coef by power (0.0015)
    Reward:
         Reward of 100 is awarded if the agent reached the flag (position = 0.45) on top of the mountain.
         Reward is decrease based on amount of energy consumed each step.
    Starting State:
         The position of the car is assigned a uniform random value in
         [-0.6 , -0.4].
         The starting velocity of the car is always assigned to 0.
    Episode Termination:
         The car position is more than 0.45
         Episode length is greater than 200
    """

    metadata = {"render.modes": ["human", "rgb_array"], "video.frames_per_second": 30}

    def __init__(self, goal_velocity=0):
        """Set the physical limits and build the action/observation spaces.

        Args:
            goal_velocity: minimum velocity the car must have at the goal
                position for the episode to count as done.
        """
        self.min_action = -1.0
        self.max_action = 1.0
        self.min_position = -1.2
        self.max_position = 0.6
        self.max_speed = 0.07
        self.goal_position = (
            0.45  # was 0.5 in gym, 0.45 in Arnaud de Broissia's version
        )
        self.goal_velocity = goal_velocity
        self.power = 0.0015

        self.low_state = np.array(
            [self.min_position, -self.max_speed], dtype=np.float32
        )
        self.high_state = np.array(
            [self.max_position, self.max_speed], dtype=np.float32
        )

        # Created lazily on the first render() call.
        self.viewer = None

        self.action_space = spaces.Box(
            low=self.min_action, high=self.max_action, shape=(1,), dtype=np.float32
        )
        self.observation_space = spaces.Box(
            low=self.low_state, high=self.high_state, dtype=np.float32
        )

        self.seed()

    def seed(self, seed=None):
        """Create the random generator from ``seed`` and return ``[seed_used]``."""
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def step(self, action):
        """Advance the simulation by one time step.

        Args:
            action: indexable whose first element is the power coefficient;
                it is clamped to [min_action, max_action] for the dynamics.

        Returns:
            A tuple ``(state, reward, done, info)`` where ``state`` is the new
            ``[position, velocity]`` array and ``info`` is an empty dict.
        """
        position = self.state[0]
        velocity = self.state[1]
        force = min(max(action[0], self.min_action), self.max_action)

        # Engine push minus the pull of the slope at the current position.
        velocity += force * self.power - 0.0025 * math.cos(3 * position)
        if velocity > self.max_speed:
            velocity = self.max_speed
        if velocity < -self.max_speed:
            velocity = -self.max_speed
        position += velocity
        if position > self.max_position:
            position = self.max_position
        if position < self.min_position:
            position = self.min_position
        # Hitting the left wall stops the car.
        if position == self.min_position and velocity < 0:
            velocity = 0

        # Convert a possible numpy bool to a Python bool.
        done = bool(position >= self.goal_position and velocity >= self.goal_velocity)

        reward = 0
        if done:
            reward = 100.0
        # Energy penalty uses the raw (unclamped) action value.
        reward -= math.pow(action[0], 2) * 0.1

        self.state = np.array([position, velocity], dtype=np.float32)
        return self.state, reward, done, {}

    def reset(self):
        """Start a new episode: position uniform in [-0.6, -0.4], velocity 0.

        Returns a float32 copy of the initial state.
        """
        self.state = np.array([self.np_random.uniform(low=-0.6, high=-0.4), 0])
        return np.array(self.state, dtype=np.float32)

    def _height(self, xs):
        """Return the track height ``0.45 * sin(3 * xs) + 0.55`` at ``xs``."""
        return np.sin(3 * xs) * 0.45 + 0.55

    def render(self, mode="human"):
        """Draw the track, the car and the goal flag.

        The viewer and all geometry are built once, on the first call; later
        calls only move and rotate the car. Returns whatever the viewer's
        ``render`` returns, requesting an RGB array when ``mode`` is
        ``"rgb_array"``.
        """
        screen_width = 600
        screen_height = 400

        world_width = self.max_position - self.min_position
        scale = screen_width / world_width  # pixels per world unit
        carwidth = 40
        carheight = 20

        if self.viewer is None:
            # Imported lazily so the rendering module is only loaded when drawing.
            from gym.envs.classic_control import rendering

            self.viewer = rendering.Viewer(screen_width, screen_height)
            xs = np.linspace(self.min_position, self.max_position, 100)
            ys = self._height(xs)
            xys = list(zip((xs - self.min_position) * scale, ys * scale))

            self.track = rendering.make_polyline(xys)
            self.track.set_linewidth(4)
            # Every shape must be registered with the viewer or it is never drawn.
            self.viewer.add_geom(self.track)

            clearance = 10  # vertical offset that lifts the body above the wheels

            l, r, t, b = -carwidth / 2, carwidth / 2, carheight, 0
            car = rendering.FilledPolygon([(l, b), (l, t), (r, t), (r, b)])
            car.add_attr(rendering.Transform(translation=(0, clearance)))
            # Shared transform: body and wheels follow it together.
            self.cartrans = rendering.Transform()
            car.add_attr(self.cartrans)
            self.viewer.add_geom(car)
            frontwheel = rendering.make_circle(carheight / 2.5)
            frontwheel.set_color(0.5, 0.5, 0.5)
            frontwheel.add_attr(
                rendering.Transform(translation=(carwidth / 4, clearance))
            )
            frontwheel.add_attr(self.cartrans)
            self.viewer.add_geom(frontwheel)
            backwheel = rendering.make_circle(carheight / 2.5)
            backwheel.add_attr(
                rendering.Transform(translation=(-carwidth / 4, clearance))
            )
            backwheel.add_attr(self.cartrans)
            backwheel.set_color(0.5, 0.5, 0.5)
            self.viewer.add_geom(backwheel)
            flagx = (self.goal_position - self.min_position) * scale
            flagy1 = self._height(self.goal_position) * scale
            flagy2 = flagy1 + 50
            flagpole = rendering.Line((flagx, flagy1), (flagx, flagy2))
            self.viewer.add_geom(flagpole)
            flag = rendering.FilledPolygon(
                [(flagx, flagy2), (flagx, flagy2 - 10), (flagx + 25, flagy2 - 5)]
            )
            flag.set_color(0.8, 0.8, 0)
            self.viewer.add_geom(flag)

        pos = self.state[0]
        self.cartrans.set_translation(
            (pos - self.min_position) * scale, self._height(pos) * scale
        )
        # cos(3*pos) is proportional to the slope of the sin(3x) track,
        # so the car tilts with the hill.
        self.cartrans.set_rotation(math.cos(3 * pos))

        return self.viewer.render(return_rgb_array=mode == "rgb_array")

    def close(self):
        """Close the viewer if one was created and forget it."""
        if self.viewer:
            self.viewer.close()
            self.viewer = None



## 4.2 注册环境

from gym.envs.classic_control.continuous_mountain_car import ContinuousMountainCarEnv
from gym.envs.registration import register

# Register the environment under a new id so gym.make() can build it.
register(
    id='MountainCarContinuous-v1',
    # The entry point must name the environment class imported above;
    # it previously pointed at an unrelated GridEnv.
    entry_point='gym.envs.classic_control:ContinuousMountainCarEnv',
)

"""

"""


## 4.3 创建运行代码：MountainCarContinuous.py

#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Toolby: PyCharm

import gym

env = gym.make('MountainCarContinuous-v1')
# NOTE(review): .unwrapped bypasses any wrappers gym.make() added (e.g. a
# step limit), so an episode only ends when step() reports done -- confirm
# this is intended for a purely random policy.
env = env.unwrapped

total_steps = 0

# Run 10 episodes with uniformly sampled random actions.
for i_episode in range(10):

    observation = env.reset()
    ep_r = 0  # accumulated shaped reward for this episode
    while True:
        env.render()

        action = env.action_space.sample()

        observation_, reward, done, info = env.step(action)

        position, velocity = observation_

        # Shaped reward replaces the environment's: the farther the car
        # gets from the valley floor (-0.5), the larger the reward.
        reward = abs(position - (-0.5))

        ep_r += reward
        if done:
            get = '| Get' if observation_[0] >= env.unwrapped.goal_position else '| ----'
            print('Epi: ', i_episode,
                  get,
                  '| Ep_r: ', round(ep_r, 4))

            break

        # Carry the new observation into the next step
        # (was a no-op self-assignment).
        observation = observation_
        total_steps += 1

# Release the rendering window once all episodes are finished.
env.close()



# 五：参考

posted @ 2021-11-30 18:19  十七岁的有德  阅读(79)  评论(0编辑  收藏  举报