ActiveBOToytask/ToyTask/MountainCar.py

68 lines
1.9 KiB
Python
Raw Permalink Normal View History

2023-02-02 17:54:09 +00:00
import numpy as np
from math import cos
class MountainCarEnv:
def __init__(self, max_step):
self.min_position = -1.2
self.max_position = 0.6
self.max_speed = 0.07
self.goal_position = 0.5
self.position = 0.0
self.speed = 0.0
self.reward = 0.0
self.step_count = 0
self.max_step = max_step
self.distance_penaltiy = 100
self.reset()
def reset(self):
self.position = np.random.uniform(low=-0.6, high=-0.4)
self.speed = 0.0
self.reward = 0.0
self.step_count = 0
def state(self):
return self.position, self.speed
@staticmethod
def minmax(value, lower, upper):
return lower if value < lower else upper if value > upper else value
def termination(self):
if self.step_count < self.max_step:
if self.position >= self.goal_position:
return True
else:
self.reward += -1.0
return False
else:
self.reward += -(self.goal_position - self.position) * self.distance_penaltiy
return True
def step(self, action):
position, speed = self.state()
speed += (action-1)*0.001 + cos(3*position)*(-0.0025)
speed = self.minmax(speed, -self.max_speed, self.max_speed)
position += speed
position = self.minmax(position, self.min_position, self.max_position)
if (position == self.min_position) and (speed < 0.0):
speed = 0.0
self.speed = speed
self.position = position
self.step_count += 1
done = self.termination()
return done, self.reward, self.step_count
def runner(self, policy):
done = False
self.reset()
while not done:
action = policy[self.step_count]
done, _, _ = self.step(float(action))
return self.reward, self.step_count