import numpy as np from math import cos class MountainCarEnv: def __init__(self, max_step): self.min_position = -1.2 self.max_position = 0.6 self.max_speed = 0.07 self.goal_position = 0.5 self.position = 0.0 self.speed = 0.0 self.reward = 0.0 self.step_count = 0 self.max_step = max_step self.distance_penaltiy = 100 self.reset() def reset(self): self.position = np.random.uniform(low=-0.6, high=-0.4) self.speed = 0.0 self.reward = 0.0 self.step_count = 0 def state(self): return self.position, self.speed @staticmethod def minmax(value, lower, upper): return lower if value < lower else upper if value > upper else value def termination(self): if self.step_count < self.max_step: if self.position >= self.goal_position: return True else: self.reward += -1.0 return False else: self.reward += -(self.goal_position - self.position) * self.distance_penaltiy return True def step(self, action): position, speed = self.state() speed += (action-1)*0.001 + cos(3*position)*(-0.0025) speed = self.minmax(speed, -self.max_speed, self.max_speed) position += speed position = self.minmax(position, self.min_position, self.max_position) if (position == self.min_position) and (speed < 0.0): speed = 0.0 self.speed = speed self.position = position self.step_count += 1 done = self.termination() return done, self.reward, self.step_count def runner(self, policy): done = False self.reset() while not done: action = policy[self.step_count] done, _, _ = self.step(float(action)) return self.reward, self.step_count