68 lines
1.9 KiB
Python
68 lines
1.9 KiB
Python
import numpy as np
|
|
from math import cos
|
|
|
|
class MountainCarEnv:
|
|
def __init__(self, max_step):
|
|
self.min_position = -1.2
|
|
self.max_position = 0.6
|
|
self.max_speed = 0.07
|
|
self.goal_position = 0.5
|
|
self.position = 0.0
|
|
self.speed = 0.0
|
|
self.reward = 0.0
|
|
self.step_count = 0
|
|
self.max_step = max_step
|
|
self.distance_penaltiy = 100
|
|
|
|
self.reset()
|
|
|
|
def reset(self):
|
|
self.position = np.random.uniform(low=-0.6, high=-0.4)
|
|
self.speed = 0.0
|
|
self.reward = 0.0
|
|
self.step_count = 0
|
|
|
|
def state(self):
|
|
return self.position, self.speed
|
|
|
|
@staticmethod
|
|
def minmax(value, lower, upper):
|
|
return lower if value < lower else upper if value > upper else value
|
|
|
|
def termination(self):
|
|
if self.step_count < self.max_step:
|
|
if self.position >= self.goal_position:
|
|
return True
|
|
else:
|
|
self.reward += -1.0
|
|
return False
|
|
else:
|
|
self.reward += -(self.goal_position - self.position) * self.distance_penaltiy
|
|
return True
|
|
|
|
def step(self, action):
|
|
position, speed = self.state()
|
|
speed += (action-1)*0.001 + cos(3*position)*(-0.0025)
|
|
speed = self.minmax(speed, -self.max_speed, self.max_speed)
|
|
position += speed
|
|
position = self.minmax(position, self.min_position, self.max_position)
|
|
if (position == self.min_position) and (speed < 0.0):
|
|
speed = 0.0
|
|
|
|
self.speed = speed
|
|
self.position = position
|
|
|
|
self.step_count += 1
|
|
done = self.termination()
|
|
|
|
return done, self.reward, self.step_count
|
|
|
|
def runner(self, policy):
|
|
done = False
|
|
self.reset()
|
|
while not done:
|
|
action = policy[self.step_count]
|
|
done, _, _ = self.step(float(action))
|
|
|
|
return self.reward, self.step_count
|