initial commit
This commit is contained in:
commit
df79df9808
8
.idea/.gitignore
vendored
Normal file
8
.idea/.gitignore
vendored
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
# Default ignored files
|
||||||
|
/shelf/
|
||||||
|
/workspace.xml
|
||||||
|
# Editor-based HTTP Client requests
|
||||||
|
/httpRequests/
|
||||||
|
# Datasource local storage ignored files
|
||||||
|
/dataSources/
|
||||||
|
/dataSources.local.xml
|
1
.idea/.name
Normal file
1
.idea/.name
Normal file
@ -0,0 +1 @@
|
|||||||
|
ActiveBOToytask
|
10
.idea/ActiveBOToytask.iml
Normal file
10
.idea/ActiveBOToytask.iml
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<module type="PYTHON_MODULE" version="4">
|
||||||
|
<component name="NewModuleRootManager">
|
||||||
|
<content url="file://$MODULE_DIR$">
|
||||||
|
<excludeFolder url="file://$MODULE_DIR$/venv" />
|
||||||
|
</content>
|
||||||
|
<orderEntry type="jdk" jdkName="Python 3.8 (venv)" jdkType="Python SDK" />
|
||||||
|
<orderEntry type="sourceFolder" forTests="false" />
|
||||||
|
</component>
|
||||||
|
</module>
|
53
.idea/inspectionProfiles/Project_Default.xml
Normal file
53
.idea/inspectionProfiles/Project_Default.xml
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
<component name="InspectionProjectProfileManager">
|
||||||
|
<profile version="1.0">
|
||||||
|
<option name="myName" value="Project Default" />
|
||||||
|
<inspection_tool class="DuplicatedCode" enabled="true" level="WEAK WARNING" enabled_by_default="true">
|
||||||
|
<Languages>
|
||||||
|
<language minSize="342" name="Python" />
|
||||||
|
</Languages>
|
||||||
|
</inspection_tool>
|
||||||
|
<inspection_tool class="PyPep8Inspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
|
||||||
|
<option name="ignoredErrors">
|
||||||
|
<list>
|
||||||
|
<option value="E302" />
|
||||||
|
<option value="E305" />
|
||||||
|
<option value="W292" />
|
||||||
|
<option value="E221" />
|
||||||
|
</list>
|
||||||
|
</option>
|
||||||
|
</inspection_tool>
|
||||||
|
<inspection_tool class="PyPep8NamingInspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
|
||||||
|
<option name="ignoredErrors">
|
||||||
|
<list>
|
||||||
|
<option value="N802" />
|
||||||
|
<option value="N806" />
|
||||||
|
</list>
|
||||||
|
</option>
|
||||||
|
</inspection_tool>
|
||||||
|
<inspection_tool class="PyUnresolvedReferencesInspection" enabled="true" level="WARNING" enabled_by_default="true">
|
||||||
|
<option name="ignoredIdentifiers">
|
||||||
|
<list>
|
||||||
|
<option value="node.*" />
|
||||||
|
<option value="rclpy" />
|
||||||
|
<option value="launch.*" />
|
||||||
|
<option value="parameter.*" />
|
||||||
|
<option value="rh8d_msgs.msg.LoneJoint" />
|
||||||
|
<option value="rh8d_msgs.msg.AllJoints" />
|
||||||
|
<option value="rh8d_msgs.msg.LoneMainBoard" />
|
||||||
|
<option value="rh8d_msgs.msg.AllMainBoards" />
|
||||||
|
<option value="rh8d_msgs.msg.ClearHWError" />
|
||||||
|
<option value="rh8d_msgs.msg.JointListSetStiffness" />
|
||||||
|
<option value="rh8d_msgs.msg.JointListSetSpeedPos" />
|
||||||
|
<option value="rh8d_msgs.msg.SetShutdownCond" />
|
||||||
|
<option value="rh8d_msgs.msg.AllSensors" />
|
||||||
|
<option value="rh8d_msgs.msg.LoneSensor" />
|
||||||
|
<option value="rh8d_msgs.msg.JointsSensorsCombined" />
|
||||||
|
<option value="rh8d_msgs.msg.SensorUserCommand" />
|
||||||
|
<option value="rh8d_msgs.msg.JointSetSpeedPos" />
|
||||||
|
<option value="src.web_test.web_test.dmp_node.web_msgs" />
|
||||||
|
<option value="src.web_test.web_test.acquisition_node.web_msgs" />
|
||||||
|
</list>
|
||||||
|
</option>
|
||||||
|
</inspection_tool>
|
||||||
|
</profile>
|
||||||
|
</component>
|
6
.idea/inspectionProfiles/profiles_settings.xml
Normal file
6
.idea/inspectionProfiles/profiles_settings.xml
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
<component name="InspectionProjectProfileManager">
|
||||||
|
<settings>
|
||||||
|
<option name="USE_PROJECT_PROFILE" value="false" />
|
||||||
|
<version value="1.0" />
|
||||||
|
</settings>
|
||||||
|
</component>
|
4
.idea/misc.xml
Normal file
4
.idea/misc.xml
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.8 (RlToyTask)" project-jdk-type="Python SDK" />
|
||||||
|
</project>
|
8
.idea/modules.xml
Normal file
8
.idea/modules.xml
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="ProjectModuleManager">
|
||||||
|
<modules>
|
||||||
|
<module fileurl="file://$PROJECT_DIR$/../ActiveBOToytask/.idea/ActiveBOToytask.iml" filepath="$PROJECT_DIR$/../ActiveBOToytask/.idea/ActiveBOToytask.iml" />
|
||||||
|
</modules>
|
||||||
|
</component>
|
||||||
|
</project>
|
15
AcquistionFunctions/ExpectedImprovement.py
Normal file
15
AcquistionFunctions/ExpectedImprovement.py
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
import numpy as np
|
||||||
|
from scipy.stats import norm
|
||||||
|
|
||||||
|
def ExpectedImprovement(gp, X, nr_test, nr_weights, kappa=2.576, seed=None, lower=-1.0, upper=1.0):
|
||||||
|
y_hat = gp.predict(X)
|
||||||
|
best_y = max(y_hat)
|
||||||
|
rng = np.random.default_rng(seed=seed)
|
||||||
|
X_test = rng.uniform(lower, upper, (nr_test, nr_weights))
|
||||||
|
mu, sigma = gp.predict(X_test, return_std=True)
|
||||||
|
z = (mu - best_y - kappa) / sigma
|
||||||
|
ei = (mu - best_y - kappa) * norm.cdf(z) + sigma * norm.pdf(z)
|
||||||
|
|
||||||
|
idx = np.argmax(ei)
|
||||||
|
X_next = X_test[idx, :]
|
||||||
|
return X_next
|
135
BayesianOptimization/BOwithGym.py
Normal file
135
BayesianOptimization/BOwithGym.py
Normal file
@ -0,0 +1,135 @@
|
|||||||
|
import numpy as np
|
||||||
|
from sklearn.gaussian_process import GaussianProcessRegressor
|
||||||
|
from sklearn.gaussian_process.kernels import Matern
|
||||||
|
|
||||||
|
from PolicyModel.GaussianModel import GaussianPolicy
|
||||||
|
from AcquistionFunctions.ExpectedImprovement import ExpectedImprovement
|
||||||
|
|
||||||
|
import gym
|
||||||
|
import time
|
||||||
|
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
|
||||||
|
from warnings import catch_warnings, simplefilter
|
||||||
|
|
||||||
|
class BayesianOptimization:
|
||||||
|
def __init__(self, env, nr_step, nr_init=3, acq='ei', nr_weights=8, policy_seed=None):
|
||||||
|
self.env = env
|
||||||
|
self.nr_init = nr_init
|
||||||
|
self.acq = acq
|
||||||
|
self.X = None
|
||||||
|
self.Y = None
|
||||||
|
self.gp = None
|
||||||
|
|
||||||
|
self.episode = 0
|
||||||
|
self.best_reward = np.empty((1, 1))
|
||||||
|
self.distance_penalty = 10
|
||||||
|
|
||||||
|
self.nr_policy_weights = nr_weights
|
||||||
|
self.nr_steps = nr_step
|
||||||
|
self.policy_seed = policy_seed
|
||||||
|
self.lowerb = -1.0
|
||||||
|
self.upperb = 1.0
|
||||||
|
self.policy_model = GaussianPolicy(self.nr_policy_weights,
|
||||||
|
self.nr_steps,
|
||||||
|
self.policy_seed,
|
||||||
|
self.lowerb,
|
||||||
|
self.upperb)
|
||||||
|
|
||||||
|
self.nr_test = 100
|
||||||
|
|
||||||
|
def initialize(self):
|
||||||
|
self.env.reset()
|
||||||
|
self.env.render()
|
||||||
|
|
||||||
|
self.X = np.zeros((self.nr_init, self.nr_policy_weights))
|
||||||
|
self.Y = np.zeros((self.nr_init, 1))
|
||||||
|
|
||||||
|
for i in range(self.nr_init):
|
||||||
|
self.policy_model.random_policy()
|
||||||
|
self.X[i, :] = self.policy_model.weights.T
|
||||||
|
policy = self.policy_model.policy_rollout()
|
||||||
|
|
||||||
|
reward = self.runner(policy)
|
||||||
|
|
||||||
|
self.Y[i] = reward
|
||||||
|
|
||||||
|
self.gp = GaussianProcessRegressor(Matern(nu=1.5))
|
||||||
|
self.gp.fit(self.X, self.Y)
|
||||||
|
|
||||||
|
def runner(self, policy):
|
||||||
|
done = False
|
||||||
|
step_count = 0
|
||||||
|
env_reward = 0.0
|
||||||
|
while not done:
|
||||||
|
action = policy[step_count]
|
||||||
|
output = self.env.step(action)
|
||||||
|
env_reward += output[1]
|
||||||
|
done = output[2]
|
||||||
|
time.sleep(0.0001)
|
||||||
|
step_count += 1
|
||||||
|
if step_count >= self.nr_steps:
|
||||||
|
done = True
|
||||||
|
distance = -(self.env.goal_position - output[0][0])
|
||||||
|
env_reward += distance * self.distance_penalty
|
||||||
|
|
||||||
|
time.sleep(0.25)
|
||||||
|
self.env.reset()
|
||||||
|
return env_reward
|
||||||
|
|
||||||
|
def next_observation(self):
|
||||||
|
if self.acq == 'ei':
|
||||||
|
x_next = ExpectedImprovement(self.gp,
|
||||||
|
self.X,
|
||||||
|
self.nr_test,
|
||||||
|
self.nr_policy_weights,
|
||||||
|
seed=self.policy_seed,
|
||||||
|
lower=self.lowerb,
|
||||||
|
upper=self.upperb)
|
||||||
|
|
||||||
|
return x_next
|
||||||
|
|
||||||
|
else:
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
def eval_new_observation(self, x_next):
|
||||||
|
self.policy_model.weights = x_next
|
||||||
|
policy = self.policy_model.policy_rollout()
|
||||||
|
reward = self.runner(policy)
|
||||||
|
|
||||||
|
self.X = np.vstack((self.X, x_next))
|
||||||
|
self.Y = np.vstack((self.Y, reward))
|
||||||
|
|
||||||
|
self.gp.fit(self.X, self.Y)
|
||||||
|
|
||||||
|
if self.episode == 0:
|
||||||
|
self.best_reward[0] = max(self.Y)
|
||||||
|
else:
|
||||||
|
self.best_reward = np.vstack((self.best_reward, max(self.Y)))
|
||||||
|
|
||||||
|
self.episode += 1
|
||||||
|
self.policy_model.plot_policy()
|
||||||
|
|
||||||
|
def plot_reward(self):
|
||||||
|
epsiodes = np.linspace(0, self.episode, self.episode)
|
||||||
|
plt.plot(epsiodes, self.best_reward)
|
||||||
|
plt.show()
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
nr_steps = 80
|
||||||
|
env = gym.envs.make('MountainCarContinuous-v0', render_mode="human")
|
||||||
|
bo = BayesianOptimization(env, nr_steps)
|
||||||
|
bo.initialize()
|
||||||
|
iteration_steps = 100
|
||||||
|
for i in range(iteration_steps):
|
||||||
|
x_next = bo.next_observation()
|
||||||
|
bo.eval_new_observation(x_next)
|
||||||
|
|
||||||
|
print(bo.episode, bo.best_reward[-1][0])
|
||||||
|
|
||||||
|
bo.plot_reward()
|
||||||
|
bo.policy_model.plot_policy()
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
126
BayesianOptimization/BayesianOptimization.py
Normal file
126
BayesianOptimization/BayesianOptimization.py
Normal file
@ -0,0 +1,126 @@
|
|||||||
|
import numpy as np
|
||||||
|
from sklearn.gaussian_process import GaussianProcessRegressor
|
||||||
|
from sklearn.gaussian_process.kernels import Matern
|
||||||
|
|
||||||
|
from PolicyModel.GaussianModel import GaussianPolicy
|
||||||
|
from ToyTask.MountainCar import MountainCarEnv
|
||||||
|
from AcquistionFunctions.ExpectedImprovement import ExpectedImprovement
|
||||||
|
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
|
||||||
|
from warnings import catch_warnings, simplefilter
|
||||||
|
|
||||||
|
class BayesianOptimization:
|
||||||
|
def __init__(self, env, nr_init=3, acq='ei', nr_weights=8, policy_seed=None):
|
||||||
|
self.env = env
|
||||||
|
self.nr_init = nr_init
|
||||||
|
self.acq = acq
|
||||||
|
self.X = None
|
||||||
|
self.Y = None
|
||||||
|
self.gp = None
|
||||||
|
|
||||||
|
self.episode = 0
|
||||||
|
self.best_reward = np.empty((1, 1))
|
||||||
|
|
||||||
|
self.nr_policy_weights = nr_weights
|
||||||
|
self.nr_steps = env.max_step
|
||||||
|
self.policy_seed = policy_seed
|
||||||
|
self.lowerb = -1.0
|
||||||
|
self.upperb = 1.0
|
||||||
|
self.policy_model = GaussianPolicy(self.nr_policy_weights,
|
||||||
|
self.nr_steps,
|
||||||
|
self.policy_seed,
|
||||||
|
self.lowerb,
|
||||||
|
self.upperb)
|
||||||
|
|
||||||
|
self.nr_test = 100
|
||||||
|
|
||||||
|
def initialize(self):
|
||||||
|
self.X = np.zeros((self.nr_init, self.nr_policy_weights))
|
||||||
|
self.Y = np.zeros((self.nr_init, 1))
|
||||||
|
|
||||||
|
for i in range(self.nr_init):
|
||||||
|
self.policy_model.random_policy()
|
||||||
|
self.X[i, :] = self.policy_model.weights.T
|
||||||
|
policy = self.policy_model.policy_rollout()
|
||||||
|
|
||||||
|
reward, step_count = self.env.runner(policy)
|
||||||
|
|
||||||
|
self.Y[i] = reward
|
||||||
|
|
||||||
|
self.gp = GaussianProcessRegressor(Matern(nu=1.5))
|
||||||
|
self.gp.fit(self.X, self.Y)
|
||||||
|
|
||||||
|
def next_observation(self):
|
||||||
|
if self.acq == 'ei':
|
||||||
|
x_next = ExpectedImprovement(self.gp,
|
||||||
|
self.X,
|
||||||
|
self.nr_test,
|
||||||
|
self.nr_policy_weights,
|
||||||
|
seed=self.policy_seed,
|
||||||
|
lower=self.lowerb,
|
||||||
|
upper=self.upperb)
|
||||||
|
|
||||||
|
return x_next
|
||||||
|
|
||||||
|
else:
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
def eval_new_observation(self, x_next):
|
||||||
|
self.policy_model.weights = x_next
|
||||||
|
policy = self.policy_model.policy_rollout()
|
||||||
|
reward, _ = self.env.runner(policy)
|
||||||
|
|
||||||
|
self.X = np.vstack((self.X, x_next))
|
||||||
|
self.Y = np.vstack((self.Y, reward))
|
||||||
|
|
||||||
|
self.gp.fit(self.X, self.Y)
|
||||||
|
|
||||||
|
if self.episode == 0:
|
||||||
|
self.best_reward[0] = max(self.Y)
|
||||||
|
else:
|
||||||
|
self.best_reward = np.vstack((self.best_reward, max(self.Y)))
|
||||||
|
|
||||||
|
self.episode += 1
|
||||||
|
|
||||||
|
def plot(self):
|
||||||
|
if self.nr_policy_weights == 1:
|
||||||
|
plt.scatter(self.X, self.Y)
|
||||||
|
|
||||||
|
x_plot = np.linspace(self.lowerb, self.upperb, 100).reshape(-1, 1)
|
||||||
|
with catch_warnings():
|
||||||
|
simplefilter('ignore')
|
||||||
|
y_plot, y_sigma = self.gp.predict(x_plot, return_std=True)
|
||||||
|
|
||||||
|
plt.plot(x_plot, y_plot)
|
||||||
|
plt.fill_between(
|
||||||
|
x_plot.ravel(),
|
||||||
|
y_plot - 1.96 * y_sigma,
|
||||||
|
y_plot + 1.96 * y_sigma,
|
||||||
|
alpha=0.5
|
||||||
|
)
|
||||||
|
|
||||||
|
plt.show()
|
||||||
|
|
||||||
|
def plot_reward(self):
|
||||||
|
epsiodes = np.linspace(0, self.episode, self.episode)
|
||||||
|
plt.plot(epsiodes, self.best_reward)
|
||||||
|
plt.show()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
max_step = 50
|
||||||
|
car = MountainCarEnv(max_step)
|
||||||
|
bo = BayesianOptimization(car)
|
||||||
|
bo.initialize()
|
||||||
|
iteration_steps = 100
|
||||||
|
for i in range(iteration_steps):
|
||||||
|
x_next = bo.next_observation()
|
||||||
|
bo.eval_new_observation(x_next)
|
||||||
|
|
||||||
|
bo.plot_reward()
|
||||||
|
bo.policy_model.plot_policy()
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
63
PolicyModel/GaussianModel.py
Normal file
63
PolicyModel/GaussianModel.py
Normal file
@ -0,0 +1,63 @@
|
|||||||
|
import numpy as np
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
|
||||||
|
class GaussianPolicy:
|
||||||
|
def __init__(self, nr_weights, nr_steps, seed=None, lowerb=-1.0, upperb=1.0):
|
||||||
|
self.nr_weights = nr_weights
|
||||||
|
self.nr_steps = nr_steps
|
||||||
|
self.weights = None
|
||||||
|
self.trajectory = None
|
||||||
|
self.mean = np.linspace(0, self.nr_steps, self.nr_weights)
|
||||||
|
if nr_weights > 1:
|
||||||
|
self.std = self.mean[1] / (2 * np.sqrt(2 * np.log(2))) # Full width at half maximum
|
||||||
|
else:
|
||||||
|
self.std = self.nr_steps / 2
|
||||||
|
|
||||||
|
self.rng = np.random.default_rng(seed=seed)
|
||||||
|
self.low = lowerb
|
||||||
|
self.upper = upperb
|
||||||
|
|
||||||
|
self.reset()
|
||||||
|
|
||||||
|
def reset(self):
|
||||||
|
self.weights = np.zeros((self.nr_weights, 1))
|
||||||
|
self.trajectory = np.zeros((self.nr_steps, 1))
|
||||||
|
|
||||||
|
def random_policy(self):
|
||||||
|
self.weights = self.rng.uniform(self.low, self.upper, self.nr_weights)
|
||||||
|
|
||||||
|
def policy_rollout(self):
|
||||||
|
self.trajectory = np.zeros((self.nr_steps, 1))
|
||||||
|
for i in range(self.nr_steps):
|
||||||
|
for j in range(self.nr_weights):
|
||||||
|
base_fun = np.exp(-0.5*(i - self.mean[j])**2 / self.std**2)
|
||||||
|
self.trajectory[i] += base_fun * self.weights[j]
|
||||||
|
|
||||||
|
return self.trajectory
|
||||||
|
|
||||||
|
def plot_policy(self):
|
||||||
|
x = np.linspace(0, self.nr_steps, self.nr_steps)
|
||||||
|
plt.plot(x, self.trajectory)
|
||||||
|
for i in self.mean:
|
||||||
|
gaussian = np.exp(-0.5 * (x - i)**2 / self.std**2)
|
||||||
|
plt.plot(x, gaussian)
|
||||||
|
|
||||||
|
plt.show()
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
policy = GaussianPolicy(1, 50)
|
||||||
|
policy.random_policy()
|
||||||
|
policy.policy_rollout()
|
||||||
|
print(policy.weights)
|
||||||
|
|
||||||
|
fig, (ax1, ax2) = plt.subplots(2, 1)
|
||||||
|
|
||||||
|
x = np.linspace(0, policy.nr_steps, policy.nr_steps)
|
||||||
|
ax1.plot(x, policy.trajectory)
|
||||||
|
ax2.bar(policy.mean, policy.weights)
|
||||||
|
|
||||||
|
plt.show()
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
67
ToyTask/MountainCar.py
Normal file
67
ToyTask/MountainCar.py
Normal file
@ -0,0 +1,67 @@
|
|||||||
|
import numpy as np
|
||||||
|
from math import cos
|
||||||
|
|
||||||
|
class MountainCarEnv:
|
||||||
|
def __init__(self, max_step):
|
||||||
|
self.min_position = -1.2
|
||||||
|
self.max_position = 0.6
|
||||||
|
self.max_speed = 0.07
|
||||||
|
self.goal_position = 0.5
|
||||||
|
self.position = 0.0
|
||||||
|
self.speed = 0.0
|
||||||
|
self.reward = 0.0
|
||||||
|
self.step_count = 0
|
||||||
|
self.max_step = max_step
|
||||||
|
self.distance_penaltiy = 100
|
||||||
|
|
||||||
|
self.reset()
|
||||||
|
|
||||||
|
def reset(self):
|
||||||
|
self.position = np.random.uniform(low=-0.6, high=-0.4)
|
||||||
|
self.speed = 0.0
|
||||||
|
self.reward = 0.0
|
||||||
|
self.step_count = 0
|
||||||
|
|
||||||
|
def state(self):
|
||||||
|
return self.position, self.speed
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def minmax(value, lower, upper):
|
||||||
|
return lower if value < lower else upper if value > upper else value
|
||||||
|
|
||||||
|
def termination(self):
|
||||||
|
if self.step_count < self.max_step:
|
||||||
|
if self.position >= self.goal_position:
|
||||||
|
return True
|
||||||
|
else:
|
||||||
|
self.reward += -1.0
|
||||||
|
return False
|
||||||
|
else:
|
||||||
|
self.reward += -(self.goal_position - self.position) * self.distance_penaltiy
|
||||||
|
return True
|
||||||
|
|
||||||
|
def step(self, action):
|
||||||
|
position, speed = self.state()
|
||||||
|
speed += (action-1)*0.001 + cos(3*position)*(-0.0025)
|
||||||
|
speed = self.minmax(speed, -self.max_speed, self.max_speed)
|
||||||
|
position += speed
|
||||||
|
position = self.minmax(position, self.min_position, self.max_position)
|
||||||
|
if (position == self.min_position) and (speed < 0.0):
|
||||||
|
speed = 0.0
|
||||||
|
|
||||||
|
self.speed = speed
|
||||||
|
self.position = position
|
||||||
|
|
||||||
|
self.step_count += 1
|
||||||
|
done = self.termination()
|
||||||
|
|
||||||
|
return done, self.reward, self.step_count
|
||||||
|
|
||||||
|
def runner(self, policy):
|
||||||
|
done = False
|
||||||
|
self.reset()
|
||||||
|
while not done:
|
||||||
|
action = policy[self.step_count]
|
||||||
|
done, _, _ = self.step(float(action))
|
||||||
|
|
||||||
|
return self.reward, self.step_count
|
Loading…
Reference in New Issue
Block a user