BO execute error
This commit is contained in:
parent
916b11e933
commit
a05e5af477
@ -22,6 +22,7 @@ find_package(rosidl_default_generators REQUIRED)
|
|||||||
rosidl_generate_interfaces(${PROJECT_NAME}
|
rosidl_generate_interfaces(${PROJECT_NAME}
|
||||||
"srv/WeightToPolicy.srv"
|
"srv/WeightToPolicy.srv"
|
||||||
"srv/RLRollOut.srv"
|
"srv/RLRollOut.srv"
|
||||||
|
"srv/BO.srv"
|
||||||
"msg/ImageFeedback.msg"
|
"msg/ImageFeedback.msg"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
9
src/active_bo_msgs/srv/BO.srv
Normal file
9
src/active_bo_msgs/srv/BO.srv
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
uint16 nr_weights
|
||||||
|
uint16 max_steps
|
||||||
|
uint16 nr_episodes
|
||||||
|
uint16 nr_runs
|
||||||
|
string acquisition_function
|
||||||
|
---
|
||||||
|
float32[] best_policy
|
||||||
|
float32[] reward_mean
|
||||||
|
float32[] reward_std
|
@ -7,9 +7,6 @@ from active_bo_ros.AcquistionFunctions.ExpectedImprovement import ExpectedImprov
|
|||||||
from active_bo_ros.AcquistionFunctions.ProbabilityOfImprovement import ProbabilityOfImprovement
|
from active_bo_ros.AcquistionFunctions.ProbabilityOfImprovement import ProbabilityOfImprovement
|
||||||
from active_bo_ros.AcquistionFunctions.ConfidenceBound import ConfidenceBound
|
from active_bo_ros.AcquistionFunctions.ConfidenceBound import ConfidenceBound
|
||||||
|
|
||||||
from active_bo_ros.ReinforcementLearning.ContinuousMountainCar import Continuous_MountainCarEnv
|
|
||||||
|
|
||||||
|
|
||||||
class BayesianOptimization:
|
class BayesianOptimization:
|
||||||
def __init__(self, env, nr_steps, nr_init=3, acq='ei', nr_weights=6, policy_seed=None):
|
def __init__(self, env, nr_steps, nr_init=3, acq='ei', nr_weights=6, policy_seed=None):
|
||||||
self.env = env
|
self.env = env
|
||||||
@ -145,5 +142,14 @@ class BayesianOptimization:
|
|||||||
self.episode += 1
|
self.episode += 1
|
||||||
return step_count
|
return step_count
|
||||||
|
|
||||||
|
def get_best_result(self):
|
||||||
|
y_hat = self.GP.predict(self.X)
|
||||||
|
idx = np.argmax(y_hat)
|
||||||
|
x_max = self.X[idx, :]
|
||||||
|
|
||||||
|
self.policy_model.weights = x_max
|
||||||
|
|
||||||
|
return self.policy_model.rollout(), y_hat[idx]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -0,0 +1,64 @@
|
|||||||
|
from active_bo_msgs.srv import BO
|
||||||
|
|
||||||
|
import rclpy
|
||||||
|
from rclpy.node import Node
|
||||||
|
|
||||||
|
from active_bo_ros.BayesianOptimization.BayesianOptimization import BayesianOptimization
|
||||||
|
from active_bo_ros.ReinforcementLearning.ContinuousMountainCar import Continuous_MountainCarEnv
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
class BOService(Node):
|
||||||
|
def __init__(self):
|
||||||
|
super().__init__('bo_service')
|
||||||
|
self.srv = self.create_service(BO, 'bo_srv', self.bo_callback)
|
||||||
|
|
||||||
|
self.env = Continuous_MountainCarEnv()
|
||||||
|
self.distance_penalty = 0
|
||||||
|
|
||||||
|
self.nr_init = 3
|
||||||
|
|
||||||
|
def bo_callback(self, request, response):
|
||||||
|
nr_weights = request.nr_weights
|
||||||
|
max_steps = request.steps
|
||||||
|
nr_episodes = request.nr_episodes
|
||||||
|
nr_runs = request.nr_runs
|
||||||
|
acq = request.acquisition_function
|
||||||
|
|
||||||
|
reward = np.zeros((nr_episodes, nr_runs))
|
||||||
|
best_pol_reward = np.zeros((nr_runs, 1))
|
||||||
|
best_policy = np.zeros((max_steps, nr_runs))
|
||||||
|
|
||||||
|
BO = BayesianOptimization(self.env,
|
||||||
|
max_steps,
|
||||||
|
nr_init=self.nr_init,
|
||||||
|
acq=acq,
|
||||||
|
nr_weights=nr_weights)
|
||||||
|
|
||||||
|
for i in range(nr_runs):
|
||||||
|
BO.initialize()
|
||||||
|
|
||||||
|
for j in range(nr_episodes):
|
||||||
|
x_next = BO.next_observation()
|
||||||
|
BO.eval_new_observation(x_next)
|
||||||
|
|
||||||
|
best_policy[:, i], best_pol_reward[:, i] = BO.get_best_result()
|
||||||
|
reward[:, i] = BO.best_reward.T
|
||||||
|
|
||||||
|
response.reward_mean = np.mean(reward, axis=1)
|
||||||
|
response.reward_std = np.std(reward, axis=1)
|
||||||
|
|
||||||
|
best_policy_idx = np.argmax(best_pol_reward)
|
||||||
|
response.best_policy = best_policy[:, best_policy_idx]
|
||||||
|
return response
|
||||||
|
|
||||||
|
|
||||||
|
def main(args=None):
|
||||||
|
rclpy.init(args=args)
|
||||||
|
|
||||||
|
bo_service = BOService()
|
||||||
|
|
||||||
|
rclpy.spin(bo_service)
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
main()
|
11
src/active_bo_ros/launch/bo_service.launch.py
Normal file → Executable file
11
src/active_bo_ros/launch/bo_service.launch.py
Normal file → Executable file
@ -0,0 +1,11 @@
|
|||||||
|
from launch import LaunchDescription
|
||||||
|
from launch_ros.actions import Node
|
||||||
|
|
||||||
|
def generate_launch_description():
|
||||||
|
return LaunchDescription([
|
||||||
|
Node(
|
||||||
|
package='active_bo_ros',
|
||||||
|
executable='bo_srv',
|
||||||
|
name='bo_srv'
|
||||||
|
),
|
||||||
|
])
|
@ -8,15 +8,17 @@ setup(
|
|||||||
name=package_name,
|
name=package_name,
|
||||||
version='0.0.0',
|
version='0.0.0',
|
||||||
packages=[package_name,
|
packages=[package_name,
|
||||||
package_name+'/PolicyModel',
|
package_name + '/PolicyModel',
|
||||||
package_name+'/ReinforcementLearning'],
|
package_name + '/ReinforcementLearning',
|
||||||
|
package_name + '/AcquisitionFunctions',
|
||||||
|
package_name + '/BayesianOptimization'],
|
||||||
data_files=[
|
data_files=[
|
||||||
('share/ament_index/resource_index/packages',
|
('share/ament_index/resource_index/packages',
|
||||||
['resource/' + package_name]),
|
['resource/' + package_name]),
|
||||||
('share/' + package_name, ['package.xml']),
|
('share/' + package_name, ['package.xml']),
|
||||||
(os.path.join('share', package_name), glob('launch/*.launch.py')),
|
(os.path.join('share', package_name), glob('launch/*.launch.py')),
|
||||||
],
|
],
|
||||||
install_requires=['setuptools', 'gym', 'numpy'],
|
install_requires=['setuptools', 'gym', 'numpy', 'sklearn'],
|
||||||
zip_safe=True,
|
zip_safe=True,
|
||||||
maintainer='cpsfeith',
|
maintainer='cpsfeith',
|
||||||
maintainer_email='nikolaus.feith@unileoben.ac.at',
|
maintainer_email='nikolaus.feith@unileoben.ac.at',
|
||||||
@ -26,7 +28,8 @@ setup(
|
|||||||
entry_points={
|
entry_points={
|
||||||
'console_scripts': [
|
'console_scripts': [
|
||||||
'policy_srv = active_bo_ros.policy_service:main',
|
'policy_srv = active_bo_ros.policy_service:main',
|
||||||
'rl_srv = active_bo_ros.rl_service:main'
|
'rl_srv = active_bo_ros.rl_service:main',
|
||||||
|
'bo_srv = active_bo_ros.bo_service:main'
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
Loading…
Reference in New Issue
Block a user