Compare commits
24 Commits
393379520b
...
master
Author | SHA1 | Date | |
---|---|---|---|
ffca9d2bc0 | |||
d8bada4917 | |||
9ad94fd1ce | |||
bf49376d3c | |||
f875ee34c1 | |||
2422693b42 | |||
bffe826a74 | |||
e0874c3d9d | |||
aa04e0b833 | |||
7c3ad608f7 | |||
a23e5be64c | |||
acf51f01c6 | |||
25c54417eb | |||
ddb98b62b3 | |||
3b7ec52502 | |||
7056a6da07 | |||
7fe46bfa25 | |||
5e42128f06 | |||
b83c490464 | |||
7bcba1132a | |||
d8dd0c4f4b | |||
5c6750886c | |||
220d590c05 | |||
4b7dd8621a |
4
.gitignore
vendored
4
.gitignore
vendored
@ -1,4 +1,6 @@
|
||||
.idea
|
||||
/build/
|
||||
/install/
|
||||
/log/
|
||||
/log/
|
||||
/src/build/
|
||||
/src/install/
|
@ -3,12 +3,13 @@ FROM osrf/ros:humble-desktop-full-jammy
|
||||
|
||||
# Update and install dependencies
|
||||
RUN apt-get update && apt-get install -y \
|
||||
python3-colcon-common-extensions python3-pip \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
python3-colcon-common-extensions python3-pip
|
||||
|
||||
COPY requirements.txt ./
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
RUN rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Create a workspace
|
||||
WORKDIR /ros2_ws
|
||||
|
||||
@ -21,6 +22,7 @@ RUN . /opt/ros/humble/setup.sh && \
|
||||
|
||||
RUN echo "source /opt/ros/humble/setup.bash" >> ~/.bashrc
|
||||
|
||||
ENV PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/opt/ros/humble/bin"
|
||||
|
||||
# Source the workspace
|
||||
CMD ["/bin/bash"]
|
||||
|
19
README.md
19
README.md
@ -27,6 +27,25 @@ docker build -t interactive-robot-learning-framework .
|
||||
docker-compose up -d
|
||||
```
|
||||
|
||||
## Setting up Pycharm Environment
|
||||
Add a new Remote Interpreter based on the docker-compose file.
|
||||
Then add the following paths to the interpreter paths. You can find them when clicking on the Interpreter in the lower right corner of the Pycharm window.
|
||||
Then select "Interpreter Settings" within the setting go to "Show All..." Python interpreters (its in the list of the Python Interpreters).
|
||||
Subsequently, you can add with the "+" a new interpreter, choose "Docker Compose" and select the "docker-compose.yaml" file from this repository.
|
||||
The last step is to add the ros2 paths within your docker container to the interpreter paths (its one of the buttons next to the "+".
|
||||
|
||||
The following paths are necessary to develop in the docker container otherwise python cannot find rclpy and the custom msg packages:
|
||||
|
||||
```bash
|
||||
/opt/ros/humble/local/lib/python3.10/dist-packages
|
||||
|
||||
/opt/ros/humble/lib/python3.10/site-packages
|
||||
|
||||
/ros2_ws/install/interaction_msgs/local/lib/python3.10/dist-packages
|
||||
```
|
||||
If there is a
|
||||
|
||||
|
||||
## Framework Structure
|
||||
|
||||
The Interactive Robot Learning Framework consists of several key ROS2 packages, each responsible for different aspects of robot learning and interaction. Below is an overview of each package:
|
||||
|
@ -0,0 +1,12 @@
|
||||
pytest~=6.2.5
|
||||
setuptools==58.2.0
|
||||
numpy~=1.26.4
|
||||
pydot~=1.4.2
|
||||
empy~=3.3.4
|
||||
lark~=1.1.1
|
||||
scipy~=1.12.0
|
||||
scikit-learn~=1.4.0
|
||||
transitions==0.9.0
|
||||
movement-primitives[all]~=0.7.0
|
||||
cma~=3.3.0
|
||||
PyYAML~=5.4.1
|
@ -1,4 +0,0 @@
|
||||
import rclpy
|
||||
from rclpy.node import Node
|
||||
|
||||
from interaction_msgs.srv import Query
|
@ -1,12 +0,0 @@
|
||||
class RegularQuery:
|
||||
def __init__(self, regular, episode):
|
||||
self.regular = int(regular)
|
||||
self.counter = episode
|
||||
|
||||
def query(self):
|
||||
|
||||
if self.counter % self.regular == 0 and self.counter != 0:
|
||||
return True
|
||||
|
||||
else:
|
||||
return False
|
@ -1,4 +0,0 @@
|
||||
[develop]
|
||||
script_dir=$base/lib/InteractionQuery
|
||||
[install]
|
||||
install_scripts=$base/lib/InteractionQuery
|
@ -1,4 +0,0 @@
|
||||
[develop]
|
||||
script_dir=$base/lib/ObjectiveFunctions
|
||||
[install]
|
||||
install_scripts=$base/lib/ObjectiveFunctions
|
@ -1,4 +0,0 @@
|
||||
[develop]
|
||||
script_dir=$base/lib/Optimizers
|
||||
[install]
|
||||
install_scripts=$base/lib/Optimizers
|
@ -1,4 +0,0 @@
|
||||
[develop]
|
||||
script_dir=$base/lib/RepresentationModels
|
||||
[install]
|
||||
install_scripts=$base/lib/RepresentationModels
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -1,7 +1,7 @@
|
||||
<?xml version="1.0"?>
|
||||
<?xml-model href="http://download.ros.org/schema/package_format3.xsd" schematypens="http://www.w3.org/2001/XMLSchema"?>
|
||||
<package format="3">
|
||||
<name>Optimizers</name>
|
||||
<name>interaction_benchmark</name>
|
||||
<version>0.0.0</version>
|
||||
<description>TODO: Package description</description>
|
||||
<maintainer email="nikolaus.feith@unileoben.ac.at">niko</maintainer>
|
4
src/interaction_benchmark/setup.cfg
Normal file
4
src/interaction_benchmark/setup.cfg
Normal file
@ -0,0 +1,4 @@
|
||||
[develop]
|
||||
script_dir=$base/lib/interaction_benchmark
|
||||
[install]
|
||||
install_scripts=$base/lib/interaction_benchmark
|
@ -1,6 +1,6 @@
|
||||
from setuptools import find_packages, setup
|
||||
|
||||
package_name = 'ObjectiveFunctions'
|
||||
package_name = 'interaction_benchmark'
|
||||
|
||||
setup(
|
||||
name=package_name,
|
@ -9,7 +9,18 @@ find_package(ament_cmake REQUIRED)
|
||||
find_package(rosidl_default_generators REQUIRED)
|
||||
|
||||
rosidl_generate_interfaces(${PROJECT_NAME}
|
||||
"action/TaskEvaluation.action"
|
||||
"srv/Query.srv"
|
||||
"srv/Task.srv"
|
||||
"srv/UserInterface.srv"
|
||||
"srv/ParameterChange.srv"
|
||||
# "srv/TaskEvaluation.srv"
|
||||
"msg/OptimizerState.msg"
|
||||
"msg/Opt2UI.msg"
|
||||
"msg/UI2Opt.msg"
|
||||
"msg/TaskOrder.msg"
|
||||
# "msg/Opt2Task.msg"
|
||||
# "msg/Task2Opt.msg"
|
||||
DEPENDENCIES
|
||||
)
|
||||
|
||||
|
26
src/interaction_msgs/action/TaskEvaluation.action
Normal file
26
src/interaction_msgs/action/TaskEvaluation.action
Normal file
@ -0,0 +1,26 @@
|
||||
# Goal
|
||||
bool user_input
|
||||
uint16 number_of_population
|
||||
float32 duration
|
||||
uint16 number_of_time_steps
|
||||
|
||||
# case if user_input is true
|
||||
float32[] user_parameters # Length: number_of_dimensions * number_of_parameters_per_dimension
|
||||
float32[] user_covariance_diag # Length: number_of_dimensions * number_of_parameters_per_dimension
|
||||
float32[] current_optimizer_mean # Length: number_of_dimensions * number_of_parameters_per_dimension
|
||||
float32[] conditional_points # Length: (number_of_dimensions + time_stamp[0,1]) * number_of_conditional_points
|
||||
float32[] weight_parameter # this parameter sets the weighted average 0 dont trust user 1 completly trust user (it is set by the user or it is decays over time i have to do some experiments on that)
|
||||
|
||||
# case if user_input is false
|
||||
uint16 number_of_dimensions # this is the number of ProMPs * 2 (Position and Velocity)
|
||||
uint16 number_of_parameters_per_dimensions
|
||||
float32[] parameter_array # Length: number_of_population * number_of_dimensions * number_of_parameters_per_dimension
|
||||
---
|
||||
# Feedback
|
||||
string current_state
|
||||
uint16 processed_trajectories
|
||||
---
|
||||
# Result
|
||||
float32[] new_means # Length: number_of_population * number_of_dimensions * number_of_parameters_per_dimension, this is needed because in case of user input the parameters arent known yet
|
||||
float32[] score # Length: number_of_population
|
||||
|
8
src/interaction_msgs/msg/Opt2Task.msg
Normal file
8
src/interaction_msgs/msg/Opt2Task.msg
Normal file
@ -0,0 +1,8 @@
|
||||
# Return reward - True: for updating the model, False: for displaying the x_best to the user
|
||||
bool reward_return
|
||||
# Number of dimensions of the representation model
|
||||
uint16 nr_dim
|
||||
# Number of parameters per dimensions
|
||||
uint16 nr_parameter
|
||||
# Next parameters
|
||||
float32[] x_next
|
5
src/interaction_msgs/msg/Opt2UI.msg
Normal file
5
src/interaction_msgs/msg/Opt2UI.msg
Normal file
@ -0,0 +1,5 @@
|
||||
# Best parameter set so far
|
||||
float32[] x_best
|
||||
|
||||
# parameters which were fixed by the user
|
||||
bool[] fixed_parameters
|
9
src/interaction_msgs/msg/OptimizerState.msg
Normal file
9
src/interaction_msgs/msg/OptimizerState.msg
Normal file
@ -0,0 +1,9 @@
|
||||
# Current State
|
||||
string current_state
|
||||
|
||||
# Current BO Episode
|
||||
int32 current_episode
|
||||
|
||||
# Best result so far
|
||||
float32 reward_best
|
||||
float32[] x_best
|
4
src/interaction_msgs/msg/Task2Opt.msg
Normal file
4
src/interaction_msgs/msg/Task2Opt.msg
Normal file
@ -0,0 +1,4 @@
|
||||
# observed parameters
|
||||
float32[] x_observed
|
||||
# observed reward
|
||||
float32 reward
|
1
src/interaction_msgs/msg/TaskOrder.msg
Normal file
1
src/interaction_msgs/msg/TaskOrder.msg
Normal file
@ -0,0 +1 @@
|
||||
uint16 bla
|
7
src/interaction_msgs/msg/UI2Opt.msg
Normal file
7
src/interaction_msgs/msg/UI2Opt.msg
Normal file
@ -0,0 +1,7 @@
|
||||
# parameters which were fixed by the user
|
||||
bool[] fixed_parameters
|
||||
# parameters set by the user
|
||||
float32[] set_parameter_values
|
||||
# proposed new poses and their time steps
|
||||
float32[] new_poses
|
||||
float32[] new_poses_time_stamps
|
6
src/interaction_msgs/srv/ParameterChange.srv
Normal file
6
src/interaction_msgs/srv/ParameterChange.srv
Normal file
@ -0,0 +1,6 @@
|
||||
string[] parameter_name
|
||||
string[] parameter_type # One of 'float', 'string', 'bool', potentially others
|
||||
string[] parameter_value
|
||||
---
|
||||
bool success
|
||||
string message # For error reporting
|
@ -1,3 +1,6 @@
|
||||
# MODES: random:=0, regular:=1, improvement:=2
|
||||
uint16 modes
|
||||
|
||||
# random query
|
||||
float32 threshold
|
||||
|
||||
@ -7,9 +10,9 @@ uint16 current_episode
|
||||
|
||||
# improvement query
|
||||
# float32 threshold
|
||||
uint16 period
|
||||
# uint16 frequency
|
||||
uint16 last_queried_episode
|
||||
float32[] rewards
|
||||
float32[] last_rewards
|
||||
|
||||
---
|
||||
bool interaction
|
18
src/interaction_msgs/srv/Task.srv
Normal file
18
src/interaction_msgs/srv/Task.srv
Normal file
@ -0,0 +1,18 @@
|
||||
# Number of dimensions of the representation model
|
||||
uint16 nr_dim
|
||||
# Number of parameters per dimensions
|
||||
uint16 nr_parameter
|
||||
# Next parameters
|
||||
float32[] x_next
|
||||
# time array
|
||||
float32[] time_steps_array
|
||||
# did the user updated trajectory in the ui
|
||||
bool user_update
|
||||
# if the user updated update Pose + its timestamp (multiple Poses possible
|
||||
float32[] update_pose
|
||||
float32[] update_timestamps
|
||||
---
|
||||
# observed parameters
|
||||
float32[] x_observed
|
||||
# observed reward
|
||||
float32 reward
|
20
src/interaction_msgs/srv/TaskEvaluation.srv
Normal file
20
src/interaction_msgs/srv/TaskEvaluation.srv
Normal file
@ -0,0 +1,20 @@
|
||||
bool user_input
|
||||
uint16 number_of_population
|
||||
float32 duration
|
||||
uint16 number_of_time_steps
|
||||
|
||||
# case if user_input is true
|
||||
float32[] user_parameters # Length: number_of_dimensions * number_of_parameters_per_dimension
|
||||
float32[] user_covariance_diag # Length: number_of_dimensions * number_of_parameters_per_dimension
|
||||
float32[] current_cma_mean # Length: number_of_dimensions * number_of_parameters_per_dimension
|
||||
float32[] conditional_points # Length: (number_of_dimensions + time_stamp[0,1]) * number_of_conditional_points
|
||||
float32[] weight_parameter # this parameter sets the weighted average 0 dont trust user 1 completly trust user (it is set by the user or it is decays over time i have to do some experiments on that)
|
||||
|
||||
# case if user_input is false
|
||||
uint16 number_of_dimensions # this is the number of ProMPs * 2 (Position and Velocity)
|
||||
uint16 number_of_parameters_per_dimensions
|
||||
float32[] parameter_array # Length: number_of_population * number_of_dimensions * number_of_parameters_per_dimension
|
||||
---
|
||||
# response
|
||||
float32[] parameter_array # this is needed because in case of user input the parameters arent known yet
|
||||
float32[] score
|
8
src/interaction_msgs/srv/UserInterface.srv
Normal file
8
src/interaction_msgs/srv/UserInterface.srv
Normal file
@ -0,0 +1,8 @@
|
||||
float32[] current_cma_mean # Length: number_of_dimensions * number_of_parameters_per_dimension
|
||||
float32[] best_parameters # Length: number_of_dimensions * number_of_parameters_per_dimension
|
||||
float32[] current_user_covariance_diag # Length: number_of_dimensions * number_of_parameters_per_dimension
|
||||
---
|
||||
float32[] user_parameters # Length: number_of_dimensions * number_of_parameters_per_dimension
|
||||
float32[] user_covariance_diag # Length: number_of_dimensions * number_of_parameters_per_dimension
|
||||
float32[] conditional_points # Length: (number_of_dimensions + time_stamp[0,1]) * number_of_conditional_points
|
||||
float32 weight_parameter # this parameter sets the weighted average 0 dont trust user 1 completly trust user (it is set by the user or it is decays over time i have to do some experiments on that)
|
@ -0,0 +1,78 @@
|
||||
import rclpy
|
||||
from rclpy.node import Node
|
||||
from transitions.extensions.asyncio import AsyncMachine
|
||||
from std_msgs.msg import String
|
||||
import asyncio
|
||||
import threading
|
||||
import time
|
||||
|
||||
|
||||
class StatefulNode(Node):
|
||||
def __init__(self, event_loop):
|
||||
super().__init__('stateful_node')
|
||||
self.event_loop = event_loop
|
||||
self.subscription = self.create_subscription(String, 'my_topic', self.listener_callback, 10)
|
||||
|
||||
states = [
|
||||
{'name': 'state1', 'on_enter': 'on_enter_state1'},
|
||||
{'name': 'state2', 'on_enter': 'on_enter_state2'},
|
||||
{'name': 'state3', 'on_enter': 'on_enter_state3'}
|
||||
]
|
||||
transitions = [
|
||||
{'trigger': 'go_to_state1', 'source': '*', 'dest': 'state1'},
|
||||
{'trigger': 'go_to_state2', 'source': '*', 'dest': 'state2'},
|
||||
{'trigger': 'go_to_state3', 'source': '*', 'dest': 'state3'},
|
||||
]
|
||||
|
||||
self.machine = AsyncMachine(model=self, states=states, transitions=transitions, initial='state1')
|
||||
|
||||
async def on_enter_state1(self):
|
||||
self.get_logger().info(f"Entering State 1 - {time.time()}")
|
||||
|
||||
async def on_enter_state2(self):
|
||||
self.get_logger().info(f"Entering State 2 - {time.time()}")
|
||||
|
||||
async def on_enter_state3(self):
|
||||
self.get_logger().info(f"Entering State 3 - {time.time()}")
|
||||
|
||||
def listener_callback(self, msg):
|
||||
try:
|
||||
self.get_logger().info(f'Received message: "{msg.data}"')
|
||||
if msg.data == 'trigger1':
|
||||
self.get_logger().info("Attempting to go to state2")
|
||||
asyncio.run_coroutine_threadsafe(self.go_to_state1(), self.event_loop)
|
||||
elif msg.data == 'trigger2':
|
||||
self.get_logger().info("Attempting to go to state1")
|
||||
asyncio.run_coroutine_threadsafe(self.go_to_state2(), self.event_loop)
|
||||
elif msg.data == 'trigger3':
|
||||
self.get_logger().info("Attempting to go to state1")
|
||||
asyncio.run_coroutine_threadsafe(self.go_to_state3(), self.event_loop)
|
||||
except Exception as e:
|
||||
self.get_logger().error(f"Error in listener_callback: {str(e)}")
|
||||
|
||||
|
||||
def run_asyncio_loop():
|
||||
loop = asyncio.new_event_loop()
|
||||
asyncio.set_event_loop(loop)
|
||||
loop.run_forever()
|
||||
return loop # Ensure you return the loop reference
|
||||
|
||||
|
||||
def main():
|
||||
loop = asyncio.new_event_loop()
|
||||
asyncio.set_event_loop(loop)
|
||||
threading.Thread(target=loop.run_forever, daemon=True).start()
|
||||
|
||||
# Initialize ROS and pass the event loop to the node
|
||||
rclpy.init()
|
||||
node = StatefulNode(loop)
|
||||
|
||||
# Now, the ROS 2 node has the correct event loop reference for asyncio operations
|
||||
rclpy.spin(node)
|
||||
|
||||
node.destroy_node()
|
||||
rclpy.shutdown()
|
||||
loop.call_soon_threadsafe(loop.stop)
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
@ -1,7 +1,7 @@
|
||||
<?xml version="1.0"?>
|
||||
<?xml-model href="http://download.ros.org/schema/package_format3.xsd" schematypens="http://www.w3.org/2001/XMLSchema"?>
|
||||
<package format="3">
|
||||
<name>RepresentationModels</name>
|
||||
<name>interaction_objective_function</name>
|
||||
<version>0.0.0</version>
|
||||
<description>TODO: Package description</description>
|
||||
<maintainer email="nikolaus.feith@unileoben.ac.at">niko</maintainer>
|
4
src/interaction_objective_function/setup.cfg
Normal file
4
src/interaction_objective_function/setup.cfg
Normal file
@ -0,0 +1,4 @@
|
||||
[develop]
|
||||
script_dir=$base/lib/interaction_objective_function
|
||||
[install]
|
||||
install_scripts=$base/lib/interaction_objective_function
|
26
src/interaction_objective_function/setup.py
Normal file
26
src/interaction_objective_function/setup.py
Normal file
@ -0,0 +1,26 @@
|
||||
from setuptools import find_packages, setup
|
||||
|
||||
package_name = 'interaction_objective_function'
|
||||
|
||||
setup(
|
||||
name=package_name,
|
||||
version='0.0.0',
|
||||
packages=find_packages(exclude=['test']),
|
||||
data_files=[
|
||||
('share/ament_index/resource_index/packages',
|
||||
['resource/' + package_name]),
|
||||
('share/' + package_name, ['package.xml']),
|
||||
],
|
||||
install_requires=['setuptools'],
|
||||
zip_safe=True,
|
||||
maintainer='niko',
|
||||
maintainer_email='nikolaus.feith@unileoben.ac.at',
|
||||
description='TODO: Package description',
|
||||
license='TODO: License declaration',
|
||||
tests_require=['pytest'],
|
||||
entry_points={
|
||||
'console_scripts': [
|
||||
'async_node = interaction_objective_function.async_testing:main'
|
||||
],
|
||||
},
|
||||
)
|
@ -0,0 +1,4 @@
|
||||
initial_mean_centre: 0.0
|
||||
initial_mean_std_dev: 0.2
|
||||
initial_variance: 0.3
|
||||
random_seed: ''
|
@ -0,0 +1,4 @@
|
||||
from .confidence_bounds import ConfidenceBounds
|
||||
from .probability_of_improvement import ProbabilityOfImprovement
|
||||
from .expected_improvement import ExpectedImprovement
|
||||
from .preference_expected_improvement import PreferenceExpectedImprovement
|
@ -0,0 +1,31 @@
|
||||
|
||||
import numpy as np
|
||||
|
||||
|
||||
class ConfidenceBounds:
|
||||
def __init__(self, nr_weights, nr_samples=100, beta=1.2, seed=None, lower_bound=-1.0, upper_bound=1.0):
|
||||
self.nr_weights = nr_weights
|
||||
self.nr_samples = nr_samples
|
||||
self.beta = beta # if beta negative => lower confidence bounds
|
||||
self.lower_bound = lower_bound
|
||||
self.upper_bound = upper_bound
|
||||
self.seed = seed
|
||||
|
||||
def __call__(self, gauss_process, _, seed=None):
|
||||
# if seed is set for whole experiment
|
||||
if self.seed is not None:
|
||||
seed = self.seed
|
||||
|
||||
# random generator
|
||||
rng = np.random.default_rng(seed)
|
||||
|
||||
# sample from the surrogate
|
||||
x_test = rng.uniform(self.lower_bound, self.upper_bound, size=(self.nr_samples, self.nr_weights))
|
||||
mu, sigma = gauss_process.predict(x_test, return_std=True)
|
||||
|
||||
# upper/lower confidence bounds
|
||||
cb = mu + self.beta * sigma
|
||||
|
||||
# get the best result and return it
|
||||
idx = np.argmax(cb)
|
||||
return x_test[idx, :]
|
@ -0,0 +1,37 @@
|
||||
|
||||
import numpy as np
|
||||
from scipy.stats import norm
|
||||
|
||||
|
||||
class ExpectedImprovement:
|
||||
def __init__(self, nr_weights, nr_samples=100, kappa=0.0, seed=None, lower_bound=-1.0, upper_bound=1.0):
|
||||
self.nr_weights = nr_weights
|
||||
self.nr_samples = nr_samples
|
||||
self.kappa = kappa
|
||||
self.lower_bound = lower_bound
|
||||
self.upper_bound = upper_bound
|
||||
self.seed = seed
|
||||
|
||||
def __call__(self, gauss_process, x_observed, seed=None):
|
||||
# if seed is set for whole experiment
|
||||
if self.seed is not None:
|
||||
seed = self.seed
|
||||
|
||||
# random generator
|
||||
rng = np.random.default_rng(seed)
|
||||
|
||||
# get the best so far observed y
|
||||
mu = gauss_process.predict(x_observed)
|
||||
y_best = max(mu)
|
||||
|
||||
# sample from surrogate
|
||||
x_test = rng.uniform(self.lower_bound, self.upper_bound, size=(self.nr_samples, self.nr_weights))
|
||||
mu, sigma = gauss_process.predict(x_test, return_std=True)
|
||||
|
||||
# expected improvement
|
||||
z = (mu - y_best - self.kappa) / sigma
|
||||
ei = (mu - y_best - self.kappa) * norm.cdf(z) + sigma * norm.pdf(z)
|
||||
|
||||
# get the best result and return it
|
||||
idx = np.argmax(ei)
|
||||
return x_test[idx, :]
|
@ -0,0 +1,93 @@
|
||||
|
||||
import numpy as np
|
||||
from scipy.stats import norm
|
||||
|
||||
|
||||
class PreferenceExpectedImprovement:
|
||||
def __init__(self, nr_dims, initial_variance, update_variance, nr_samples=100,
|
||||
kappa=0.0, lower_bound=None, upper_bound=None, seed=None, fixed_dims=None):
|
||||
self.nr_dims = nr_dims
|
||||
|
||||
self.initial_variance = initial_variance
|
||||
self.update_variance = update_variance
|
||||
|
||||
self.nr_samples = nr_samples
|
||||
self.kappa = kappa
|
||||
|
||||
if lower_bound is None:
|
||||
self.lower_bound = [-1.] * self.nr_dims
|
||||
else:
|
||||
self.lower_bound = lower_bound
|
||||
|
||||
if upper_bound is None:
|
||||
self.upper_bound = [1.] * self.nr_dims
|
||||
else:
|
||||
self.upper_bound = upper_bound
|
||||
|
||||
self.seed = seed
|
||||
|
||||
# initial proposal distribution
|
||||
self.proposal_mean = np.zeros((nr_dims, 1))
|
||||
self.proposal_cov = np.diag(np.ones((nr_dims,)) * self.initial_variance)
|
||||
|
||||
# fixed dimension for robot experiment
|
||||
self.fixed_dims = fixed_dims
|
||||
|
||||
def rejection_sampling(self, seed=None):
|
||||
rng = np.random.default_rng(seed)
|
||||
|
||||
samples = np.empty((0, self.nr_dims))
|
||||
while samples.shape[0] < self.nr_samples:
|
||||
# sample from the multi variate gaussian distribution
|
||||
sample = np.zeros((1, self.nr_dims))
|
||||
for i in range(self.nr_dims):
|
||||
if i in self.fixed_dims:
|
||||
sample[0, i] = self.fixed_dims[i]
|
||||
else:
|
||||
check = False
|
||||
while not check:
|
||||
sample[0, i] = rng.normal(self.proposal_mean[i], self.proposal_cov[i, i])
|
||||
if self.lower_bound[i] <= sample[0, i] <= self.upper_bound[i]:
|
||||
check = True
|
||||
|
||||
samples = np.append(samples, sample, axis=0)
|
||||
|
||||
return samples
|
||||
|
||||
def __call__(self, gauss_process, x_observed, seed=None):
|
||||
# if seed is set for whole experiment
|
||||
if self.seed is not None:
|
||||
seed = self.seed
|
||||
|
||||
# get the best so far observed y
|
||||
mu = gauss_process.predict(x_observed)
|
||||
y_best = max(mu)
|
||||
|
||||
# sample from surrogate
|
||||
x_test = self.rejection_sampling(seed)
|
||||
mu, sigma = gauss_process.predict(x_test, return_std=True)
|
||||
|
||||
# expected improvement
|
||||
z = (mu - y_best - self.kappa) / sigma
|
||||
ei = (mu - y_best - self.kappa) * norm.cdf(z) + sigma * norm.pdf(z)
|
||||
|
||||
# get the best result and return it
|
||||
idx = np.argmax(ei)
|
||||
return x_test[idx, :]
|
||||
|
||||
def update_proposal_model(self, preference_mean, preference_bool):
|
||||
cov_diag = np.ones((self.nr_dims,)) * self.initial_variance
|
||||
cov_diag[preference_bool] = self.update_variance
|
||||
|
||||
preference_cov = np.diag(cov_diag)
|
||||
|
||||
preference_mean = preference_mean.reshape(-1, 1)
|
||||
|
||||
posterior_mean = np.linalg.inv(np.linalg.inv(self.proposal_cov) + np.linalg.inv(preference_cov))\
|
||||
.dot(np.linalg.inv(self.proposal_cov).dot(self.proposal_mean)
|
||||
+ np.linalg.inv(preference_cov).dot(preference_mean))
|
||||
|
||||
posterior_cov = np.linalg.inv(np.linalg.inv(self.proposal_cov) + np.linalg.inv(preference_cov))
|
||||
|
||||
self.proposal_mean = posterior_mean
|
||||
self.proposal_cov = posterior_cov
|
@ -0,0 +1,37 @@
|
||||
|
||||
import numpy as np
|
||||
from scipy.stats import norm
|
||||
|
||||
|
||||
class ProbabilityOfImprovement:
|
||||
def __init__(self, nr_weights, nr_samples=100, kappa=0.0, seed=None, lower_bound=-1.0, upper_bound=1.0):
|
||||
self.nr_weights = nr_weights
|
||||
self.nr_samples = nr_samples
|
||||
self.kappa = kappa
|
||||
self.lower_bound = lower_bound
|
||||
self.upper_bound = upper_bound
|
||||
self.seed = seed
|
||||
|
||||
def __call__(self, gauss_process, x_observed, seed=None):
|
||||
# if seed is set for whole experiment
|
||||
if self.seed is not None:
|
||||
seed = self.seed
|
||||
|
||||
# random generator
|
||||
rng = np.random.default_rng(seed)
|
||||
|
||||
# get the best so far observed y
|
||||
mu = gauss_process.predict(x_observed)
|
||||
y_best = max(mu)
|
||||
|
||||
# sample from surrogate
|
||||
x_test = rng.uniform(self.lower_bound, self.upper_bound, size=(self.nr_samples, self.nr_weights))
|
||||
mu, sigma = gauss_process.predict(x_test, return_std=True)
|
||||
|
||||
# probability of improvement
|
||||
z = (mu - y_best - self.kappa) / sigma
|
||||
pi = norm.cdf(z)
|
||||
|
||||
# get the best result and return it
|
||||
idx = np.argmax(pi)
|
||||
return x_test[idx, :]
|
@ -0,0 +1,326 @@
|
||||
import rclpy
|
||||
from rclpy.node import Node
|
||||
|
||||
from rclpy.callback_groups import ReentrantCallbackGroup
|
||||
|
||||
from interaction_msgs.srv import Query
|
||||
from interaction_msgs.srv import Task
|
||||
from interaction_msgs.msg import UI2Opt
|
||||
from interaction_msgs.msg import TaskOrder
|
||||
from interaction_msgs.msg import Opt2UI
|
||||
from interaction_msgs.msg import OptimizerState
|
||||
|
||||
|
||||
from .optimizers.bayesian_optimization import BayesianOptimization
|
||||
|
||||
from transitions.extensions.asyncio import AsyncMachine
|
||||
import asyncio
|
||||
|
||||
import threading
|
||||
import time
|
||||
import os
|
||||
|
||||
import numpy as np
|
||||
|
||||
|
||||
class BayesianOptimizationNode(Node):
|
||||
def __init__(self, event_loop):
|
||||
super().__init__('bayesian_optimization_node')
|
||||
self.event_loop = event_loop
|
||||
|
||||
# region Parameters
|
||||
"""
|
||||
self.acquisition_function_name = (self.declare_parameter('acquisition_function_name', 'EI')
|
||||
.get_parameter_value().string_value)
|
||||
self.nr_bo_steps = self.declare_parameter('bo_steps', 100).get_parameter_value().integer_value
|
||||
self.nr_dimensions = self.declare_parameter('nr_dimensions', 1).get_parameter_value().integer_value
|
||||
self.nr_policy_parameters = (self.declare_parameter('nr_policy_parameters', 100)
|
||||
.get_parameter_value().integer_value)
|
||||
"""
|
||||
self.kernel_type = self.declare_parameter('kernel_type', 'Matern').get_parameter_value().string_value
|
||||
self.nr_init = self.declare_parameter('nr_init', 3).get_parameter_value().integer_value
|
||||
self.task_request_timeout = self.declare_parameter('task_request_timeout', 10).get_parameter_value().integer_value
|
||||
self.task_request_max_tries = self.declare_parameter('task_request_max_tries', 3).get_parameter_value().integer_value
|
||||
# endregion
|
||||
|
||||
# State Machine
|
||||
states = [
|
||||
{'name': 'idle'},
|
||||
{'name': 'initialize_bo', 'on_enter': 'initialize_bo_fun'},
|
||||
{'name': 'user_query', 'on_enter': 'user_query_fun'},
|
||||
{'name': 'non_interactive_mode', 'on_enter': 'non_interactive_mode_fun'},
|
||||
{'name': 'waiting_for_user_response', 'on_enter': 'waiting_for_user_response_fun'},
|
||||
{'name': 'processing_user_proposal_fun', 'on_enter': 'processing_fun'},
|
||||
]
|
||||
transitions = [
|
||||
{'trigger': 'order_received', 'source': 'idle', 'dest': 'initialize_bo'},
|
||||
{'trigger': 'initialization_finished', 'source': 'initialize_bo', 'dest': 'user_query'},
|
||||
{'trigger': 'non_interactive', 'source': 'user_query', 'dest': 'non_interactive_mode'},
|
||||
{'trigger': 'non_interactive_finished', 'source': 'non_interactive_mode', 'dest': 'user_query'},
|
||||
{'trigger': 'interactive', 'source': 'user_query', 'dest': 'waiting_for_user_response'},
|
||||
{'trigger': 'user_response_received', 'source': 'waiting_for_user_response', 'dest': 'processing_user_proposal_fun'},
|
||||
{'trigger': 'processing_finished', 'source': 'processing_user_proposal_fun', 'dest': 'user_query'},
|
||||
{'trigger': 'order_completed', 'source': 'waiting_for_task_response', 'dest': 'idle'},
|
||||
{'trigger': 'abort', 'source': '*', 'dest': 'idle'}
|
||||
]
|
||||
|
||||
"""
|
||||
Algo:
|
||||
A) idle -> order_received() -> initializing_bo:
|
||||
order_sub - receives order
|
||||
reset_bo with the order parameters
|
||||
trigger: order_received()
|
||||
|
||||
for i=0:nr_episodes
|
||||
B) initializing_bo -> initialization_finished() -> user_query:
|
||||
for j = 0:nr_init
|
||||
x_next <- initial sampling
|
||||
send x_next to task node
|
||||
reward received from task node
|
||||
fit model
|
||||
trigger: initialization_finished()
|
||||
|
||||
C1.1) user_query -> non_interactive() -> sampling_x_next
|
||||
service call for user_query
|
||||
if user_query is False:
|
||||
trigger: non_interactive()
|
||||
C1.2) sampling_x_next -> sampling_finished() -> processing
|
||||
sampling x_next with acqusition function
|
||||
trigger: sampling_finished()
|
||||
|
||||
C2.1) user_query -> interactive() -> waiting_for_user
|
||||
service call for user_query
|
||||
if user_query is True:
|
||||
trigger: interactive()
|
||||
C2.2) waiting_for_user_response -> user_response_received() -> processing
|
||||
send x_best to task node
|
||||
send x_best to UI
|
||||
user adapts x_best
|
||||
(UI sends x_next to task node)
|
||||
receives x_next from UI
|
||||
trigger: user_response_received()
|
||||
|
||||
D) processing -> processing_finished() -> waiting_for_task_response
|
||||
sends x_next to task node
|
||||
trigger: processing_finished()
|
||||
|
||||
E) waiting_for_task_response -> task_response_received() -> user_query
|
||||
if episode < nr_episodes:
|
||||
get results from reward_sub
|
||||
model fit with additional observation
|
||||
trigger: task_response_received()
|
||||
|
||||
F) waiting_for_task_response -> order_completed() -> idle
|
||||
if episode = nr_episodes:
|
||||
completion_pub with the completed results
|
||||
trigger: order_completed()
|
||||
|
||||
|
||||
Additional Transitions:
|
||||
Abort: * -> abort() -> idle
|
||||
"""
|
||||
|
||||
self.machine = AsyncMachine(model=self, states=states,
|
||||
transitions=transitions, initial='idle',
|
||||
ignore_invalid_triggers=True)
|
||||
|
||||
|
||||
# Subscribers
|
||||
self.ui_sub = self.create_subscription(UI2Opt, 'interaction/ui_response', self.ui_callback, 10)
|
||||
self.order_sub = self.create_subscription(TaskOrder, 'interaction/order', self.task_order_callback, 10)
|
||||
|
||||
# Publishers
|
||||
self.ui_pub = self.create_publisher(Opt2UI, 'interaction/ui_request', 10)
|
||||
self.state_pub = self.create_publisher(OptimizerState, 'interaction/optimizer/state', 10)
|
||||
|
||||
# Service Clients
|
||||
self.query_client = self.create_client(Query, 'interaction/user_query_srv')
|
||||
self.task_client = self.create_client(Task, 'interaction/task_srv')
|
||||
|
||||
# Timer Objects
|
||||
self.state_timer = self.create_timer(0.1, self.state_callback)
|
||||
# Bayesian Optimization
|
||||
self.bayesian_optimization = None
|
||||
|
||||
self.current_episodes = 0
|
||||
self.nr_bo_steps = 0
|
||||
self.nr_dimensions = 0
|
||||
self.nr_policy_parameters = 0
|
||||
self.acquisition_function_name = ''
|
||||
self.fixed_dimensions = None
|
||||
|
||||
self.seed = None
|
||||
self.lower_bound = None
|
||||
self.upper_bound = None
|
||||
|
||||
self.x_temp = None # temp x_next
|
||||
|
||||
|
||||
self.get_logger().info(f"Bayesian optimization Node started up!")
|
||||
|
||||
def reset_bo(self, fixed_dimensions=None, **kwargs):
    """Create a fresh BayesianOptimization model from the stored order settings.

    Args:
        fixed_dimensions: optional dimensions to hold constant during search.
        **kwargs: forwarded verbatim to BayesianOptimization (kernel/acquisition tuning).
    """
    # FIX: BayesianOptimization's parameter is 'kernel_name'; the original
    # passed 'kernel_type=', which was silently absorbed by **kwargs so the
    # configured kernel never took effect (the default Matern was always used).
    self.bayesian_optimization = BayesianOptimization(
        self.nr_bo_steps,
        self.nr_dimensions,
        self.nr_policy_parameters,
        seed=self.seed,
        fixed_dimensions=fixed_dimensions,
        lower_bound=self.lower_bound,
        upper_bound=self.upper_bound,
        acquisition_function_name=self.acquisition_function_name,
        kernel_name=self.kernel_type,
        **kwargs,
    )
|
||||
# region Callback & Async Send functions
|
||||
def task_order_callback(self, msg):
    """Store the parameters of a newly received optimization order and start it.

    Copies the run configuration from the TaskOrder message into node state,
    then schedules the 'order_received' state-machine trigger on the background
    asyncio loop (this callback itself runs on the ROS executor thread).
    """
    self.current_episodes = 0
    self.nr_bo_steps = msg.nr_bo_steps
    self.nr_dimensions = msg.nr_dimensions
    self.nr_policy_parameters = msg.nr_policy_parameters
    self.seed = msg.seed
    self.fixed_dimensions = msg.fixed_dimensions
    self.acquisition_function_name = msg.acquisition_function_name

    # Async trigger must run on the dedicated asyncio loop, not this thread.
    asyncio.run_coroutine_threadsafe(self.order_received(), self.event_loop)
||||
|
||||
def ui_callback(self, msg):
    """Handle the user's UI response: store fixed dimensions and proposed parameters."""
    self.fixed_dimensions = msg.fixed_dimensions
    self.x_temp = msg.set_parameter_values  # consumed later by processing_user_proposal_fun

    # FIX: the trigger coroutine was passed without being called and without
    # the event loop argument, so run_coroutine_threadsafe raised immediately
    # and the state machine never advanced (cf. task_order_callback).
    asyncio.run_coroutine_threadsafe(self.user_response_received(), self.event_loop)
||||
|
||||
async def send_task_request_with_retry(self, request):
    """Call the task service, retrying on timeout.

    Args:
        request: a Task.Request to send to the task service.

    Returns:
        The service response, or None when all attempts timed out.
    """
    for attempt in range(self.task_request_max_tries):
        self.get_logger().info(f"Attempt {attempt + 1}, send task request with retry!")

        future = self.task_client.call_async(request)
        try:
            # rclpy futures are awaitable, so asyncio.wait_for can bound them.
            response = await asyncio.wait_for(future, self.task_request_timeout)
            return response
        except asyncio.TimeoutError:
            self.get_logger().warning(f"Task request timed out, resending...")

    # NOTE(review): callers must handle the None returned here; several do not.
    self.get_logger().error(f"Max retries reached for task request: {self.task_request_max_tries}")
    return None
||||
|
||||
def state_callback(self):
    """Timer callback (10 Hz): publish the optimizer's current state.

    Outside 'idle' the message also carries the episode counter and the best
    observation found so far.
    """
    msg = OptimizerState()
    msg.current_state = self.state
    if self.state != 'idle':
        msg.current_episode = self.bayesian_optimization.episode
        # FIX: the model class defines get_best_result() (returns
        # (reward, x, idx)); get_best_reward() does not exist and raised
        # AttributeError on every tick outside 'idle'.
        msg.reward_best, msg.x_best, _ = self.bayesian_optimization.get_best_result()

    self.state_pub.publish(msg)
||||
# endregion
|
||||
|
||||
# State Methods
|
||||
async def initializing_bo_fun(self):
    """Collect the initial random observations and fit the surrogate model.

    Samples nr_init random candidates, has the task node evaluate each one,
    and fits the Gaussian process once on the final initial observation.
    """
    self.get_logger().info('Initializing bo...')

    self.reset_bo(fixed_dimensions=self.fixed_dimensions)

    request = Task.Request()
    request.nr_dim = self.nr_dimensions
    request.nr_parameter = self.nr_policy_parameters
    for i in range(self.nr_init):
        request.x_next = self.bayesian_optimization.sample_random_next_observation()

        response = await self.send_task_request_with_retry(request)
        if response is None:
            # Retry helper gave up; abort back to idle instead of crashing
            # on response.reward below ('abort' is valid from any state).
            self.get_logger().error('Task request failed during initialization, aborting order.')
            await self.abort()
            return

        # FIX: the original condition 'i < self.nr_init' is always true for
        # i in range(self.nr_init), so the fitting branch was dead code and
        # the GP was never fitted during initialization.  Fit once, on the
        # last initial sample.
        if i < self.nr_init - 1:
            self.bayesian_optimization.add_observation(response.reward, response.x_observed, fit=False)
        else:
            self.bayesian_optimization.add_observation(response.reward, response.x_observed)

    await self.initialization_finished()
||||
|
||||
async def user_query_fun(self):
    """Ask the query service whether the user should be involved this episode.

    Transitions to 'interactive' or 'non_interactive' depending on the
    service's answer; on timeout the machine stays in the current state.
    """
    self.get_logger().info('Deciding whether to ask the user...')
    # FIX: a service client must be called with a Request instance; passing
    # the service type itself (Query()) is invalid (cf. Query.Request()
    # usage in the CMA-ES node).
    srv_msg = Query.Request()
    future = self.query_client.call_async(srv_msg)
    try:
        response = await asyncio.wait_for(future, timeout=2)
        if response.interaction:
            await self.interactive()
        else:
            await self.non_interactive()

    except asyncio.TimeoutError:
        self.get_logger().info("Timeout for Query Service...")
||||
|
||||
async def non_interactive_mode_fun(self):
    """Run one optimizer-driven episode without user involvement.

    Asks the acquisition function for the next candidate, has the task node
    evaluate it, and feeds the observation back into the model.
    """
    request = Task.Request()
    request.nr_dim = self.nr_dimensions
    request.nr_parameter = self.nr_policy_parameters

    # acquisition-function proposal for the next evaluation
    request.x_next = self.bayesian_optimization.next_observation()

    response = await self.send_task_request_with_retry(request)
    # NOTE(review): response may be None if all retries timed out - the
    # next line would then raise AttributeError; confirm intended handling.
    self.bayesian_optimization.add_observation(response.reward, response.x_observed)

    if self.bayesian_optimization.episode == self.nr_bo_steps:
        await self.order_completed()
    # NOTE(review): this trigger also fires after order_completed(); it is
    # then ignored because the machine uses ignore_invalid_triggers=True.
    await self.non_interactive_finished()
||||
|
||||
async def waiting_for_user_response_fun(self):
    """Show the user the best solution so far and wait for their proposal.

    Publishes the best parameters to the UI and sends a request to the task
    node so the corresponding movement is displayed.
    """
    self.get_logger().info('Waiting for user response...')

    # sending the best result so far to display it to the user
    _, x_max, _ = self.bayesian_optimization.get_best_result()

    ui_msg = Opt2UI()
    ui_msg.x_best = x_max
    ui_msg.fixed_parameters = self.fixed_dimensions
    self.ui_pub.publish(ui_msg)

    # send it to the task node to display the movement
    request = Task.Request()
    request.nr_dim = self.nr_dimensions
    request.nr_parameter = self.nr_policy_parameters

    # NOTE(review): this sends a fresh acquisition proposal rather than the
    # best-so-far x_max shown to the user - confirm this is intended.
    request.x_next = self.bayesian_optimization.next_observation()

    _ = await self.send_task_request_with_retry(request)

    self.get_logger().info('User best solution displayed completed.')
||||
|
||||
async def processing_user_proposal_fun(self):
    """Evaluate the parameter set proposed by the user and update the model."""
    request = Task.Request()
    request.nr_dim = self.nr_dimensions
    request.nr_parameter = self.nr_policy_parameters

    # consume the user-proposed parameters stored by ui_callback
    request.x_next = self.x_temp
    self.x_temp = None

    response = await self.send_task_request_with_retry(request)
    self.bayesian_optimization.add_observation(response.reward, response.x_observed)

    if self.bayesian_optimization.episode == self.nr_bo_steps:
        await self.order_completed()
    # FIX: per the state-machine docstring, this state exits via
    # 'processing_finished' (the original fired 'non_interactive_finished',
    # which belongs to the non-interactive branch and is invalid here).
    await self.processing_finished()
||||
|
||||
|
||||
def run_asyncio_loop():
    """Create an asyncio event loop, run it in a background daemon thread, and return it.

    Returns:
        The running asyncio event loop, usable with run_coroutine_threadsafe.
    """
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    # FIX: the original called loop.run_forever() on the current thread, which
    # blocks forever, so the 'return loop' line was unreachable.  Running the
    # loop in a daemon thread (exactly what main() does inline) makes the
    # returned reference actually reachable and usable.
    threading.Thread(target=loop.run_forever, daemon=True).start()
    return loop
||||
|
||||
|
||||
def main():
    """Entry point: spin the BayesianOptimizationNode with a background asyncio loop."""
    # A dedicated asyncio loop runs in a daemon thread so that async
    # state-machine triggers can be scheduled from ROS executor callbacks
    # via run_coroutine_threadsafe.
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    threading.Thread(target=loop.run_forever, daemon=True).start()

    # Initialize ROS and pass the event loop to the node
    rclpy.init()
    node = BayesianOptimizationNode(loop)

    # Now, the ROS 2 node has the correct event loop reference for asyncio operations
    rclpy.spin(node)

    node.destroy_node()
    rclpy.shutdown()
    # Stop the background loop so the daemon thread can wind down cleanly.
    loop.call_soon_threadsafe(loop.stop)
||||
|
||||
|
||||
# Script entry point: run the node when executed directly.
if __name__ == '__main__':
    main()
|
@ -0,0 +1,348 @@
|
||||
import os
|
||||
|
||||
import rclpy
|
||||
from rclpy.node import Node
|
||||
from rclpy.parameter import Parameter
|
||||
from rclpy.action import ActionClient
|
||||
|
||||
from transitions import Machine
|
||||
import cma
|
||||
import yaml
|
||||
import numpy as np
|
||||
from src.interaction_utils.serialization import flatten_population, unflatten_population
|
||||
|
||||
# Msg/Srv/Action
|
||||
from interaction_msgs.srv import Query
|
||||
from interaction_msgs.action import TaskEvaluation
|
||||
from interaction_msgs.srv import ParameterChange
|
||||
from interaction_msgs.srv import UserInterface
|
||||
from std_msgs.msg import Bool
|
||||
|
||||
|
||||
class CMAESOptimizationNode(Node):
    """ROS 2 node driving a CMA-ES optimization loop with optional user interaction.

    The control flow is a ``transitions`` state machine; candidate populations
    are evaluated through the TaskEvaluation action and user proposals are
    collected via the UserInterface service.
    """

    def __init__(self):
        super().__init__('cmaes_optimization_node')

        # CMA-ES attributes
        self.cmaes = None
        self.number_of_initial_episodes = self.declare_parameter('number_of_initial_episodes', 5)
        self.initial_user_covariance = self.declare_parameter('initial_user_covariance', 1.0)
        self.episode = 0
        self.number_of_dimensions = 3
        self.number_of_parameters_per_dimensions = 10
        # the number of weights is double the number of dims * params per dim
        # since it holds Position and Velocity
        self.number_of_weights = 2 * self.number_of_dimensions * self.number_of_parameters_per_dimensions
        self.user_covariance = np.ones((self.number_of_weights, 1)) * self.initial_user_covariance.value

        self.random_seed = None

        # Query attributes
        self.query_metric = 'random'
        self.query_parameters = {}
        # NOTE(review): attribute name keeps the original spelling
        # ('interation') in case external code already reads it; rename in a
        # coordinated change.
        self.best_parameters_per_interation = []
        self.best_reward_per_iteration = []

        # ROS2 interface bookkeeping (futures of in-flight calls/goals)
        self.__future = None
        self.__send_future = None
        self.__response_future = None
        self.__task_response = None
        self.__user_response = None

        # Heartbeat - to make sure that there is no deadlock
        self.heartbeat_timeout = 30  # secs
        self.heartbeat_timer = self.create_timer(1.0, self.check_heartbeats)

        self.last_mr_heartbeat_time = None
        # FIX: the callback name was misspelled ('mr_heatbeat_callback', which
        # does not exist) and create_subscription was missing its required QoS
        # depth argument.
        self.mr_heartbeat_sub = self.create_subscription(
            Bool, 'interaction/mr_heartbeat', self.mr_heartbeat_callback, 10)

        # Service
        self.parameter_srv = self.create_service(ParameterChange, 'interaction/cmaes_parameter_srv',
                                                 self.parameter_callback)
        self.query_srv = self.create_client(Query, 'interaction/query_srv')
        self.user_interface_srv = self.create_client(UserInterface, 'interaction/user_interface_srv')

        # Action
        self._task_action = ActionClient(self, TaskEvaluation, 'interaction/task_action')

        # State Machine
        # States
        self.states = [
            'idle',
            'initialization',
            'query_decision',
            'non_interactive_mode',
            'wait_for_evaluation',
            'interactive_mode',
            'wait_for_user_response',
            'prepare_user_data_for_evaluation',
            'wait_for_user_evaluation',
            'update_optimizer',
            'check_termination',
            'complete',
            'error_recovery',
        ]

        # Initialize state machine
        self.machine = Machine(self, states=self.states, initial='idle')

        # region Transitions
        self.machine.add_transition(trigger='order_received', source='idle', dest='initialization')
        self.machine.add_transition(trigger='initialization_complete', source='initialization', dest='query_decision')
        self.machine.add_transition(trigger='no_interaction', source='query_decision', dest='non_interactive_mode')
        self.machine.add_transition(trigger='data_send_for_evaluation', source='non_interactive_mode', dest='wait_for_evaluation')
        self.machine.add_transition(trigger='evaluation_response_received', source='wait_for_evaluation', dest='update_optimizer')
        self.machine.add_transition(trigger='interaction', source='query_decision', dest='interactive_mode')
        self.machine.add_transition(trigger='data_send_to_user', source='interactive_mode', dest='wait_for_user_response')
        self.machine.add_transition(trigger='user_response_received', source='wait_for_user_response', dest='prepare_user_data_for_evaluation')
        self.machine.add_transition(trigger='send_user_data_to_evaluation', source='prepare_user_data_for_evaluation', dest='wait_for_user_evaluation')
        self.machine.add_transition(trigger='received_user_data_for_evaluation', source='wait_for_user_evaluation', dest='update_optimizer')
        self.machine.add_transition(trigger='optimizer_updated', source='update_optimizer', dest='check_termination')
        self.machine.add_transition(trigger='next_optimizer_step', source='check_termination', dest='query_decision')
        self.machine.add_transition(trigger='finished', source='check_termination', dest='complete')
        self.machine.add_transition(trigger='error_trigger', source='*', dest='error_recovery')
        self.machine.add_transition(trigger='recovery_complete', source='error_recovery', dest='idle')
        # endregion

    # region State Functions
    def on_enter_initialization(self):
        """Build the CMA-ES optimizer from the YAML config file."""
        config_file_path = self.get_parameter('cmaes_config_file_path').get_parameter_value().string_value

        # Load YAML
        with open(config_file_path, 'r') as file:
            config = yaml.safe_load(file)

        if config['random_seed'] == '':
            self.random_seed = None
        else:
            self.random_seed = int(config['random_seed'])
        config['seed'] = self.random_seed

        mean_centre = config['initial_mean_centre']
        mean_std_dev = config['initial_mean_std_dev']

        random_gen = np.random.default_rng(seed=self.random_seed)

        initial_mean = random_gen.normal(mean_centre, mean_std_dev, size=(self.number_of_weights, 1))
        initial_variance = config['initial_variance']
        # NOTE(review): the whole YAML dict is passed as CMA options; cma
        # rejects unknown option keys - confirm the config only contains valid
        # cma options besides the keys consumed above.
        self.cmaes = cma.CMAEvolutionStrategy(initial_mean, initial_variance, inopts=config)

        # Trigger transition
        self.initialization_complete()

    def on_enter_query_decision(self):
        """Ask the query service whether this episode should involve the user."""
        request = Query.Request()

        if self.query_metric == 'random':
            pass  # TODO
        elif self.query_metric == 'regular':
            pass  # TODO
        elif self.query_metric == 'improvement':
            pass  # TODO

        self.query_srv.wait_for_service(10)
        self.__future = self.query_srv.call_async(request)
        self.__future.add_done_callback(self.handle_query_response)

    def on_enter_non_interactive_mode(self):
        """Sample a population from CMA-ES and send it for evaluation."""
        goal = TaskEvaluation.Goal()
        goal.user_input = False
        goal.number_of_population = self.cmaes.popsize
        goal.number_of_dimensions = self.number_of_dimensions * 2
        goal.number_of_parameters_per_dimensions = self.number_of_parameters_per_dimensions

        population = self.cmaes.ask()
        flat_population = flatten_population(population)

        goal.parameter_array = flat_population

        self._task_action.wait_for_server(timeout_sec=10.0)
        self.__send_future = self._task_action.send_goal_async(goal)
        # FIX: the done-callback referenced '_task_response_callback', which is
        # not defined anywhere; the goal-acceptance handler is
        # '_task_goal_callback'.
        self.__send_future.add_done_callback(self._task_goal_callback)

        self.data_send_for_evaluation()

    def on_enter_interactive_mode(self):
        """Send the current optimizer state to the user interface."""
        # Reset Mixed Reality heartbeat to check if the other node crashed
        self.last_mr_heartbeat_time = None

        request = UserInterface.Request()
        request.current_cma_mean = flatten_population(self.cmaes.mean)
        request.best_parameters = flatten_population(self.best_parameters_per_interation[-1])
        request.current_user_covariance_diag = self.user_covariance

        self.__future = self.user_interface_srv.call_async(request)
        self.__future.add_done_callback(self.handle_user_response)

        self.data_send_to_user()

    def on_enter_prepare_user_data_for_evaluation(self):
        """Forward the user's proposal to the evaluation action server."""
        # Update the user_covariance
        self.user_covariance = self.__user_response.user_covariance_diag

        # FIX: TaskEvaluation is imported from interaction_msgs.action - it is
        # an action, so it has Goal (see on_enter_non_interactive_mode), not
        # Request; and 'self.task_srv' was never defined.  Route the user data
        # through the same action client as the non-interactive path.
        goal = TaskEvaluation.Goal()
        goal.user_input = True
        goal.number_of_population = self.cmaes.popsize
        goal.user_parameters = self.__user_response.user_parameters
        goal.user_covariance_diag = self.user_covariance
        goal.current_cma_mean = flatten_population(self.cmaes.mean)
        goal.conditional_points = self.__user_response.conditional_points
        goal.weight_parameter = self.__user_response.weight_parameter

        self._task_action.wait_for_server(timeout_sec=10.0)
        self.__send_future = self._task_action.send_goal_async(goal)
        self.__send_future.add_done_callback(self._task_goal_callback)

        self.send_user_data_to_evaluation()

    def on_enter_update_optimizer(self):
        """Feed evaluation scores back into CMA-ES and record the best result."""
        population = unflatten_population(self.__task_response.parameter_array,
                                          self.cmaes.popsize,
                                          self.number_of_dimensions,
                                          self.number_of_parameters_per_dimensions)
        scores = self.__task_response.scores

        # NOTE(review): cma minimizes its objective, while the max() below
        # treats scores as rewards - confirm the score sign convention.
        self.cmaes.tell(population, scores)

        # save best results
        best_idx = max(enumerate(scores), key=lambda item: item[1])[0]
        # assumes scores/population are numpy arrays (float.copy() would
        # fail) - TODO confirm against the action definition.
        self.best_reward_per_iteration.append(scores[best_idx].copy())
        self.best_parameters_per_interation.append(population[best_idx, :, :].copy())

        self.__task_response = None
        self.episode += 1

        self.optimizer_updated()

    def on_enter_check_termination(self):
        """Stop after max_episode_count episodes, otherwise start the next step."""
        max_episodes = self.get_parameter('max_episode_count').get_parameter_value().integer_value

        if self.episode >= max_episodes:
            self.finished()
        else:
            self.next_optimizer_step()

    def on_enter_complete(self):
        """Terminal state; nothing to do yet."""
        pass

    def on_enter_error_recovery(self):
        """Error-recovery state; recovery logic not implemented yet."""
        pass

    # endregion

    # region Callback Functions
    def parameter_callback(self, request, response):
        """Service handler: apply a batch of node-parameter changes.

        The three request lists (names, types, string-encoded values) must be
        the same length; values are converted per their declared type and set
        atomically via set_parameters.
        """
        param_names = request.parameter_name
        param_types = request.parameter_type
        param_values_str = request.parameter_value

        if len(param_names) != len(param_types) or len(param_types) != len(param_values_str):
            response.success = False
            response.message = 'Lists must have the same size'
            return response

        try:
            # Parameter update loop
            all_params = []
            for i in range(len(param_names)):
                param_name = param_names[i]
                param_type = param_types[i]
                param_value_str = param_values_str[i]

                # Input Validation (adjust as needed)
                if param_type not in ['float32', 'string', 'bool']:
                    response.success = False
                    response.message = 'Unsupported parameter type'
                    return response

                # Value Conversion (based on param_type)
                if param_type == 'float32':
                    param_value = float(param_value_str)
                elif param_type == 'bool':
                    param_value = (param_value_str.lower() == 'true')
                else:  # 'string'
                    param_value = param_value_str

                # Compose the all parameter list
                param = Parameter(param_name, Parameter.Type.from_parameter_value(param_value), param_value)
                all_params.append(param)

            # Attempt to set the parameters
            set_params_result = self.set_parameters(all_params)

            response.success = True  # Assume success unless set_parameters fails
            for result in set_params_result:
                if not result.successful:
                    response.success = False
                    response.message = result.reason
                    break
            return response

        except Exception as e:
            self.get_logger().error(f'Parameter update failed: {str(e)}')
            response.success = False
            response.message = 'Parameter update failed, please check logs'
            return response

    def check_heartbeats(self):
        """Timer callback (1 Hz): enter error recovery when the MR heartbeat stalls."""
        if self.last_mr_heartbeat_time:
            current_time = self.get_clock().now().nanoseconds
            if (current_time - self.last_mr_heartbeat_time) > (self.heartbeat_timeout * 1e9):
                self.get_logger().error("MR Interface heartbeat timed out!")
                self.error_trigger()

    def mr_heartbeat_callback(self, _):
        """Record the arrival time of a Mixed-Reality heartbeat message."""
        self.last_mr_heartbeat_time = self.get_clock().now().nanoseconds

    def handle_query_response(self, future):
        """Route the query service's answer into the state machine."""
        try:
            response = future.result()

            # During the initial episodes the optimizer always runs without
            # user interaction.
            # FIX: the trigger is named 'no_interaction' (see the transition
            # table); 'non_interaction' does not exist.  Also return early so
            # a second trigger is not fired on top of the first.
            if self.episode < self.number_of_initial_episodes.value:
                self.no_interaction()
                return

            if response.interaction:
                self.interaction()
            else:
                self.no_interaction()

        except Exception as e:
            self.get_logger().error(f'Query service call failed: {e}')
            self.error_trigger()

    def _task_goal_callback(self, future):
        """Handle goal acceptance/rejection for the TaskEvaluation action."""
        goal_handle = future.result()

        if not goal_handle.accepted:
            self.get_logger().error(f'Task Goal rejected: {goal_handle}')
            self.error_trigger()
            # FIX: without this return the node requested a result for a
            # rejected goal.
            return

        # FIX: typo 'get_result_asyn' -> 'get_result_async'.
        self.__response_future = goal_handle.get_result_async()
        self.__response_future.add_done_callback(self._task_result_callback)

    def _task_feedback_callback(self, msg):
        """Log action feedback as it streams in."""
        self.get_logger().info(f'Received Feedback: state={msg.current_state}, processed={msg.processed_trajectories}')

    def _task_result_callback(self, future):
        """Store the finished evaluation result and advance the state machine."""
        # FIX: a get_result_async future resolves to a wrapper carrying
        # .status and .result; the TaskEvaluation result message itself is in
        # '.result'.
        self.__task_response = future.result().result
        # Both evaluation paths share this callback; pick the trigger that is
        # valid for the current state.
        if self.state == 'wait_for_user_evaluation':
            self.received_user_data_for_evaluation()
        else:
            self.evaluation_response_received()

    def handle_user_response(self, future):
        """Store the user's UI response and advance the state machine."""
        try:
            self.__user_response = future.result()
            self.user_response_received()

        except Exception as e:
            self.get_logger().error(f'User interface service call failed: {e}')
            self.error_trigger()
    # endregion
|
||||
|
||||
|
||||
def main(args=None):
    """Entry point: initialize ROS 2, spin the CMA-ES node, then shut down."""
    rclpy.init(args=args)
    node = CMAESOptimizationNode()
    rclpy.spin(node)
    # Consistency fix: release the node's resources explicitly before
    # shutdown, as the Bayesian-optimization node's main() does.
    node.destroy_node()
    rclpy.shutdown()
||||
|
||||
|
||||
# Script entry point: run the node when executed directly.
if __name__ == '__main__':
    main()
|
@ -0,0 +1,138 @@
|
||||
|
||||
import numpy as np
|
||||
from sklearn.gaussian_process import GaussianProcessRegressor
|
||||
from sklearn.gaussian_process.kernels import Matern, RBF, ExpSineSquared
|
||||
|
||||
from ..acquisition_function import ConfidenceBounds
|
||||
from ..acquisition_function import ProbabilityOfImprovement
|
||||
from ..acquisition_function import ExpectedImprovement
|
||||
from ..acquisition_function import PreferenceExpectedImprovement
|
||||
|
||||
from sklearn.exceptions import ConvergenceWarning
|
||||
import warnings
|
||||
|
||||
warnings.filterwarnings('ignore', category=ConvergenceWarning)
|
||||
|
||||
|
||||
class BayesianOptimization:
    """Gaussian-process Bayesian optimization over flattened policy weights.

    The search space has nr_policy_parameters * nr_dimensions weights; the
    surrogate is a scikit-learn GaussianProcessRegressor and new candidates
    are proposed by one of several acquisition functions.
    """

    def __init__(self, nr_steps, nr_dimensions, nr_policy_parameters, seed=None,
                 fixed_dimensions=None, lower_bound=None, upper_bound=None,
                 acquisition_function_name="EI", kernel_name="Matern",
                 **kwargs):
        """Configure the kernel, the acquisition function and the observation buffers.

        Raises:
            NotImplementedError: for unknown kernel or acquisition names.
            ValueError: when PEI is requested without its variance parameters.
        """
        self.nr_steps = nr_steps
        self.nr_dimensions = nr_dimensions
        self.nr_policy_parameters = nr_policy_parameters
        self.nr_weights = nr_policy_parameters * nr_dimensions

        if lower_bound is None:
            self.lower_bound = [-1.] * self.nr_weights
        else:
            self.lower_bound = lower_bound

        if upper_bound is None:
            # FIX: the default upper bound was [-1.] * nr_weights, identical
            # to the lower-bound default, which makes the search interval
            # degenerate (zero width).
            self.upper_bound = [1.] * self.nr_weights
        else:
            self.upper_bound = upper_bound

        self.seed = seed
        self.fixed_dimensions = fixed_dimensions

        self.x_observed = None
        self.y_observed = None
        self.best_reward = None
        self.episode = 0
        # True once the first real observation has replaced the placeholder row
        self._has_data = False

        self.gauss_process = None
        self.n_restarts_optimizer = kwargs.get('n_restarts_optimizer', 5)

        # region Kernel
        length_scale = kwargs.get('length_scale', 1.0)

        if kernel_name == "Matern":
            nu = kwargs.get('nu', 1.5)
            self.kernel = Matern(nu=nu, length_scale=length_scale)

        elif kernel_name == "RBF":
            self.kernel = RBF(length_scale=length_scale)

        elif kernel_name == "ExpSineSquared":
            periodicity = kwargs.get('periodicity', 1.0)
            self.kernel = ExpSineSquared(length_scale=length_scale, periodicity=periodicity)

        else:
            raise NotImplementedError("This kernel is not implemented!")
        # endregion

        # region Acquisitionfunctions
        nr_samples = kwargs.get('nr_samples', 100)

        # FIX: the acquisition functions previously received the raw
        # lower_bound/upper_bound arguments (possibly None) instead of the
        # defaulted self.* values computed above.
        if acquisition_function_name == "CB":
            beta = kwargs.get('beta', 1.2)
            self.acquisition_function = ConfidenceBounds(self.nr_weights, nr_samples=nr_samples, beta=beta,
                                                         seed=seed, lower_bound=self.lower_bound,
                                                         upper_bound=self.upper_bound)

        elif acquisition_function_name == "PI":
            kappa = kwargs.get('kappa', 0.0)
            self.acquisition_function = ProbabilityOfImprovement(self.nr_weights, nr_samples=nr_samples,
                                                                 kappa=kappa, seed=seed,
                                                                 lower_bound=self.lower_bound,
                                                                 upper_bound=self.upper_bound)
        elif acquisition_function_name == "EI":
            kappa = kwargs.get('kappa', 0.0)
            self.acquisition_function = ExpectedImprovement(self.nr_weights, nr_samples=nr_samples, kappa=kappa,
                                                            seed=seed, lower_bound=self.lower_bound,
                                                            upper_bound=self.upper_bound)
        elif acquisition_function_name == "PEI":
            kappa = kwargs.get('kappa', 0.0)

            initial_variance = kwargs.get('initial_variance', None)
            update_variance = kwargs.get('update_variance', None)

            if initial_variance is None or update_variance is None:
                raise ValueError("Initial_variance and update_variance has to be provided in PEI!")

            self.acquisition_function = PreferenceExpectedImprovement(self.nr_weights, initial_variance,
                                                                      update_variance, nr_samples=nr_samples,
                                                                      kappa=kappa, lower_bound=self.lower_bound,
                                                                      upper_bound=self.upper_bound, seed=seed,
                                                                      fixed_dims=fixed_dimensions)
        else:
            raise NotImplementedError("This acquisition function is not implemented!")
        # endregion

        self.reset()

    def reset(self):
        """Reset the surrogate model and the observation buffers for a new run."""
        self.gauss_process = GaussianProcessRegressor(self.kernel, n_restarts_optimizer=self.n_restarts_optimizer)
        self.best_reward = np.empty((1, 1))
        # Row 0 is a placeholder until the first observation arrives.
        self.x_observed = np.zeros((1, self.nr_weights), dtype=np.float64)
        self.y_observed = np.zeros((1, 1), dtype=np.float64)
        self.episode = 0
        self._has_data = False

    def next_observation(self):
        """Return the next candidate proposed by the acquisition function."""
        x_next = self.acquisition_function(self.gauss_process, self.x_observed, seed=self.seed)
        return x_next

    def add_observation(self, y_new, x_new, fit=True):
        """Record one observation; optionally refit the GP and bump the episode count.

        Args:
            y_new: observed reward.
            x_new: observed weight vector (length nr_weights).
            fit: when True, refit the Gaussian process and increment episode.
        """
        # FIX: the original test 'x_observed.shape[0] == 1' stayed true after
        # the first insert (the shape never changes in that branch), so every
        # later observation overwrote row 0 instead of being appended.  An
        # explicit flag now tracks whether the placeholder row is consumed.
        if not self._has_data:
            self.x_observed[0, :] = x_new
            self.y_observed[0] = y_new
            self.best_reward[0] = np.max(self.y_observed)
            self._has_data = True
        else:
            self.x_observed = np.vstack((self.x_observed, np.around(x_new, decimals=8)))
            self.y_observed = np.vstack((self.y_observed, y_new))
            self.best_reward = np.vstack((self.best_reward, np.max(self.y_observed)))

        if fit:
            self.gauss_process.fit(self.x_observed, self.y_observed)
            self.episode += 1

    def get_best_result(self):
        """Return (best_reward, best_x, index) over all recorded observations."""
        y_max = np.max(self.y_observed)
        idx = np.argmax(self.y_observed)
        x_max = self.x_observed[idx, :]
        return y_max, x_max, idx
21
src/interaction_optimizers/package.xml
Normal file
21
src/interaction_optimizers/package.xml
Normal file
@ -0,0 +1,21 @@
|
||||
<?xml version="1.0"?>
|
||||
<?xml-model href="http://download.ros.org/schema/package_format3.xsd" schematypens="http://www.w3.org/2001/XMLSchema"?>
|
||||
<package format="3">
|
||||
<name>interaction_optimizers</name>
|
||||
<version>0.0.0</version>
|
||||
<description>TODO: Package description</description>
|
||||
<maintainer email="nikolaus.feith@unileoben.ac.at">niko</maintainer>
|
||||
<license>TODO: License declaration</license>
|
||||
|
||||
<exec_depend>interaction_msgs</exec_depend>
|
||||
<exec_depend>rclpy</exec_depend>
|
||||
|
||||
<test_depend>ament_copyright</test_depend>
|
||||
<test_depend>ament_flake8</test_depend>
|
||||
<test_depend>ament_pep257</test_depend>
|
||||
<test_depend>python3-pytest</test_depend>
|
||||
|
||||
<export>
|
||||
<build_type>ament_python</build_type>
|
||||
</export>
|
||||
</package>
|
4
src/interaction_optimizers/setup.cfg
Normal file
4
src/interaction_optimizers/setup.cfg
Normal file
@ -0,0 +1,4 @@
|
||||
[develop]
|
||||
script_dir=$base/lib/interaction_optimizers
|
||||
[install]
|
||||
install_scripts=$base/lib/interaction_optimizers
|
29
src/interaction_optimizers/setup.py
Normal file
29
src/interaction_optimizers/setup.py
Normal file
@ -0,0 +1,29 @@
|
||||
from setuptools import find_packages, setup
|
||||
import os
|
||||
from glob import glob
|
||||
|
||||
package_name = 'interaction_optimizers'
|
||||
|
||||
setup(
|
||||
name=package_name,
|
||||
version='0.0.0',
|
||||
packages=find_packages(exclude=['test']),
|
||||
data_files=[
|
||||
('share/ament_index/resource_index/packages',
|
||||
['resource/' + package_name]),
|
||||
('share/' + package_name, ['package.xml']),
|
||||
(os.path.join('share', package_name, 'config'), glob('config/*.yaml')),
|
||||
],
|
||||
install_requires=['setuptools'],
|
||||
zip_safe=True,
|
||||
maintainer='niko',
|
||||
maintainer_email='nikolaus.feith@unileoben.ac.at',
|
||||
description='TODO: Package description',
|
||||
license='TODO: License declaration',
|
||||
tests_require=['pytest'],
|
||||
entry_points={
|
||||
'console_scripts': [
|
||||
'bo_node = interaction_optimizers.bayesian_optimization_node:main'
|
||||
],
|
||||
},
|
||||
)
|
Binary file not shown.
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user