Manual use without image transfere

This commit is contained in:
Niko Feith 2023-03-02 11:14:14 +01:00
parent f429048298
commit 45f789f9b7
9 changed files with 112 additions and 26 deletions

View File

@ -21,7 +21,8 @@ find_package(rosidl_default_generators REQUIRED)
rosidl_generate_interfaces(${PROJECT_NAME} rosidl_generate_interfaces(${PROJECT_NAME}
"srv/WeightToPolicy.srv" "srv/WeightToPolicy.srv"
"action/RLRollOut.action" "srv/RLRollOut.srv"
"msg/ImageFeedback.msg"
) )
if(BUILD_TESTING) if(BUILD_TESTING)

View File

@ -0,0 +1,5 @@
int16 height
int16 width
uint8[] red
uint8[] green
uint8[] blue

View File

@ -0,0 +1,4 @@
float32[] policy
---
float32 reward
uint16 final_step

View File

@ -139,11 +139,11 @@ class Continuous_MountainCarEnv(gym.Env):
low=self.low_state, high=self.high_state, dtype=np.float32 low=self.low_state, high=self.high_state, dtype=np.float32
) )
def step(self, action: np.ndarray): def step(self, action: float):
position = self.state[0] position = self.state[0]
velocity = self.state[1] velocity = self.state[1]
force = min(max(action[0], self.min_action), self.max_action) force = min(max(action, self.min_action), self.max_action)
velocity += force * self.power - 0.0025 * math.cos(3 * position) velocity += force * self.power - 0.0025 * math.cos(3 * position)
if velocity > self.max_speed: if velocity > self.max_speed:
@ -166,7 +166,7 @@ class Continuous_MountainCarEnv(gym.Env):
reward = 0 reward = 0
if terminated: if terminated:
reward += 10 reward += 10
reward -= math.pow(action[0], 2) * 0.1 reward -= math.pow(action, 2) * 0.1
reward -= 1 reward -= 1
self.state = np.array([position, velocity], dtype=np.float32) self.state = np.array([position, velocity], dtype=np.float32)

View File

@ -1,32 +1,33 @@
from active_bo_msgs.action import RLRollOut from active_bo_msgs.srv import RLRollOut
from active_bo_msgs.msg import ImageFeedback
import rclpy import rclpy
from rclpy.node import Node from rclpy.node import Node
from rclpy.action import ActionServer
from active_bo_ros.ReinforcementLearning.ContinuousMountainCar import Continuous_MountainCarEnv from active_bo_ros.ReinforcementLearning.ContinuousMountainCar import Continuous_MountainCarEnv
import numpy as np import numpy as np
import time import time
class RLActionServer(Node): class RLService(Node):
def __init__(self): def __init__(self):
super().__init__('rl_action') super().__init__('rl_service')
self.action_server = ActionServer(self, self.srv = self.create_service(RLRollOut, 'rl_srv', self.rl_callback)
RLRollOut,
'reinforcement_learning_action', self.publisher = self.create_publisher(ImageFeedback, 'rl_feedback', 1)
self.execute_callback)
self.env = Continuous_MountainCarEnv(render_mode='rgb_array') self.env = Continuous_MountainCarEnv(render_mode='rgb_array')
self.distance_penalty = 0 self.distance_penalty = 0
def execute_callback(self, action_handle): def rl_callback(self, request, response):
feedback_msg = RLRollOut.Feedback() feedback_msg = ImageFeedback()
reward = 0 reward = 0
step_count = 0 step_count = 0
policy = action_handle.request.policy policy = request.policy
self.env.reset()
for i in range(len(policy)): for i in range(len(policy)):
action = policy[i] action = policy[i]
@ -42,7 +43,7 @@ class RLActionServer(Node):
feedback_msg.height = rgb_shape[0] feedback_msg.height = rgb_shape[0]
feedback_msg.width = rgb_shape[1] feedback_msg.width = rgb_shape[1]
action_handle.publish_feedback(feedback_msg) self.publisher.publish(feedback_msg)
if done: if done:
break break
@ -52,20 +53,17 @@ class RLActionServer(Node):
time.sleep(0.01) time.sleep(0.01)
action_handle.succeed() response.reward = reward
response.final_step = step_count
result = RLRollOut.Result() return response
result.reward = reward
result.final_step = step_count
return result
def main(args=None): def main(args=None):
rclpy.init(args=args) rclpy.init(args=args)
rl_action_server = RLActionServer() rl_service = RLService()
rclpy.spin(rl_action_server) rclpy.spin(rl_service)
if __name__ == '__main__': if __name__ == '__main__':
main() main()

View File

@ -0,0 +1,67 @@
from active_bo_msgs.srv import RLRollOut
from active_bo_msgs.msg import ImageFeedback
import rclpy
from rclpy.node import Node
from active_bo_ros.ReinforcementLearning.ContinuousMountainCar import Continuous_MountainCarEnv
import numpy as np
import time
class RLService(Node):
def __init__(self):
super().__init__('rl_service')
self.srv = self.create_service(RLRollOut, 'rl_srv', self.rl_callback)
self.publisher = self.create_publisher(ImageFeedback, 'rl_feedback', 1)
self.env = Continuous_MountainCarEnv(render_mode='rgb_array')
self.distance_penalty = 0
def rl_callback(self, request, response):
feedback_msg = ImageFeedback()
reward = 0
step_count = 0
policy = request.policy
for i in range(len(policy)):
action = policy[i]
output = self.env.step(action)
reward += output[1]
done = output[2]
step_count += 1
rgb_array = output[5]
rgb_shape = rgb_array.shape
feedback_msg.height = rgb_shape[0]
feedback_msg.width = rgb_shape[1]
self.publisher.publish(feedback_msg)
if done:
break
distance = -(self.env.goal_position - output[0][0])
reward += distance * self.distance_penalty
time.sleep(0.01)
response.reward = reward
response.final_step = step_count
return response
def main(args=None):
rclpy.init(args=args)
rl_service = RLService()
rclpy.spin(rl_service)
if __name__ == '__main__':
main()

View File

@ -0,0 +1,11 @@
from launch import LaunchDescription
from launch_ros.actions import Node
def generate_launch_description():
return LaunchDescription([
Node(
package='active_bo_ros',
executable='rl_srv',
name='rl_service'
),
])

View File

@ -16,7 +16,7 @@ setup(
('share/' + package_name, ['package.xml']), ('share/' + package_name, ['package.xml']),
(os.path.join('share', package_name), glob('launch/*.launch.py')), (os.path.join('share', package_name), glob('launch/*.launch.py')),
], ],
install_requires=['setuptools'], install_requires=['setuptools', 'gym', 'numpy'],
zip_safe=True, zip_safe=True,
maintainer='cpsfeith', maintainer='cpsfeith',
maintainer_email='nikolaus.feith@unileoben.ac.at', maintainer_email='nikolaus.feith@unileoben.ac.at',
@ -26,7 +26,7 @@ setup(
entry_points={ entry_points={
'console_scripts': [ 'console_scripts': [
'policy_srv = active_bo_ros.policy_service:main', 'policy_srv = active_bo_ros.policy_service:main',
'rl_action = active_bo_ros.rl_action_server:main' 'rl_srv = active_bo_ros.rl_service:main'
], ],
}, },
) )