Manual use without image transfer
This commit is contained in: parent f429048298, commit 45f789f9b7
@@ -21,7 +21,8 @@ find_package(rosidl_default_generators REQUIRED)
 
 rosidl_generate_interfaces(${PROJECT_NAME}
   "srv/WeightToPolicy.srv"
-  "action/RLRollOut.action"
+  "srv/RLRollOut.srv"
+  "msg/ImageFeedback.msg"
 )
 
 if(BUILD_TESTING)
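With these interface entries in place, the generated types are exactly what the Python nodes later in this commit import, i.e. from active_bo_msgs.srv import RLRollOut and from active_bo_msgs.msg import ImageFeedback.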
src/active_bo_msgs/msg/ImageFeedback.msg (new file, 5 lines)
@@ -0,0 +1,5 @@
+int16 height
+int16 width
+uint8[] red
+uint8[] green
+uint8[] blue
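ImageFeedback flattens an RGB frame into its dimensions plus three per-channel byte arrays. Below is a minimal sketch of how such a frame could be packed and unpacked with numpy, assuming row-major channel storage; the pack_frame/unpack_frame helpers are illustrative only, and the node in this commit fills just height and width, in line with the "without image transfer" intent.

# Hedged sketch: pack/unpack the flattened channel layout of ImageFeedback.
# Assumes row-major storage; pack_frame/unpack_frame are illustrative helpers,
# not part of this commit.
import numpy as np
from active_bo_msgs.msg import ImageFeedback


def pack_frame(rgb_array: np.ndarray) -> ImageFeedback:
    msg = ImageFeedback()
    msg.height = rgb_array.shape[0]
    msg.width = rgb_array.shape[1]
    msg.red = rgb_array[:, :, 0].flatten().tolist()
    msg.green = rgb_array[:, :, 1].flatten().tolist()
    msg.blue = rgb_array[:, :, 2].flatten().tolist()
    return msg


def unpack_frame(msg: ImageFeedback) -> np.ndarray:
    shape = (msg.height, msg.width)
    channels = [np.asarray(c, dtype=np.uint8).reshape(shape)
                for c in (msg.red, msg.green, msg.blue)]
    return np.stack(channels, axis=-1)   # H x W x 3 uint8 image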
src/active_bo_msgs/srv/RLRollOut.srv (new file, 4 lines)
@@ -0,0 +1,4 @@
+float32[] policy
+---
+float32 reward
+uint16 final_step
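The request carries the policy as a float array; the response returns the accumulated reward and the step at which the rollout stopped. A minimal synchronous client sketch against the 'rl_srv' service name used in this commit follows; the client node name and the example policy are illustrative assumptions.

# Hedged sketch of a one-shot client for the new service. The service name
# 'rl_srv' and the field names follow this commit; the node name and the
# example policy are illustrative only.
import rclpy
from active_bo_msgs.srv import RLRollOut


def main():
    rclpy.init()
    node = rclpy.create_node('rl_client_example')   # hypothetical client node
    client = node.create_client(RLRollOut, 'rl_srv')
    client.wait_for_service()

    request = RLRollOut.Request()
    request.policy = [0.5] * 50                      # illustrative open-loop policy
    future = client.call_async(request)
    rclpy.spin_until_future_complete(node, future)

    result = future.result()
    print(result.reward, result.final_step)
    rclpy.shutdown()


if __name__ == '__main__':
    main()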
@@ -139,11 +139,11 @@ class Continuous_MountainCarEnv(gym.Env):
             low=self.low_state, high=self.high_state, dtype=np.float32
         )
 
-    def step(self, action: np.ndarray):
+    def step(self, action: float):
 
         position = self.state[0]
         velocity = self.state[1]
-        force = min(max(action[0], self.min_action), self.max_action)
+        force = min(max(action, self.min_action), self.max_action)
 
         velocity += force * self.power - 0.0025 * math.cos(3 * position)
         if velocity > self.max_speed:
@@ -166,7 +166,7 @@ class Continuous_MountainCarEnv(gym.Env):
         reward = 0
         if terminated:
             reward += 10
-        reward -= math.pow(action[0], 2) * 0.1
+        reward -= math.pow(action, 2) * 0.1
         reward -= 1
 
         self.state = np.array([position, velocity], dtype=np.float32)
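With this change step() takes a plain float instead of a one-element array. A short sketch of a direct call, assuming the return-tuple layout the rollout node in this commit relies on (reward at index 1, done flag at index 2, RGB frame at index 5); that layout is specific to this modified environment, not standard gym.

# Hedged sketch: driving the modified environment with a scalar action.
# The tuple indices mirror the rollout node in this commit and are an
# assumption about this fork of the environment.
from active_bo_ros.ReinforcementLearning.ContinuousMountainCar import Continuous_MountainCarEnv

env = Continuous_MountainCarEnv(render_mode='rgb_array')
env.reset()
output = env.step(0.5)                 # scalar force, no np.array wrapper
reward, done, rgb_frame = output[1], output[2], output[5]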
@@ -1,32 +1,33 @@
-from active_bo_msgs.action import RLRollOut
+from active_bo_msgs.srv import RLRollOut
+from active_bo_msgs.msg import ImageFeedback
 
 import rclpy
 from rclpy.node import Node
-from rclpy.action import ActionServer
 
 from active_bo_ros.ReinforcementLearning.ContinuousMountainCar import Continuous_MountainCarEnv
 
 import numpy as np
 import time
 
-class RLActionServer(Node):
+class RLService(Node):
     def __init__(self):
-        super().__init__('rl_action')
-        self.action_server = ActionServer(self,
-                                          RLRollOut,
-                                          'reinforcement_learning_action',
-                                          self.execute_callback)
+        super().__init__('rl_service')
+        self.srv = self.create_service(RLRollOut, 'rl_srv', self.rl_callback)
+
+        self.publisher = self.create_publisher(ImageFeedback, 'rl_feedback', 1)
 
         self.env = Continuous_MountainCarEnv(render_mode='rgb_array')
         self.distance_penalty = 0
 
-    def execute_callback(self, action_handle):
+    def rl_callback(self, request, response):
 
-        feedback_msg = RLRollOut.Feedback()
+        feedback_msg = ImageFeedback()
 
         reward = 0
         step_count = 0
-        policy = action_handle.request.policy
+        policy = request.policy
+
+        self.env.reset()
 
         for i in range(len(policy)):
             action = policy[i]
@@ -42,7 +43,7 @@ class RLActionServer(Node):
             feedback_msg.height = rgb_shape[0]
             feedback_msg.width = rgb_shape[1]
 
-            action_handle.publish_feedback(feedback_msg)
+            self.publisher.publish(feedback_msg)
 
             if done:
                 break
@@ -52,20 +53,17 @@ class RLActionServer(Node):
 
             time.sleep(0.01)
 
-        action_handle.succeed()
-
-        result = RLRollOut.Result()
-        result.reward = reward
-        result.final_step = step_count
-
-        return result
+        response.reward = reward
+        response.final_step = step_count
+
+        return response
 
 def main(args=None):
     rclpy.init(args=args)
 
-    rl_action_server = RLActionServer()
+    rl_service = RLService()
 
-    rclpy.spin(rl_action_server)
+    rclpy.spin(rl_service)
 
 if __name__ == '__main__':
     main()
src/active_bo_ros/dump/rl_action_server.py (new file, 67 lines)
@@ -0,0 +1,67 @@
+from active_bo_msgs.srv import RLRollOut
+from active_bo_msgs.msg import ImageFeedback
+
+import rclpy
+from rclpy.node import Node
+
+from active_bo_ros.ReinforcementLearning.ContinuousMountainCar import Continuous_MountainCarEnv
+
+import numpy as np
+import time
+
+class RLService(Node):
+    def __init__(self):
+        super().__init__('rl_service')
+        self.srv = self.create_service(RLRollOut, 'rl_srv', self.rl_callback)
+
+        self.publisher = self.create_publisher(ImageFeedback, 'rl_feedback', 1)
+
+        self.env = Continuous_MountainCarEnv(render_mode='rgb_array')
+        self.distance_penalty = 0
+
+    def rl_callback(self, request, response):
+
+        feedback_msg = ImageFeedback()
+
+        reward = 0
+        step_count = 0
+        policy = request.policy
+
+        for i in range(len(policy)):
+            action = policy[i]
+            output = self.env.step(action)
+
+            reward += output[1]
+            done = output[2]
+            step_count += 1
+
+            rgb_array = output[5]
+            rgb_shape = rgb_array.shape
+
+            feedback_msg.height = rgb_shape[0]
+            feedback_msg.width = rgb_shape[1]
+
+            self.publisher.publish(feedback_msg)
+
+            if done:
+                break
+
+            distance = -(self.env.goal_position - output[0][0])
+            reward += distance * self.distance_penalty
+
+            time.sleep(0.01)
+
+        response.reward = reward
+        response.final_step = step_count
+
+        return response
+
+def main(args=None):
+    rclpy.init(args=args)
+
+    rl_service = RLService()
+
+    rclpy.spin(rl_service)
+
+if __name__ == '__main__':
+    main()
src/active_bo_ros/launch/rl_service.launch.py (new executable file, 11 lines)
@@ -0,0 +1,11 @@
+from launch import LaunchDescription
+from launch_ros.actions import Node
+
+def generate_launch_description():
+    return LaunchDescription([
+        Node(
+            package='active_bo_ros',
+            executable='rl_srv',
+            name='rl_service'
+        ),
+    ])
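Assuming the workspace has been built and sourced, this launch file (installed via the glob('launch/*.launch.py') entry in setup.py) should bring the node up with: ros2 launch active_bo_ros rl_service.launch.py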
@@ -16,7 +16,7 @@ setup(
         ('share/' + package_name, ['package.xml']),
         (os.path.join('share', package_name), glob('launch/*.launch.py')),
     ],
-    install_requires=['setuptools'],
+    install_requires=['setuptools', 'gym', 'numpy'],
     zip_safe=True,
     maintainer='cpsfeith',
     maintainer_email='nikolaus.feith@unileoben.ac.at',
@@ -26,7 +26,7 @@ setup(
     entry_points={
         'console_scripts': [
             'policy_srv = active_bo_ros.policy_service:main',
-            'rl_action = active_bo_ros.rl_action_server:main'
+            'rl_srv = active_bo_ros.rl_service:main'
         ],
     },
 )
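After a rebuild (for example with colcon build), the renamed console script should be runnable directly as ros2 run active_bo_ros rl_srv; the previous rl_action entry point is no longer installed.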