Closed: AgentEXPL closed this issue 2 years ago.
Good spot! Yes, you are correct: it should go "publish twist message -> propagate state -> get reward -> return new state and reward".
The action publishing should happen first like:
```python
def step(self, act):
    # Publish the robot action
    vel_cmd = Twist()
    vel_cmd.linear.x = act[0]
    vel_cmd.angular.z = act[1]
    self.vel_pub.publish(vel_cmd)
```
And only then unpause the simulator. I must have moved the code around at some point. Can you confirm that placing the velocity publishing command there does not break anything and that everything works fine?
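So the overall shape of `step` would be something like this (only the ordering matters here; `self.unpause`, `self.pause`, and the topic names are those used elsewhere in this thread):

```python
def step(self, act):
    # 1. publish the Twist action (as in the snippet above)
    # 2. unpause physics via self.unpause()
    # 3. time.sleep(0.1) so the action propagates in the simulator
    # 4. read /r1/front_laser/scan and /r1/odom for the new state
    # 5. pause physics via self.pause()
    # 6. compute the reward from the post-action state
    # 7. return new state and reward
    ...
```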
After changing the code as follows, it also works. The code follows the process "publish twist message -> propagate state -> get reward -> return new state and reward". In "propagate state", time.sleep is called first, and then the state value is obtained with rospy.wait_for_message.
By the way, the way laser_state is generated was also changed to match the way it is done in def reset. The code also works if this is left unchanged.
```python
# Imports this snippet relies on (binning and calculate_observation are
# helper functions defined elsewhere in velodyne_env.py)
import math
import time

import numpy as np
import rospy
from geometry_msgs.msg import Twist
from nav_msgs.msg import Odometry
from numpy import inf
from sensor_msgs.msg import LaserScan
from squaternion import Quaternion
from visualization_msgs.msg import Marker, MarkerArray


def step(self, act):
    target = False

    # Publish the robot action first, so it takes effect as soon as the
    # simulator is unpaused
    vel_cmd = Twist()
    vel_cmd.linear.x = act[0]
    vel_cmd.angular.z = act[1]
    self.vel_pub.publish(vel_cmd)

    # Unpause the simulator and let the state propagate
    rospy.wait_for_service('/gazebo/unpause_physics')
    try:
        self.unpause()
    except rospy.ServiceException:
        print("/gazebo/unpause_physics service call failed")
    time.sleep(0.1)

    # Read the post-action laser scan
    data = None
    while data is None:
        try:
            data = rospy.wait_for_message('/r1/front_laser/scan', LaserScan, timeout=0.5)
        except:
            pass

    # Previous way of generating laser_state from the Velodyne data,
    # kept for reference; the code also works with this version
    """
    laser_state = np.array(data.ranges[:])
    v_state = []
    v_state[:] = self.velodyne_data[:]
    laser_state = [v_state]
    done, col, min_laser = self.calculate_observation(data)
    """

    # Read the post-action odometry
    dataOdom = None
    while dataOdom is None:
        try:
            dataOdom = rospy.wait_for_message('/r1/odom', Odometry, timeout=0.5)
        except:
            pass

    # Pause the simulator while the next action is being computed
    rospy.wait_for_service('/gazebo/pause_physics')
    try:
        self.pause()
    except rospy.ServiceException:
        print("/gazebo/pause_physics service call failed")

    # Generate laser_state the same way as in def reset
    laser_state = np.array(data.ranges[:])
    laser_state[laser_state == inf] = 10
    laser_state = binning(0, laser_state, 20)
    done, col, min_laser = self.calculate_observation(data)

    # Calculate robot heading from odometry data
    self.odomX = dataOdom.pose.pose.position.x
    self.odomY = dataOdom.pose.pose.position.y
    quaternion = Quaternion(
        dataOdom.pose.pose.orientation.w,
        dataOdom.pose.pose.orientation.x,
        dataOdom.pose.pose.orientation.y,
        dataOdom.pose.pose.orientation.z)
    euler = quaternion.to_euler(degrees=False)
    angle = round(euler[2], 4)

    # Calculate distance to the goal from the robot
    Dist = math.sqrt(math.pow(self.odomX - self.goalX, 2) + math.pow(self.odomY - self.goalY, 2))

    # Calculate the angle between the robot's heading and the heading toward the goal
    skewX = self.goalX - self.odomX
    skewY = self.goalY - self.odomY
    dot = skewX * 1 + skewY * 0
    mag1 = math.sqrt(math.pow(skewX, 2) + math.pow(skewY, 2))
    mag2 = math.sqrt(math.pow(1, 2) + math.pow(0, 2))
    beta = math.acos(dot / (mag1 * mag2))
    if skewY < 0:
        if skewX < 0:
            beta = -beta
        else:
            beta = 0 - beta
    beta2 = (beta - angle)
    if beta2 > np.pi:
        beta2 = np.pi - beta2
        beta2 = -np.pi - beta2
    if beta2 < -np.pi:
        beta2 = -np.pi - beta2
        beta2 = np.pi - beta2

    # Publish visual data in Rviz: goal position marker
    markerArray = MarkerArray()
    marker = Marker()
    marker.header.frame_id = "base_link"
    marker.type = marker.CYLINDER
    marker.action = marker.ADD
    marker.scale.x = 0.1
    marker.scale.y = 0.1
    marker.scale.z = 0.01
    marker.color.a = 1.0
    marker.color.r = 0.0
    marker.color.g = 1.0
    marker.color.b = 0.0
    marker.pose.orientation.w = 1.0
    marker.pose.position.x = self.goalX
    marker.pose.position.y = self.goalY
    marker.pose.position.z = 0
    markerArray.markers.append(marker)
    self.publisher.publish(markerArray)

    # Linear velocity marker
    markerArray2 = MarkerArray()
    marker2 = Marker()
    marker2.header.frame_id = "base_link"
    marker2.type = marker.CUBE
    marker2.action = marker.ADD
    marker2.scale.x = abs(act[0])
    marker2.scale.y = 0.1
    marker2.scale.z = 0.01
    marker2.color.a = 1.0
    marker2.color.r = 1.0
    marker2.color.g = 0.0
    marker2.color.b = 0.0
    marker2.pose.orientation.w = 1.0
    marker2.pose.position.x = 5
    marker2.pose.position.y = 0
    marker2.pose.position.z = 0
    markerArray2.markers.append(marker2)
    self.publisher2.publish(markerArray2)

    # Angular velocity marker
    markerArray3 = MarkerArray()
    marker3 = Marker()
    marker3.header.frame_id = "base_link"
    marker3.type = marker.CUBE
    marker3.action = marker.ADD
    marker3.scale.x = abs(act[1])
    marker3.scale.y = 0.1
    marker3.scale.z = 0.01
    marker3.color.a = 1.0
    marker3.color.r = 1.0
    marker3.color.g = 0.0
    marker3.color.b = 0.0
    marker3.pose.orientation.w = 1.0
    marker3.pose.position.x = 5
    marker3.pose.position.y = 0.2
    marker3.pose.position.z = 0
    markerArray3.markers.append(marker3)
    self.publisher3.publish(markerArray3)

    markerArray4 = MarkerArray()
    marker4 = Marker()
    marker4.header.frame_id = "base_link"
    marker4.type = marker.CUBE
    marker4.action = marker.ADD
    marker4.scale.x = 0.1  # abs(act2)
    marker4.scale.y = 0.1
    marker4.scale.z = 0.01
    marker4.color.a = 1.0
    marker4.color.r = 1.0
    marker4.color.g = 0.0
    marker4.color.b = 0.0
    marker4.pose.orientation.w = 1.0
    marker4.pose.position.x = 5
    marker4.pose.position.y = 0.4
    marker4.pose.position.z = 0
    markerArray4.markers.append(marker4)
    self.publisher4.publish(markerArray4)

    '''Bunch of different ways to generate the reward'''
    # reward = act[0]*0.7-abs(act[1])
    # r1 = 1 - 2 * math.sqrt(abs(beta2 / np.pi))
    # r2 = self.distOld - Dist
    r3 = lambda x: 1 - x if x < 1 else 0.0
    # rl = 0
    # for r in range(len(laser_state[0])):
    #     rl += r3(laser_state[0][r])
    # reward = 0.8 * r1 + 30 * r2 + act[0]/2 - abs(act[1])/2 - r3(min(laser_state[0]))/2
    reward = act[0] / 2 - abs(act[1]) / 2 - r3(min(laser_state[0])) / 2
    # reward = 30 * r2 + act[0] / 2 - abs(act[1]) / 2  # - r3(min(laser_state[0]))/2
    # reward = 0.8 * r1 + 30 * r2
    self.distOld = Dist

    # Detect if the goal has been reached and give a large positive reward
    if Dist < 0.3:
        target = True
        done = True
        self.distOld = math.sqrt(math.pow(self.odomX - self.goalX, 2) + math.pow(self.odomY - self.goalY, 2))
        reward = 80

    # Detect if a collision has happened and give a large negative reward
    if col:
        reward = -100

    toGoal = [Dist, beta2, act[0], act[1]]
    state = np.append(laser_state, toGoal)
    return state, reward, done, target
```
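Note that the snippet calls a binning helper that is defined elsewhere in the file and not shown here. As a rough, hypothetical sketch of what such a helper might do, assuming it compresses the raw ranges into quantity bins by keeping the minimum (closest obstacle) of each slice (the actual implementation in the repo may differ):

```python
import numpy as np

def binning(lower_bound, data, quantity):
    # Hypothetical sketch: split data[lower_bound:] into `quantity`
    # equal slices and keep the minimum range of each slice
    width = (len(data) - lower_bound) // quantity
    bins = [min(data[lower_bound + i * width:lower_bound + (i + 1) * width])
            for i in range(quantity)]
    return np.array([bins])  # shape (1, quantity), so laser_state[0] works
```

Under this sketch, the 20 bins plus the four toGoal values are flattened by np.append into a 24-dimensional state vector for the policy network.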
https://github.com/reiniscimurs/DRL-robot-navigation/blob/dea37acfc65f702f7fa792787e09602416cf85d4/TD3/velodyne_env.py#L201
I noticed the sequence of processes in def step linked above.
This confuses me, since the returned state is not the state after executing the action but actually the state before executing the action. However, I think the returned state needs to be the input to the policy network that generates the action for the next step. Therefore, laser_state and dataOdom should be obtained after time.sleep(0.1) is called, i.e., step 2 (obtaining the sensor data) and step 3 (time.sleep) need to be exchanged.
Is my understanding correct?
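To make the question concrete, here is a hypothetical, self-contained sketch of the two orderings; the helper functions are stand-ins for the ROS calls in velodyne_env.py, not its actual API:

```python
import time


# Stand-ins for the real ROS service calls and topic reads, only to make
# the ordering executable; these are not the velodyne_env.py API.
def unpause_physics():
    print("unpause /gazebo/unpause_physics")

def pause_physics():
    print("pause /gazebo/pause_physics")

def read_laser_and_odom():
    print("read /r1/front_laser/scan and /r1/odom")
    return "state"


def step_as_linked():
    """Ordering as in the linked code: the state is read before the
    sleep, so it still reflects the world prior to the action."""
    unpause_physics()
    state = read_laser_and_odom()  # step 2: pre-action state
    time.sleep(0.1)                # step 3: action propagates after the read
    pause_physics()
    return state

def step_with_steps_exchanged():
    """Proposed fix: let the action propagate first, then read the
    post-action state that the policy network will consume."""
    unpause_physics()
    time.sleep(0.1)                # step 3 moved up: propagate first
    state = read_laser_and_odom()  # step 2 moved down: post-action state
    pause_physics()
    return state
```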