Commit 7858489f authored by sjmonagi

playing

parent 1874c0d1
@@ -13,7 +13,7 @@ from Agent_image_only.helper import plotting_training_log, train_valid_env_sync,
 random.seed(123)
 np.random.seed(123)
-dir = "/home/nagi/Desktop/Master_project_final/DRQN_3_her_shaped_sequence_images_only_f1/DRQN.ckpt"
+dir = "/home/nagi/Desktop/Master_project_final/DRQN_3_her__sequence_images_only_f1/DRQN.ckpt"
 ##### environment_Variables
 grid_size = 0.18  # size of the agent step
@@ -161,7 +161,7 @@ with tf.Session() as sess:
 plotted_data = plotted_data.append({"Episodes": str(n),
                                     "Successful trajectories": successes / (n + 1),
                                     "Failed trajectories": failures / (n + 1),
-                                    "Ratio": (successes / (failures + 1e-6)),
+                                    "Ratio": (successes / (failures + 0.1)),
                                     "loss": loss, "epsilon": epsilon,
                                     "F1": ((1 - (failures / (n + 1))) * (successes / (n + 1))) /
                                           (((1 - (failures / (n + 1))) + (successes / (n + 1))) + 1)}, ignore_index=True)
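For reference, the two monitoring quantities touched by this hunk can be reproduced in isolation. The sketch below is only an illustration (the function name learning_metrics and the standalone counters are hypothetical stand-ins for the training-loop variables); it shows how the smoothing constant in the denominator keeps the ratio finite while no failures have occurred, and mirrors the F1-like score logged above.

# Minimal sketch of the logged metrics, assuming plain integer episode counters.
def learning_metrics(successes, failures, n, smoothing=0.1):
    success_rate = successes / (n + 1)
    failure_rate = failures / (n + 1)
    # smoothing keeps the ratio finite while failures == 0
    ratio = successes / (failures + smoothing)
    # F1-like score combining success rate and (1 - failure rate), as in the training log
    f1 = ((1 - failure_rate) * success_rate) / ((1 - failure_rate) + success_rate + 1)
    return success_rate, failure_rate, ratio, f1

# e.g. after 10 episodes with 7 successes and 3 failures
print(learning_metrics(successes=7, failures=3, n=9))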
@@ -26,7 +26,7 @@ def plotting_training_log(num_episode, plotted_data, successes, failures, loss,
       "agent pos x:%2f" % agent_init_pos[0], "agent pos z:%2f" % agent_init_pos[2],
       "distance: %3f" % distance,
       "failures:%.3f" % (failures / (num_episode + 1)),
-      "ratio:%.3f" % (successes / (failures + 1e-6)),
+      "ratio:%.3f" % (successes / (failures + 0.1)),
       "loss: %.2f" % loss, "exploration %.5f" % epsilon,
       "Steps:", num_steps)
@@ -16,7 +16,7 @@ np.random.seed(123)
 fields_name = ["iteration", "successes"]
-dir = "/home/nagi/Desktop/Master_project_final/DRQN_3_her_sparse_image_and_pos_F1/DRQN.ckpt"
+dir = "/home/nagi/Desktop/Master_project_final/DRQN_3_her_shaped_image_and_pos_F1/DRQN.ckpt"
 ##### environment_Variables
 grid_size = 0.18  # size of the agent step
@@ -25,7 +25,7 @@ distance_threshold = grid_size * 2  # distance threshold to the goal
 action_n = 3  # number of allowed actions
 random_init_position = False  # Random initial positions only -- no change in the agent orientation
 random_init_pose = True  # Random initial positions with random agent orientation
-reward = "sparse"  # reward type "shaped","sparse"
+reward = "shaped"  # reward type "shaped","sparse"
 ######################### hyper-parameter
 num_episodes = 15001
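For context on the reward flag flipped above: the environment supports a "sparse" and a "shaped" variant. The snippet below is only an illustrative sketch assuming a distance-based shaping term; compute_reward is a hypothetical helper, and the actual reward computation lives inside the environment class, which is not shown in this diff.

# Illustrative sketch only -- the real rewards are computed inside the environment.
def compute_reward(reward_type, distance, prev_distance, distance_threshold):
    reached = distance <= distance_threshold
    if reward_type == "sparse":
        # sparse: a positive signal only when the goal is reached
        return 1.0 if reached else 0.0
    # "shaped": dense signal proportional to the progress made towards the goal (assumed form)
    return (prev_distance - distance) + (1.0 if reached else 0.0)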
import numpy as np
import random


class her_buffer(object):
    """Episode-level replay buffer used for hindsight experience replay (HER)."""

    def __init__(self, buffer_size=1000):
        self.memory = []
        self.mem_size = buffer_size

    def clear(self):
        self.memory = []

    def add(self, experience):
        # drop the oldest episodes once the buffer is full
        if len(self.memory) + 1 >= self.mem_size:
            self.memory[0:(1 + len(self.memory)) - self.mem_size] = []
        self.memory.extend(experience)

    def sample(self, batch_size, trace_length):
        # keep only episodes long enough to yield a full trace
        tmp_buffer = [episode for episode in self.memory if len(episode) + 1 > trace_length]
        sampled_episodes = random.sample(tmp_buffer, batch_size)
        sampled_traces = []
        for episode in sampled_episodes:
            # pick a random starting point and take a contiguous trace
            point = np.random.randint(0, len(episode) + 1 - trace_length)
            sampled_traces.append(episode[point:point + trace_length])
        sampled_traces = np.array(sampled_traces)
        # each transition stores 7 fields
        return np.reshape(sampled_traces, [batch_size * trace_length, 7])
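A small usage sketch of the buffer above. The 7-fields-per-transition layout is taken from the reshape in sample; the episode contents here are dummy zeros, and their exact meaning is defined by the training script.

# Dummy usage of her_buffer: each episode is a list of 7-field transitions (placeholder values).
buffer = her_buffer(buffer_size=1000)

for _ in range(8):
    episode = [np.zeros(7) for _ in range(12)]   # one 12-step episode of dummy transitions
    buffer.add([episode])                        # add() extends memory, so wrap the episode in a list

# sample 4 traces of 8 consecutive steps each -> array of shape [4 * 8, 7]
batch = buffer.sample(batch_size=4, trace_length=8)
print(batch.shape)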
import time
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from matplotlib import style
from matplotlib.lines import Line2D

# plt.style.use("seaborn")


def train_valid_env_sync(training_env_pose, validation_env_pose):
    # compare the agent pose reported by the training env against the top-view (validation) env
    equal = np.array_equal(training_env_pose, validation_env_pose)
    if not equal:
        print("Agent position x:", training_env_pose[0], "not equal top view position x:", validation_env_pose[0],
              "Agent position z:", training_env_pose[2], "not equal top view position z:", validation_env_pose[2],
              "Agent angle:", training_env_pose[4], "not equal top view angle", validation_env_pose[4])
def plotting_training_log(num_episode, plotted_data, successes, failures, loss, goal, distance, agent_init_pos,
                          epsilon, num_steps):
    print("\repisode:", num_episode + 1,
          "successes:%.3f" % (successes / (num_episode + 1)),
          "goal x:%2f" % goal[0], "goal z:%2f" % goal[2],
          "agent pos x:%2f" % agent_init_pos[0], "agent pos z:%2f" % agent_init_pos[2],
          "distance: %3f" % distance,
          "failures:%.3f" % (failures / (num_episode + 1)),
          "ratio:%.3f" % (successes / (failures + 1e-6)),
          "loss: %.2f" % loss, "exploration %.5f" % epsilon,
          "Steps:", num_steps)

    if num_episode % 100 == 0 and num_episode > 0:
        # combined plot of successful/failed trajectories and the ratio between them
        plotted_data.plot(x="Episodes", y=["Successful trajectories", "Failed trajectories", "Ratio"],
                          title="Agent Learning Ratio")
        plt.xlabel("Episodes")
        plt.ylabel("Successful/Failed Trajectories and Ratio")
        plt.savefig("failed_success_ratio" + str(num_episode) + ".png")

        # combined plot of successful/failed trajectories and the F1 score between them
        plotted_data.plot(x="Episodes", y=["Successful trajectories", "Failed trajectories", "F1"],
                          title="Agent Learning F1")
        plt.xlabel("Episodes")
        plt.ylabel("Successful/Failed Trajectories and F1 Score")
        plt.savefig("failed_success_F1" + str(num_episode) + ".png")

        # plot of successful trajectories
        plotted_data.plot(x="Episodes", y=["Successful trajectories"],
                          title="Successful Trajectories")
        plt.xlabel("Episodes")
        plt.ylabel("Successful Trajectories")
        plt.savefig("successful" + str(num_episode) + ".png")

        # plot of failed trajectories
        plotted_data.plot(x="Episodes", y=["Failed trajectories"],
                          title="Failed Trajectories")
        plt.xlabel("Episodes")
        plt.ylabel("Failed Trajectories")
        plt.savefig("Failed" + str(num_episode) + ".png")

        # plot of the success/failure ratio
        plotted_data.plot(x="Episodes", y=["Ratio"],
                          title="Ratio between successful and failed trajectories")
        plt.xlabel("Episodes")
        plt.ylabel("Ratio")
        plt.savefig("Ratio" + str(num_episode) + ".png")

        # plot of the F1 score
        plotted_data.plot(x="Episodes", y=["F1"],
                          title="F1 Score between Successful and Failed Trajectories")
        plt.xlabel("Episodes")
        plt.ylabel("F1 Score")
        plt.savefig("F1" + str(num_episode) + ".png")

        # plot of the model loss
        plotted_data.plot(x="Episodes", y=["loss"],
                          title="HER-DRQN model loss")
        plt.xlabel("Episodes")
        plt.ylabel("Loss")
        plt.savefig("Loss" + str(num_episode) + ".png")
def validate(n, nodes_num, top_view, env, envT, ae, ae_sess, distance_threshold, model):
    print("### Validation ###")
    plotted_data_val = pd.DataFrame(
        columns=["Episodes", "Successful trajectories", "Failed trajectories", "Ratio", "F1"])

    val_success = 0
    val_failures = 0

    for i in range(100):
        # reset the recurrent state of the DRQN
        rnn_state = (np.zeros([1, nodes_num]), np.zeros([1, nodes_num]))

        # reset environment
        obs_state, pos_state, goal, distance, pose, pre_action_idx = env.reset()

        if top_view:
            # additional top-view env used only for validation
            agent_pos_top, pose_top = envT.reset(x_pos=pose[0],
                                                 y_pos=pose[1],
                                                 z_pos=pose[2],
                                                 angle=pose[4])
        if top_view:
            # verify the agent position reported by the two different environment objects
            train_valid_env_sync(pose, pose_top)

        # encode the observation with the pre-trained autoencoder and append the position state
        features = ae_sess.run(ae.feature_vector, feed_dict={ae.image: obs_state[None, :, :, :]})
        features = np.squeeze(features, axis=0)
        obs_pos_state = np.concatenate((features, pos_state), axis=0)

        done = False
        num_steps = 0
        while not done:
            # greedy action selection (epsilon = 0) conditioned on the goal and recurrent state
            curr_action_idx, rnn_state_ = model.sample_action(goal=goal,
                                                              batch_size=1,
                                                              trace_length=1,
                                                              epsilon=0,
                                                              rnn_state=rnn_state,
                                                              pos_obs_state=obs_pos_state,
                                                              pre_action=pre_action_idx)

            obs_state_, pos_state_, distance_, done, reward, collision, pose_ = env.step(curr_action_idx,
                                                                                         goal, distance)
            if top_view:
                # top-view environment used for verification of the main environment
                obsStateT, posStateT, distanceT, doneT, rewardT, collisionT, agentPoseT = envT.step(
                    curr_action_idx,
                    goal, distance)
            if top_view:
                # verify the agent position reported by the two different environment objects
                train_valid_env_sync(pose_, agentPoseT)

            features_ = ae_sess.run(ae.feature_vector, feed_dict={ae.image: obs_state_[None, :, :, :]})
            features_ = np.squeeze(features_, axis=0)
            obs_pos_state_ = np.concatenate((features_, pos_state_), axis=0)

            rnn_state = rnn_state_
            obs_pos_state = obs_pos_state_
            distance = distance_
            pre_action_idx = curr_action_idx
            num_steps += 1

            if done:
                if distance <= distance_threshold:
                    val_success += done
                else:
                    val_failures += done

            if num_steps == 200:
                # abort episodes that exceed the step limit and count them as failures
                done = True
                val_failures += done
                break

        print("validation_success:", val_success, "validation_failures:", val_failures, "steps_num:", num_steps)

        plotted_data_val = plotted_data_val.append({"Episodes": str(i),
                                                    "Successful trajectories": val_success / (i + 1),
                                                    "Failed trajectories": val_failures / (i + 1),
                                                    "Ratio": (val_success / (val_failures + 1)),
                                                    # rates are computed over the validation episodes run so far
                                                    "F1": ((1 - (val_failures / (i + 1))) * (val_success / (i + 1))) /
                                                          ((1 - (val_failures / (i + 1))) + (val_success / (i + 1)) + 1)
                                                    }, ignore_index=True)

    plotted_data_val.plot(x="Episodes", y=["Successful trajectories", "Failed trajectories", "Ratio"],
                          title="Validation Agent Learning Ratio")
    plt.xlabel("Episodes")
    plt.ylabel("Successful/Failed Trajectories and Ratio")
    plt.savefig("Validation_failed_success_ratio" + str(n) + str(i) + ".png")

    plotted_data_val.plot(x="Episodes", y=["Successful trajectories", "Failed trajectories", "F1"],
                          title="Validation Agent Learning F1")
    plt.xlabel("Episodes")
    plt.ylabel("Successful/Failed Trajectories and F1 Score")
    plt.savefig("Validation_failed_success_F1" + str(n) + str(i) + ".png")

    plotted_data_val.plot(x="Episodes", y=["F1"],
                          title="F1 Score")
    plt.xlabel("Episodes")
    plt.ylabel("F1 Score between Successful and Failed Trajectories")
    plt.savefig("F1 Score" + str(n) + str(i) + ".png")