sjmonagi / Simulated visual based reinforcement learning for navigation with Hindsight experience Replay

Commit 519c3b97, authored Apr 15, 2020 by sjmonagi
Commit message: "playing"
Parent commit: 7858489f
Showing 9 changed files with 1461 additions and 0 deletions:

Agent_wiz_6_Action_shaped_reward_HER/DRQN_HER.py                                    +195  -0
Agent_wiz_6_Action_shaped_reward_HER/DRQN_HER_Training.py                           +194  -0
Agent_wiz_6_Action_shaped_reward_HER/Environment.py                                 +227  -0
Agent_wiz_6_Action_shaped_reward_HER/Environment_top_view.py                        +176  -0
Agent_wiz_6_Action_shaped_reward_HER/HER_shaped_reward_pos_images_6actions_otputs   +35   -0
Agent_wiz_6_Action_shaped_reward_HER/Her_episodes_experiences.py                    +27   -0
Agent_wiz_6_Action_shaped_reward_HER/autoencoder.py                                 +331  -0
Agent_wiz_6_Action_shaped_reward_HER/experience_buffer.py                           +99   -0
Agent_wiz_6_Action_shaped_reward_HER/helper.py                                      +177  -0
Agent_wiz_6_Action_shaped_reward_HER/DRQN_HER.py (new file, mode 100644)
import random

import numpy as np
import tensorflow as tf

random.seed(123)
np.random.seed(123)

class DRQN(object):
    def __init__(self, action_n, cell, scope, fcl_dims, save_path, input_size, nodes_num, gamma=0.98):
        self.cell = cell
        self.scope = scope
        self.gamma = gamma
        self.fc1_dims = fcl_dims
        self.action_n = action_n
        self.save_path = save_path
        self.nodes_num = nodes_num

        with tf.variable_scope(scope):
            # separate agent observations (image features) and positions
            self.inputs = tf.placeholder(tf.float32, shape=(None, 515), name="features_positions")
            # additional goals
            self.goals = tf.placeholder(tf.float32, shape=(None, 3), name="Goals_")
            # previous action
            self.pre_action = tf.placeholder(tf.int32, shape=(None,), name="pre_action")
            # actions
            self.actions = tf.placeholder(tf.int32, shape=(None,), name="actions")
            # Q-target values
            self.Q_values = tf.placeholder(tf.float32, shape=(None,), name="Targets_Q_Values")

            self.pre_action_ = tf.one_hot(self.pre_action, self.action_n, dtype=tf.float32,
                                          name="pre_action_OneHot_enc")

            lstm_input = tf.concat((self.inputs, self.goals), axis=1)
            lstm_input_ = tf.concat((lstm_input, self.pre_action_), axis=1)
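
            # Annotation (editor's note, not in the original commit): the LSTM input built above
            # concatenates the 515-dim feature/position vector, the 3-dim goal, and the one-hot
            # previous action, i.e. 515 + 3 + action_n values per step (524 for the 6 actions
            # used in DRQN_HER_Training.py, matching its input_size setting).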
            with tf.variable_scope("RNN"):
                self.train_length = tf.placeholder(tf.int32)
                self.batch_size = tf.placeholder(tf.int32, shape=[])

                self.input_flat = tf.reshape(tf.layers.flatten(lstm_input_),
                                             [self.batch_size, self.train_length, input_size])

                # number_of_units may need to be changed
                # self.cell = tf.contrib.rnn.BasicLSTMCell(num_units=self.nodes_num,
                #                                          state_is_tuple=True)
                self.state_in = self.cell.zero_state(self.batch_size, tf.float32)

                self.rnn, self.rnn_state = tf.nn.dynamic_rnn(inputs=self.input_flat,
                                                             cell=self.cell,
                                                             dtype=tf.float32,
                                                             initial_state=self.state_in,
                                                             scope=scope + '_rnn')

                self.rnn_flat = tf.reshape(self.rnn, shape=[-1, self.nodes_num])

            dense1 = tf.layers.dense(self.rnn_flat, self.fc1_dims, activation=tf.nn.relu, trainable=True)

            # final output layer
            self.predict_op = tf.layers.dense(dense1, action_n, trainable=True)

            actions_q_values = tf.reduce_sum(self.predict_op * tf.one_hot(self.actions, self.action_n),
                                             reduction_indices=[1])

            # self.clipped_Q_values = tf.clip_by_value(self.Q_values, -1 / (1 - self.gamma), 0)
            self.cost = tf.reduce_mean(tf.square(self.Q_values - actions_q_values))
            self.train_op = tf.train.AdamOptimizer(1e-3).minimize(self.cost)

            tf.summary.scalar("Cost", self.cost)
            tf.summary.histogram("Goals", self.goals)
            tf.summary.histogram("Action_Q_values", self.Q_values)
            tf.summary.histogram("LSTM", self.rnn)
            tf.summary.histogram("LSTM_State", self.rnn_state)
            self.merged = tf.summary.merge_all()

    def hard_update_from(self, other):
        mine = [t for t in tf.trainable_variables() if t.name.startswith(self.scope)]
        mine = sorted(mine, key=lambda v: v.name)
        theirs = [t for t in tf.trainable_variables() if t.name.startswith(other.scope)]
        theirs = sorted(theirs, key=lambda v: v.name)
        self.session.run([v_t.assign(v) for v_t, v in zip(mine, theirs)])

    def soft_update_from(self, other, tau=0.95):
        mine = [t for t in tf.trainable_variables() if t.name.startswith(self.scope)]
        mine = sorted(mine, key=lambda v: v.name)
        theirs = [t for t in tf.trainable_variables() if t.name.startswith(other.scope)]
        theirs = sorted(theirs, key=lambda v: v.name)
        self.session.run([v_t.assign(v_t * (1. - tau) + v * tau) for v_t, v in zip(mine, theirs)])

    def set_session(self, session):
        self.session = session

    def predict(self, pos_obs_state, goals, batch_size, trace_length, rnn_state, pre_action):
        actions_q_values, rnn, rnn_state_ = self.session.run(
            [self.predict_op, self.rnn, self.rnn_state],
            feed_dict={self.goals: goals,
                       self.state_in: rnn_state,
                       self.inputs: pos_obs_state,
                       self.batch_size: batch_size,
                       self.pre_action: pre_action,
                       self.train_length: trace_length})
        return actions_q_values, rnn, rnn_state_

    def update(self, goals, states, actions, batch_size, q_values, trace_length, rnn_state, pre_action):
        self.c, _ = self.session.run(
            [self.cost, self.train_op],
            feed_dict={self.goals: goals,
                       self.inputs: states,
                       self.actions: actions,
                       self.Q_values: q_values,
                       self.state_in: rnn_state,
                       self.batch_size: batch_size,
                       self.pre_action: pre_action,
                       self.train_length: trace_length})
        return self.c

    def sample_action(self, goal, batch_size, trace_length, epsilon, rnn_state, pos_obs_state, pre_action):
        """Implements epsilon greedy algorithm"""
        if np.random.random() < epsilon:
            q_values, rnn, rnn_state_ = self.predict(pos_obs_state=[pos_obs_state],
                                                     goals=[goal],
                                                     pre_action=[pre_action],
                                                     batch_size=batch_size,
                                                     trace_length=trace_length,
                                                     rnn_state=rnn_state)
            action = np.random.randint(1, self.action_n)
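            # Annotation (editor's note, not in the original commit): np.random.randint(1, self.action_n)
            # draws from 1 to action_n - 1 inclusive, so action index 0 is never selected by the
            # random-exploration branch.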
        else:
            action_q_values, _, rnn_state_ = self.predict(pos_obs_state=[pos_obs_state],
                                                          goals=[goal],
                                                          pre_action=[pre_action],
                                                          batch_size=batch_size,
                                                          trace_length=trace_length,
                                                          rnn_state=rnn_state)
            action = np.argmax(action_q_values[0])
        return action, rnn_state_

    def load(self):
        self.saver = tf.train.Saver(tf.global_variables())
        load_was_success = True
        try:
            save_dir = '/'.join(self.save_path.split('/')[:-1])
            ckpt = tf.train.get_checkpoint_state(save_dir)
            load_path = ckpt.model_checkpoint_path
            self.saver.restore(self.session, load_path)
        except:
            print("no saved model to load. starting new session")
            load_was_success = False
        else:
            print("loaded model: {}".format(load_path))
            saver = tf.train.Saver(tf.global_variables())
            episode_number = int(load_path.split('-')[-1])

    def save(self, n):
        self.saver.save(self.session, self.save_path, global_step=n)
        print("SAVED MODEL #{}".format(n))

    def optimize(self, model, target_model, batch_size, trace_length, her_buffer, optimization_steps):
        losses = 0
        for _ in range(optimization_steps):
            rnn_stat_train = (np.zeros([batch_size, self.nodes_num]),
                              np.zeros([batch_size, self.nodes_num]))

            train_batch = her_buffer.sample(batch_size=batch_size, trace_length=trace_length)
            pre_action, states, curr_actions, rewards, next_states, dones, goals = map(np.array, zip(*train_batch))

            # Calculate targets
            next_Qs, _, _ = target_model.predict(goals=goals,
                                                 pre_action=pre_action,
                                                 pos_obs_state=next_states,
                                                 rnn_state=rnn_stat_train,
                                                 trace_length=trace_length,
                                                 batch_size=batch_size)
            next_Q = np.amax(next_Qs, axis=1)
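
            # Annotation (editor's note, not in the original commit): the next line forms the usual
            # bootstrapped Q-learning target y = r + gamma * (1 - done) * max_a Q_target(s', g, a);
            # assuming `dones` is a boolean array, np.invert(dones) zeroes the bootstrap term on
            # terminal transitions.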
            target_q_values = rewards + np.invert(dones).astype(np.float32) * self.gamma * next_Q

            # Calculate network loss
            loss = model.update(goals=goals,
                                states=states,
                                actions=curr_actions,
                                pre_action=pre_action,
                                rnn_state=rnn_stat_train,
                                q_values=target_q_values,
                                trace_length=trace_length,
                                batch_size=batch_size)
            losses += loss

        return losses / optimization_steps
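
For orientation, here is a minimal, hypothetical usage sketch of the DRQN class above. It mirrors the wiring used in DRQN_HER_Training.py (next file); the checkpoint path is a placeholder, the import path assumes the directory name from this commit (the training script itself imports from a differently named package), and the sizes follow the 524-dim LSTM input used there.

import numpy as np
import tensorflow as tf
from Agent_wiz_6_Action_shaped_reward_HER.DRQN_HER import DRQN  # path assumed from this commit's layout

# two recurrent Q-networks: an online model and a target model for bootstrapped targets
cell = tf.nn.rnn_cell.LSTMCell(num_units=256, state_is_tuple=True)
cellT = tf.nn.rnn_cell.LSTMCell(num_units=256, state_is_tuple=True)
model = DRQN(action_n=6, cell=cell, scope="model", fcl_dims=512,
             save_path="/tmp/DRQN.ckpt", input_size=524, nodes_num=256)
target_model = DRQN(action_n=6, cell=cellT, scope="target_model", fcl_dims=512,
                    save_path="/tmp/DRQN.ckpt", input_size=524, nodes_num=256)

with tf.Session() as sess:
    model.set_session(sess)
    target_model.set_session(sess)
    sess.run(tf.global_variables_initializer())

    # one epsilon-greedy step from a dummy 515-dim feature/position vector and a 3-dim goal
    rnn_state = (np.zeros([1, 256]), np.zeros([1, 256]))
    action, rnn_state = model.sample_action(goal=np.zeros(3), batch_size=1, trace_length=1,
                                            epsilon=0.1, rnn_state=rnn_state,
                                            pos_obs_state=np.zeros(515), pre_action=0)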

Agent_wiz_6_Action_shaped_reward_HER/DRQN_HER_Training.py (new file, mode 100644)
import random

import numpy as np
import pandas as pd
import tensorflow as tf

from Agent_shaped_reward_wiz_her_images_and_postions_6actions.DRQN_HER import DRQN
from Agent_shaped_reward_wiz_her_images_and_postions_6actions.Environment import Environment
from Agent_shaped_reward_wiz_her_images_and_postions_6actions.Environment_top_view import Environment_topview
from Agent_shaped_reward_wiz_her_images_and_postions_6actions.Her_episodes_experiences import her_buffer
from Agent_shaped_reward_wiz_her_images_and_postions_6actions.autoencoder import load_autoencoder
from Agent_shaped_reward_wiz_her_images_and_postions_6actions.experience_buffer import experience_buffer
from Agent_shaped_reward_wiz_her_images_and_postions_6actions.helper import plotting_training_log, train_valid_env_sync, validate

random.seed(123)
np.random.seed(123)

fields_name = ["iteration", "successes"]

dir = "/home/nagi/Desktop/Master_project_final/DRQN_3_her_sparse_image_and_pos_F1_6_Actions/DRQN.ckpt"

##### environment variables
grid_size = 0.18                     # size of the agent's step
top_view = True                      # display the top view
distance_threshold = grid_size * 2   # distance threshold to the goal
action_n = 6                         # number of allowed actions
random_init_position = False         # random initial positions only -- no change in the agent orientation
random_init_pose = True              # random initial positions with random agent orientation
reward = "shaped"                    # reward type: "shaped" or "sparse"

######################### hyper-parameters
num_episodes = 15001
her_samples = 8
batch_size = 32
trace_length = 8
gamma = 0.99
fcl_dims = 512
nodes_num = 256
optimistion_steps = 40
epsilon_max = 1
epsilon_min = 0
input_size = 524                     # size of the input to the LSTM
epsilon_decay = epsilon_max - (epsilon_max / 3000)
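# Annotation (editor's note, not in the original commit): with epsilon_max = 1 this gives
# epsilon_decay = 1 - 1/3000 ~= 0.99967; the training loop below multiplies epsilon by this
# factor once per episode, so epsilon decays roughly as exp(-n/3000) (about 0.37 after
# 3000 episodes).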

## pandas data-frame for plotting
plotted_data = pd.DataFrame(columns=["Episodes", "Successful trajectories", "Failed trajectories",
                                     "Ratio", "loss", "epsilon", "F1"])

# experience replay parameters
her_rec_buffer = her_buffer()
episode_buffer = experience_buffer(distance=distance_threshold,
                                   reward_typ=reward,
                                   her_samples=her_samples)

env = Environment(random_init_position=random_init_position,
                  random_init_pos_orient=random_init_pose,
                  reward_typ=reward,
                  distance=distance_threshold,
                  random_goals=True,
                  grid_size=grid_size,
                  agent_mode="bot")

if top_view:
    envT = Environment_topview(grid_size=grid_size,
                               agent_mode="bot",
                               distance=distance_threshold,
                               reward_typ=reward)

# Autoencoder
print("Autoencoder")
ae_sess, ae = load_autoencoder()

global_step = tf.Variable(0, name="global_step", trainable=False)

loss = 0

# main loop
print("DQN_HER_Model")
drqn_graph = tf.Graph()

cell = tf.nn.rnn_cell.LSTMCell(num_units=nodes_num, state_is_tuple=True)
cellT = tf.nn.rnn_cell.LSTMCell(num_units=nodes_num, state_is_tuple=True)

model = DRQN(action_n=action_n,
             cell=cell,
             fcl_dims=fcl_dims,
             scope="model",
             save_path=dir,
             nodes_num=nodes_num,
             input_size=input_size)

target_model = DRQN(action_n=action_n,
                    cell=cellT,
                    fcl_dims=fcl_dims,
                    scope="target_model",
                    save_path=dir,
                    nodes_num=nodes_num,
                    input_size=input_size)

print("##### Env with grid_size equals", grid_size, "and", reward, "reward ######")

with tf.Session() as sess:
    model.set_session(sess)
    target_model.set_session(sess)
    sess.run(tf.global_variables_initializer())
    model.load()

    start = global_step.eval(sess)
    successes = 0
    failures = 0
    epsilon = 1

    for n in range(start, num_episodes):
        step_num = 0

        # rnn init state
        rnn_state = (np.zeros([1, nodes_num]), np.zeros([1, nodes_num]))

        # reset environment
        obs_state, pos_state, goal, distance, pose, pre_action_idx = env.reset()

        if top_view:
            # additional top-view env for validation
            agent_pos_top, pose_top = envT.reset(x_pos=pose[0],
                                                 y_pos=pose[1],
                                                 z_pos=pose[2],
                                                 angle=pose[4])
        if top_view:
            # validate the agent position against the two different environment objects
            train_valid_env_sync(pose, pose_top)

        features = ae_sess.run(ae.feature_vector, feed_dict={ae.image: obs_state[None, :, :, :]})
        features = np.squeeze(features, axis=0)
        obs_pos_state = np.concatenate((features, pos_state), axis=0)

        done = False

        while not done:
            curr_action_idx, rnn_state_ = model.sample_action(goal=goal,
                                                              batch_size=1,
                                                              trace_length=1,
                                                              epsilon=epsilon,
                                                              rnn_state=rnn_state,
                                                              pos_obs_state=obs_pos_state,
                                                              pre_action=pre_action_idx)

            obs_state_, pos_state_, distance_, done, reward, collision, pose_ = env.step(curr_action_idx, goal, distance)

            if top_view:
                # top-view environment used for verification of the main environment
                obsStateT, posStateT, distanceT, doneT, rewardT, collisionT, agentPoseT = envT.step(curr_action_idx,
                                                                                                    goal,
                                                                                                    distance)
            if top_view:
                # validate the agent position against the two different environment objects
                train_valid_env_sync(pose_, agentPoseT)

            features_ = ae_sess.run(ae.feature_vector, feed_dict={ae.image: obs_state_[None, :, :, :]})
            features_ = np.squeeze(features_, axis=0)
            obs_pos_state_ = np.concatenate((features_, pos_state_), axis=0)

            # append to episode buffer
            episode_buffer.add(np.reshape(np.array([pre_action_idx, obs_pos_state, curr_action_idx,
                                                    reward, obs_pos_state_, done, goal]), [1, 7]))

            rnn_state = rnn_state_
            obs_pos_state = obs_pos_state_
            distance = distance_
            pre_action_idx = curr_action_idx
            step_num += 1

            if done:
                if distance < distance_threshold:
                    successes += done
                else:
                    failures += done
                break

            if step_num == 200:
                done = True
                failures += done
                break

        her_buffer = episode_buffer.her()  # note: this rebinds the imported name her_buffer
        her_rec_buffer.add(her_buffer)
        episode_buffer.clear()

        plotted_data = plotted_data.append({"Episodes": str(n),
                                            "Successful trajectories": successes / (n + 1),
                                            "Failed trajectories": failures / (n + 1),
                                            "Ratio": (successes / (failures + 1)),
                                            "loss": loss,
                                            "epsilon": epsilon,
                                            "F1": ((1 - (failures / (n + 1))) * (successes / (n + 1))) /
                                                  (((1 - (failures / (n + 1))) + (successes / (n + 1))) + 1)},
                                           ignore_index=True)
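
        # Annotation (editor's note, not in the original commit): writing s = successes/(n+1) and
        # f = failures/(n+1), the "F1" column above is ((1 - f) * s) / (((1 - f) + s) + 1). This is a
        # bounded success/failure summary rather than the textbook F1 score 2*p*r/(p + r).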

        plotting_training_log(n, plotted_data, successes, failures, loss,
                              goal, distance, pos_state, epsilon, step_num)

        ### validation ###
        if n % 2000 == 0 and n > 0:
            validate(n=n,
                     nodes_num=nodes_num,
                     top_view=top_view,
                     env=env,
                     envT=envT,
                     ae=ae,
                     ae_sess=ae_sess,
                     distance_threshold=distance_threshold,
                     model=model)

        if n > 100 and n != 0:
            loss = model.optimize(model=model,
                                  batch_size=batch_size,
                                  trace_length=trace_length,
                                  target_model=target_model,
                                  her_buffer=her_rec_buffer,
                                  optimization_steps=optimistion_steps)

        if n % 4000 == 0 and n > 0:
            print("#### update model ####")
            target_model.soft_update_from(model)

        epsilon = max(epsilon * epsilon_decay, epsilon_min)

        global_step.assign(n).eval()

        # saving
        if n % 50 == 0 and n > 0:
            model.save(n)
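
The replay components this loop leans on (her_buffer from Her_episodes_experiences.py and experience_buffer.py with its her() relabeling step) are not part of this excerpt. As a rough, hypothetical sketch of the hindsight-relabeling idea only, not the repository's implementation: each transition of a finished episode can be duplicated with the goal replaced by a position actually reached later in the same episode, and the reward and done flag recomputed against that substituted goal.

import numpy as np

def relabel_with_hindsight(episode, her_samples, distance_threshold):
    """Hypothetical HER sketch: `episode` is a list of
    (pre_action, state, action, reward, next_state, done, goal) tuples whose
    state vectors end with the agent's (x, y, z) position, as in the loop above."""
    relabeled = []
    for t, (pre_a, s, a, r, s_, done, goal) in enumerate(episode):
        # sample "future" transitions from the remainder of the episode
        future_idx = np.random.randint(t, len(episode), her_samples)
        for i in future_idx:
            new_goal = episode[i][4][-3:]              # a position that was actually reached
            dist = np.linalg.norm(s_[-3:] - new_goal)
            new_done = dist < distance_threshold
            new_r = 0.0 if new_done else -1.0          # sparse-reward convention for the sketch
            relabeled.append((pre_a, s, a, new_r, s_, new_done, new_goal))
    return relabeled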

Agent_wiz_6_Action_shaped_reward_HER/Environment.py (new file, mode 100644)
import random
import re

import ai2thor.controller
import numpy as np
import pandas as pd

random.seed(123)
np.random.seed(123)

class Environment(object):
    def __init__(self, distance, reward_typ, action_n=6, grid_size=0.15, visibility_distance=1.5,
                 player_screen_width=300, player_screen_height=300, full_scrn=False, depth_image=False,
                 random_init_position=False, random_init_pos_orient=False, random_goals=False,
                 scene="FloorPlan225", agent_mode="tall"):
        self.scene = scene
        self.action_n = action_n
        self.distance = distance
        self.grid_size = grid_size
        self.full_scrn = full_scrn
        self.reward_typ = reward_typ
        self.agent_mode = agent_mode
        self.depth_image = depth_image
        self.random_goal = random_goals
        self.visibility_distance = visibility_distance
        self.player_screen_width = player_screen_width
        self.player_screen_height = player_screen_height
        self.random_init_position = random_init_position
        self.random_init_pos_orient = random_init_pos_orient
        self.orientations = [0.0, 90.0, 180.0, 270.0, 360.0]

        self.ctrl = ai2thor.controller.Controller(scene=self.scene,
                                                  gridSize=self.grid_size,
                                                  renderDepthImage=self.depth_image,
                                                  visibilityDistance=self.visibility_distance,
                                                  agentMode=self.agent_mode)

    def reset(self):
        new_random_goal = 0
        self.ctrl.reset(self.scene)

        agent_init_position, random_goal = self.random_positions()

        if self.random_init_pos_orient:
            # random initial agent position and orientation
            self.ctrl.step(action="TeleportFull",
                           x=agent_init_position[0],
                           y=agent_init_position[1],
                           z=agent_init_position[2],
                           rotation=random.choice(self.orientations),
                           horizon=0.0)
        elif self.random_init_position:
            # random initial agent position only
            self.ctrl.step(action="Teleport",
                           x=agent_init_position[0],
                           y=agent_init_position[1],
                           z=agent_init_position[2])
        else:
            pass

        if self.random_goal:
            new_random_goal = random_goal

        agent_position, agent_rotation, agent_pose = self.agent_properties()

        if not np.array_equal(np.array(list(self.ctrl.last_event.metadata["agent"]["position"].values())),
                              agent_position):
            print("agent init position does not equal to agent position attribute")

        pre_action_idx = 0

        # if the agent init position equals the goal position, respawn the goal at a different position
        new_goal = self.agent_goal_pos_not_equal(agent_position, new_random_goal)

        agent_pos_dis = np.linalg.norm(new_goal - agent_position)

        first_person_obs = self.ctrl.last_event.frame

        return first_person_obs, agent_position, new_goal, agent_pos_dis, agent_pose, pre_action_idx

    def step(self, action, goal, distance):
        first_person_obs, agent_position, distance_, done, reward, collision, agent_pose = 0, 0, 0, 0, 0, 0, 0

        if action == 0:
            self.ctrl.step(action='RotateRight')
            reward, done, distance_, first_person_obs, collision, agent_position, agent_pose = \
                self.post_action_state(goal, distance)
        elif action == 1:
            self.ctrl.step(action='RotateLeft')
            reward, done, distance_, first_person_obs, collision, agent_position, agent_pose = \
                self.post_action_state(goal, distance)
        elif action == 2:
            self.ctrl.step(action="MoveAhead")
            reward, done, distance_, first_person_obs, collision, agent_position, agent_pose = \
                self.post_action_state(goal, distance)
        elif action == 3:
            self.ctrl.step(action="MoveBack")
            reward, done, distance_, first_person_obs, collision, agent_position, agent_pose = \
                self.post_action_state(goal, distance)
        elif action == 4:
            self.ctrl.step(action="MoveRight")
            reward, done, distance_, first_person_obs, collision, agent_position, agent_pose = \
                self.post_action_state(goal, distance)
        elif action == 5:
            self.ctrl.step(action="MoveLeft")
            reward, done, distance_, first_person_obs, collision, agent_position, agent_pose = \
                self.post_action_state(goal, distance)

        return first_person_obs, agent_position, distance_, done, reward, collision, agent_pose
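
    # Annotation (editor's note, not in the original commit): discrete action indices used above --
    # 0: RotateRight, 1: RotateLeft, 2: MoveAhead, 3: MoveBack, 4: MoveRight, 5: MoveLeft.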

    def agent_properties(self):
        agent_position = np.array(list(self.ctrl.last_event.metadata["agent"]["position"].values()))
        agent_rotation = np.array(list(self.ctrl.last_event.metadata["agent"]["rotation"].values()))
        agent_pose = np.concatenate((agent_position, agent_rotation), axis=0)
        return agent_position, agent_rotation, agent_pose

    def get_reachable_position(self):
        self.ctrl.step(action='GetReachablePositions')
        return pd.DataFrame(self.ctrl.last_event.metadata["reachablePositions"]).values

    def random_positions(self):
        while True:
            positions = self.get_reachable_position()
            random_positions = random.sample(list(positions), 2)
            agent_pos = random_positions[0]
            goal_pos = random_positions[1]
            distance = np.linalg.norm(goal_pos - agent_pos)
            if distance > 1.5 * self.distance:
                break
            else:
                print("Agent to Goal distance less than", 1.5 * self.distance)
        return agent_pos, goal_pos

    def post_action_state(self, goal, dist):
        if self.reward_typ == "shaped":
            reward, done, dist_, first_person_obs, collide, agent_position, agent_pose = \
                self.shaped_reward(goal=goal, dist=dist)
        else:
            reward, done, dist_, first_person_obs, collide, agent_position, agent_pose = \
                self.sparse_reward(goal=goal)
        return reward, done, dist_, first_person_obs, collide, agent_position, agent_pose

    def agent_goal_pos_not_equal(self, agent_pos, goal_pos):
        new_random_goal_position = goal_pos
        distance = np.linalg.norm(goal_pos - agent_pos)
        if distance <= 1.5 * self.distance:
            print("agent position and goal position < or =", 1.5 * self.distance)
            _, new_random_goal_position = self.random_positions()
            print("agent new position:", agent_pos[0], ",", agent_pos[2],
                  "and goal_position", new_random_goal_position[0], ",", new_random_goal_position[2])