# Environment section: which task to run and how observations are prepared.
# NOTE(review): the header line of this section was an unreadable `×` and all
# nesting had been flattened. The `environment` key and the extent of
# `reset_params` are reconstructed from the keys themselves - confirm against
# the code that loads this file.
environment:
  type: MemoryGym
  name: MortarMayhem-v0
  frame_skip: 1
  last_action_to_obs: false
  last_reward_to_obs: false
  obs_stacks: 1
  grayscale: false
  resize_vis_obs: [84, 84]
  positional_encoding: false
  reset_params:
    # start-seed equals seed below and num-seeds is 1.
    start-seed: 200003
    num-seeds: 1
    agent_scale: 0.25
    arena_size: 5
    allowed_commands: 9
    command_count: [10]
    explosion_duration: [6]
    explosion_delay: [18]
    reward_command_failure: 0.0
    reward_command_success: 0.1
    reward_episode_success: 0.0
    # NOTE(review): assumed to belong to reset_params because it directly
    # follows the other reset values; it may instead be an environment-level
    # key - verify.
    seed: 200003
  reward_normalization: 0
  # NOTE(review): looks like a misspelling of `positional_encoding`, which is
  # already set above. Kept unchanged; confirm whether anything reads this key
  # before renaming or removing it.
  positional_endocing: false
# Model section: checkpoint handling and network layout.
# NOTE(review): the header line of this section was an unreadable `×` and all
# nesting had been flattened. The `model` key and the grouping of keys under
# `recurrence` and `obs_decoder` are reconstructed - confirm against the code
# that loads this file.
model:
  load_model: false
  # Explicit null instead of a bare empty value; both load as null.
  model_path: null
  checkpoint_interval: 500
  activation: relu
  vis_encoder: cnn
  vec_encoder: linear
  num_vec_encoder_units: 128
  hidden_layer: default
  num_hidden_layers: 1
  num_hidden_units: 512
  recurrence:
    layer_type: gru
    # NOTE(review): -1 is presumably a sentinel value - confirm its meaning
    # with the consumer.
    sequence_length: -1
    hidden_state_size: 512
    hidden_state_init: zero
    reset_hidden_state: true
    residual: false
    num_layers: 1
  obs_decoder:
    attach_to: memory
    detach_gradient: false
# Sampler section: worker count and steps per worker.
# NOTE(review): the header line of this section was an unreadable `×`; the
# `sampler` key is reconstructed from the keys below - confirm against the
# code that loads this file.
sampler:
  n_workers: 32
  worker_steps: 512
# Trainer section: algorithm choice, optimisation settings and schedules.
# NOTE(review): the header line of this section was an unreadable `×` and all
# nesting had been flattened. The `trainer` key is reconstructed - confirm
# against the code that loads this file.
trainer:
  algorithm: PPO
  resume_at: 0
  gamma: 0.995
  lamda: 0.95
  updates: 30000
  epochs: 3
  # NOTE(review): -1 is presumably a sentinel value - confirm its meaning
  # with the consumer.
  refresh_buffer_epoch: -1
  n_mini_batches: 8
  # Quoted: a plain `no` is resolved to boolean false by YAML 1.1 loaders.
  # NOTE(review): assumes the consumer expects the string - confirm.
  advantage_normalization: 'no'
  value_coefficient: 0.5
  max_grad_norm: 0.25
  share_parameters: true
  # Each schedule below is its own mapping. Left flat, the repeated
  # initial/final/power/max_decay_steps keys are duplicates and only the last
  # occurrence survives in most parsers.
  learning_rate_schedule:
    initial: 0.000275
    # Written with a decimal point so YAML 1.1 resolvers (e.g. PyYAML) read a
    # float; `1e-05` without the point is loaded as a string there.
    final: 1.0e-05
    power: 1.0
    max_decay_steps: 10000
  beta_schedule:
    initial: 0.0001
    # Decimal point added for the same float-resolution reason as above.
    final: 1.0e-06
    power: 1.0
    max_decay_steps: 10000
  clip_range_schedule:
    # initial equals final here.
    initial: 0.1
    final: 0.1
    power: 1.0
    max_decay_steps: 10000
  obs_reconstruction_schedule:
    # initial equals final here.
    initial: 0.1
    final: 0.1
    power: 1.0
    max_decay_steps: 1000