# Environment settings.
# NOTE(review): the section key on this line was an unreadable "×" and all
# indentation was flattened in the source. The key name "environment" and the
# nesting below are reconstructed from the keys themselves — confirm against
# the code that consumes this file.
environment:
  type: MemoryGym
  name: SearingSpotlights-v0
  frame_skip: 1
  last_action_to_obs: false
  last_reward_to_obs: false
  obs_stacks: 1
  grayscale: false
  resize_vis_obs: [84, 84]
  positional_encoding: false
  # Nested so these keys no longer share one flat namespace with the rest of
  # the file.
  reset_params:
    start-seed: 200001
    num-seeds: 1
    max_steps: 256
    initial_spawns: 4
    num_spawns: 30
    initial_spawn_interval: 30
    spawn_interval_threshold: 10
    spawn_interval_decay: 0.95
    spot_min_radius: 7.5
    spot_max_radius: 13.75
    spot_min_speed: 0.0025
    spot_max_speed: 0.0075
    spot_damage: 1.0
    visual_feedback: true
    black_background: false
    hide_chessboard: false
    light_dim_off_duration: 6
    light_threshold: 255
    num_coins: [1]
    coin_scale: 0.375
    coins_visible: false
    use_exit: true
    exit_visible: false
    exit_scale: 0.5
    agent_speed: 3.0
    agent_health: 5
    agent_scale: 0.25
    agent_visible: false
    sample_agent_position: true
    show_last_action: true
    show_last_positive_reward: true
    reward_inside_spotlight: 0.0
    reward_outside_spotlight: 0.0
    reward_death: 0.0
    reward_exit: 1.0
    reward_max_steps: 0.0
    reward_coin: 0.25
    # NOTE(review): assumed to be a reset parameter (it equals start-seed and
    # num-seeds is 1); move one level up if the consumer expects it there.
    seed: 200001
  reward_normalization: 0
# Model settings.
# NOTE(review): the section key on this line was an unreadable "×" and all
# indentation was flattened in the source. The key name "model" and the
# nesting below are reconstructed from the keys themselves — confirm against
# the code that consumes this file.
model:
  load_model: false
  # Was a bare `model_path:` (parses as null). An explicit empty string is
  # assumed to be the intended value — TODO confirm; irrelevant while
  # load_model is false.
  model_path: ""
  checkpoint_interval: 500
  activation: relu
  vis_encoder: cnn
  vec_encoder: linear
  num_vec_encoder_units: 128
  hidden_layer: default
  num_hidden_layers: 1
  num_hidden_units: 384
  transformer:
    num_blocks: 3
    embed_dim: 384
    num_heads: 4
    memory_length: 256
    positional_encoding: absolute
    layer_norm: pre
    init_weights: xavier
    gtrxl: false
    gtrxl_bias: 0.0
    gtrxl_swap: false
    share_heads: true
    # NOTE(review): the next two keys are assumed to belong to the transformer
    # block because they sit between its keys and obs_decoder; verify.
    max_episode_steps: 257
    add_positional_encoding_to_query: false
  obs_decoder:
    attach_to: memory
    detach_gradient: false
# Sampling settings.
# NOTE(review): the section key on this line was an unreadable "×" in the
# source; "sampler" is inferred from the two keys below — confirm against the
# code that consumes this file.
sampler:
  n_workers: 32
  worker_steps: 512
# Trainer settings.
# NOTE(review): the section key on this line was an unreadable "×" and all
# indentation was flattened in the source. The key name "trainer" and the
# nesting below are reconstructed from the keys themselves — confirm against
# the code that consumes this file.
trainer:
  algorithm: PPO
  resume_at: 0
  gamma: 0.995
  lamda: 0.95
  updates: 30000
  epochs: 3
  refresh_buffer_epoch: -1
  n_mini_batches: 8
  # Quoted: an unquoted `no` is resolved to boolean false by YAML 1.1 loaders.
  advantage_normalization: "no"
  value_coefficient: 0.5
  max_grad_norm: 0.25
  share_parameters: true
  # Each schedule is its own mapping; written flat, their identical key names
  # (initial/final/power/max_decay_steps) would overwrite one another.
  learning_rate_schedule:
    initial: 0.000275
    # Written with a decimal point: PyYAML loads `1e-05` as a string.
    final: 1.0e-05
    power: 1.0
    max_decay_steps: 20000
  beta_schedule:
    initial: 0.0001
    # Written with a decimal point: PyYAML loads `1e-06` as a string.
    final: 1.0e-06
    power: 1.0
    max_decay_steps: 20000
  clip_range_schedule:
    initial: 0.1
    final: 0.1
    power: 1.0
    max_decay_steps: 10000
  obs_reconstruction_schedule:
    initial: 0.1
    final: 0.1
    power: 1.0
    max_decay_steps: 1000
  # Block form of the original inline mapping {'initial': 0.0}.
  ground_truth_estimator_schedule:
    initial: 0.0