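# Named configuration blocks: `defaults` holds the base values, and selecting
# a later block such as `atari` or `debug` overrides the matching keys.
# (Assumption: blocks are combined by a Dreamer-style launcher, e.g. a
# `--configs defaults atari` flag; the file itself does not say how it is loaded.)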
defaults:

  gpu: 'none'
  logdir: ./
  traindir: null
  evaldir: null
  offline_traindir: ''
  offline_evaldir: ''
  seed: 0
  steps: 1e7
  eval_every: 1e4
  log_every: 1e4
  reset_every: 0
  gpu_growth: True
  precision: 32
  debug: False
  expl_gifs: False

  # Environment
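  # (Added comments; semantics assumed from Dreamer-style agents.)
  # `action_repeat` repeats each policy action for that many frames,
  # `prefill` is the number of env steps gathered with random actions before
  # training begins, and `clip_rewards` names the transform applied to raw rewards.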
  task: 'dmc_walker_walk'
  size: [64, 64]
  envs: 1
  action_repeat: 2
  time_limit: 1000
  prefill: 2500
  eval_noise: 0.0
  clip_rewards: 'identity'
  atari_grayscale: False

  # Model
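  # (Added comment; interpretation assumed from DreamerV2-style RSSMs.)
  # `dyn_deter` sizes the deterministic GRU state and `dyn_stoch` the
  # stochastic latent; `dyn_discrete` gives the number of classes per
  # categorical latent, with 0 keeping a continuous Gaussian latent.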
  dyn_cell: 'gru'
  dyn_hidden: 200
  dyn_deter: 200
  dyn_stoch: 50
  dyn_discrete: 0
  dyn_input_layers: 1
  dyn_output_layers: 1
  dyn_shared: False
  dyn_mean_act: 'none'
  dyn_std_act: 'sigmoid2'
  dyn_min_std: 0.1
  grad_heads: ['image', 'reward']
  units: 400
  reward_layers: 2
  discount_layers: 3
  value_layers: 3
  actor_layers: 4
  act: 'elu'
  cnn_depth: 32
  encoder_kernels: [4, 4, 4, 4]
  decoder_kernels: [5, 5, 6, 6]
  decoder_thin: True
  value_head: 'normal'
  kl_scale: '1.0'
  kl_balance: '0.8'
  kl_free: '1.0'
  pred_discount: False
  discount_scale: 1.0
  reward_scale: 1.0
  weight_decay: 0.0

  # Training
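  # (Added comment; semantics assumed.) `slow_value_target` maintains a
  # slowly-updated copy of the value network: every `slow_target_update`
  # gradient steps the copy moves toward the online weights by
  # `slow_target_fraction` (1 amounts to a hard copy).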
  batch_size: 50
  batch_length: 50
  train_every: 5
  train_steps: 1
  pretrain: 100
  model_lr: 3e-4
  value_lr: 8e-5
  actor_lr: 8e-5
  opt_eps: 1e-5
  grad_clip: 100
  value_grad_clip: 100
  actor_grad_clip: 100
  dataset_size: 0
  oversample_ends: False
  slow_value_target: True
  slow_actor_target: True
  slow_target_update: 100
  slow_target_fraction: 1
  opt: 'adam'

  # Behavior
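  # (Added comment; assumed from the DreamerV2 family.) `imag_gradient`
  # picks the actor-gradient estimator for imagined rollouts: 'dynamics'
  # backpropagates through the learned model, 'reinforce' uses score-function
  # gradients, and 'both' blends the two with weight `imag_gradient_mix`.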
  discount: 0.99
  discount_lambda: 0.95
  imag_horizon: 15
  imag_gradient: 'dynamics'
  imag_gradient_mix: '0.1'
  imag_sample: True
  actor_dist: 'trunc_normal'
  actor_entropy: '1e-4'
  actor_state_entropy: 0.0
  actor_init_std: 1.0
  actor_min_std: 0.1
  actor_disc: 5
  actor_temp: 0.1
  actor_outscale: 0.0
  expl_amount: 0.0
  eval_state_mean: False
  collect_dyn_sample: True
  behavior_stop_grad: True
  value_decay: 0.0
  future_entropy: False

  # Exploration
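  # (Added comment; semantics assumed.) The `disag_*` keys set up a
  # Plan2Explore-style ensemble of `disag_models` one-step predictors of
  # `disag_target`; their disagreement acts as an intrinsic reward when
  # `expl_behavior` is not 'greedy'.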
  expl_behavior: 'greedy'
  expl_until: 0
  expl_extr_scale: 0.0
  expl_intr_scale: 1.0
  disag_target: 'stoch'
  disag_log: True
  disag_models: 10
  disag_offset: 1
  disag_layers: 4
  disag_units: 400

atari:
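  # (Added comment.) Overrides for Atari: a larger RSSM with categorical
  # latents, a learned discount head for episode ends, tanh reward clipping,
  # and 16-bit precision.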

  # General
  task: 'atari_demon_attack'
  steps: 3e7
  eval_every: 1e5
  log_every: 1e4
  prefill: 50000
  dataset_size: 2e6
  pretrain: 0
  precision: 16

  # Environment
  time_limit: 108000 # 30 minutes of game play.
  atari_grayscale: True
  action_repeat: 4
  eval_noise: 0.001
  train_every: 16
  train_steps: 1
  clip_rewards: 'tanh'

  # Model
  grad_heads: ['image', 'reward', 'discount']
  dyn_cell: 'gru_layer_norm'
  pred_discount: True
  cnn_depth: 48
  dyn_deter: 600
  dyn_hidden: 600
  dyn_stoch: 32
  dyn_discrete: 32
  reward_layers: 4
  discount_layers: 4
  value_layers: 4
  actor_layers: 4

  # Behavior
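  # (Added comment; schedule syntax assumed.) 'linear(a,b,n)' anneals a
  # value from a to b over the first n environment steps.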
  actor_dist: 'onehot'
  actor_entropy: 'linear(3e-3,3e-4,2.5e6)'
  expl_amount: 0.0
  expl_until: 3e7
  discount: 0.995
  imag_gradient: 'both'
  imag_gradient_mix: 'linear(0.1,0,2.5e6)'

  # Training
  discount_scale: 5.0
  reward_scale: 1
  weight_decay: 1e-6
  model_lr: 2e-4
  kl_scale: 0.1
  kl_free: 0.0
  actor_lr: 4e-5
  value_lr: 1e-4
  oversample_ends: True

  # Disen
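  # (Added comment; interpretation assumed.) These keys appear to configure a
  # separate "disentangled" branch in the style of Task Informed Abstractions:
  # the `disen_*` scales weight its reconstruction and adversarial heads, and
  # `num_reward_opt_iters` would be the number of inner steps its reward
  # predictor takes per model update.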
  disen_cnn_depth: 16
  disen_only_scale: 1.0
  disen_discount_scale: 2000.0
  disen_reward_scale: 2000.0
  num_reward_opt_iters: 20

debug:
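  # (Added comment.) Shrinks every budget to a minimum so a full
  # train/eval cycle finishes quickly; meant for smoke tests, not results.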
  debug: True
  pretrain: 1
  prefill: 1
  train_steps: 1
  batch_size: 10
  batch_length: 20