# tia/DreamerV2/configs.yaml
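# Hyperparameter presets for the TIA DreamerV2 agent: `defaults` is the base
# configuration, and the named sections below it (`atari`, `debug`) override
# matching keys when that preset is selected at launch.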

defaults:
  gpu: 'none'
  logdir: ./
  traindir: null
  evaldir: null
  offline_traindir: ''
  offline_evaldir: ''
  seed: 0
  steps: 1e7
  eval_every: 1e4
  log_every: 1e4
  reset_every: 0
  gpu_growth: True
  precision: 32
  debug: False
  expl_gifs: False
  # Environment
  task: 'dmc_walker_walk'
  size: [64, 64]
  envs: 1
  action_repeat: 2
  time_limit: 1000
  prefill: 2500
  eval_noise: 0.0
  clip_rewards: 'identity'
  atari_grayscale: False
  # Model
  dyn_cell: 'gru'
  dyn_hidden: 200
  dyn_deter: 200
  dyn_stoch: 50
  dyn_discrete: 0
  dyn_input_layers: 1
  dyn_output_layers: 1
  dyn_shared: False
  dyn_mean_act: 'none'
  dyn_std_act: 'sigmoid2'
  dyn_min_std: 0.1
  grad_heads: ['image', 'reward']
  units: 400
  reward_layers: 2
  discount_layers: 3
  value_layers: 3
  actor_layers: 4
  act: 'elu'
  cnn_depth: 32
  encoder_kernels: [4, 4, 4, 4]
  decoder_kernels: [5, 5, 6, 6]
  decoder_thin: True
  value_head: 'normal'
  kl_scale: '1.0'
  kl_balance: '0.8'
  kl_free: '1.0'
  pred_discount: False
  discount_scale: 1.0
  reward_scale: 1.0
  weight_decay: 0.0
  # Training
  batch_size: 50
  batch_length: 50
  train_every: 5
  train_steps: 1
  pretrain: 100
  model_lr: 3e-4
  value_lr: 8e-5
  actor_lr: 8e-5
  opt_eps: 1e-5
  grad_clip: 100
  value_grad_clip: 100
  actor_grad_clip: 100
  dataset_size: 0
  oversample_ends: False
  slow_value_target: True
  slow_actor_target: True
  slow_target_update: 100
  slow_target_fraction: 1
  opt: 'adam'
  # Behavior.
  discount: 0.99
  discount_lambda: 0.95
  imag_horizon: 15
  imag_gradient: 'dynamics'
  imag_gradient_mix: '0.1'
  imag_sample: True
  actor_dist: 'trunc_normal'
  actor_entropy: '1e-4'
  actor_state_entropy: 0.0
  actor_init_std: 1.0
  actor_min_std: 0.1
  actor_disc: 5
  actor_temp: 0.1
  actor_outscale: 0.0
  expl_amount: 0.0
  eval_state_mean: False
  collect_dyn_sample: True
  behavior_stop_grad: True
  value_decay: 0.0
  future_entropy: False
  # Exploration
  expl_behavior: 'greedy'
  expl_until: 0
  expl_extr_scale: 0.0
  expl_intr_scale: 1.0
  disag_target: 'stoch'
  disag_log: True
  disag_models: 10
  disag_offset: 1
  disag_layers: 4
  disag_units: 400
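# Atari preset: overrides applied on top of `defaults` for Atari tasks.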
atari:
  # General
  task: 'atari_demon_attack'
  steps: 3e7
  eval_every: 1e5
  log_every: 1e4
  prefill: 50000
  dataset_size: 2e6
  pretrain: 0
  precision: 16
  # Environment
  time_limit: 108000 # 30 minutes of game play.
  atari_grayscale: True
  action_repeat: 4
  eval_noise: 0.001
  train_every: 16
  train_steps: 1
  clip_rewards: 'tanh'
  # Model
  grad_heads: ['image', 'reward', 'discount']
  dyn_cell: 'gru_layer_norm'
  pred_discount: True
  cnn_depth: 48
  dyn_deter: 600
  dyn_hidden: 600
  dyn_stoch: 32
  dyn_discrete: 32
  reward_layers: 4
  discount_layers: 4
  value_layers: 4
  actor_layers: 4
  # Behavior
  actor_dist: 'onehot'
  actor_entropy: 'linear(3e-3,3e-4,2.5e6)'
  expl_amount: 0.0
  expl_until: 3e7
  discount: 0.995
  imag_gradient: 'both'
  imag_gradient_mix: 'linear(0.1,0,2.5e6)'
  # Training
  discount_scale: 5.0
  reward_scale: 1
  weight_decay: 1e-6
  model_lr: 2e-4
  kl_scale: 0.1
  kl_free: 0.0
  actor_lr: 4e-5
  value_lr: 1e-4
  oversample_ends: True
  # Disen: TIA's disentangled (task-irrelevant) distractor model branch
  disen_cnn_depth: 16
  disen_only_scale: 1.0
  disen_discount_scale: 2000.0
  disen_reward_scale: 2000.0
  num_reward_opt_iters: 20
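# Debug preset: minimal settings for quick smoke tests.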
debug:
  debug: True
  pretrain: 1
  prefill: 1
  train_steps: 1
  batch_size: 10
  batch_length: 20