From 1ddf72b0c34a68ed3aba1fb8aadcf897950f7a79 Mon Sep 17 00:00:00 2001 From: VedantDave Date: Mon, 17 Jul 2023 10:48:01 +0200 Subject: [PATCH 1/2] Add Dreamer Files --- Dreamer/__pycache__/dreamers.cpython-37.pyc | Bin 0 -> 26044 bytes Dreamer/__pycache__/env_tools.cpython-37.pyc | Bin 0 -> 3111 bytes Dreamer/__pycache__/models.cpython-37.pyc | Bin 0 -> 11345 bytes Dreamer/__pycache__/tools.cpython-37.pyc | Bin 0 -> 19629 bytes Dreamer/__pycache__/tools.cpython-38.pyc | Bin 0 -> 19696 bytes Dreamer/__pycache__/wrappers.cpython-37.pyc | Bin 0 -> 19362 bytes Dreamer/__pycache__/wrappers.cpython-38.pyc | Bin 0 -> 19394 bytes Dreamer/dmc2gym/__init__.py | 52 ++ .../__pycache__/__init__.cpython-37.pyc | Bin 0 -> 991 bytes .../natural_imgsource.cpython-37.pyc | Bin 0 -> 2532 bytes .../__pycache__/wrappers.cpython-37.pyc | Bin 0 -> 5821 bytes Dreamer/dmc2gym/natural_imgsource.py | 82 ++ Dreamer/dmc2gym/wrappers.py | 208 +++++ Dreamer/dreamers.py | 741 ++++++++++++++++++ Dreamer/env_tools.py | 84 ++ Dreamer/local_dm_control_suite/README.md | 56 ++ Dreamer/local_dm_control_suite/__init__.py | 151 ++++ .../__pycache__/__init__.cpython-37.pyc | Bin 0 -> 4122 bytes .../__pycache__/acrobot.cpython-37.pyc | Bin 0 -> 4656 bytes .../__pycache__/ball_in_cup.cpython-37.pyc | Bin 0 -> 3244 bytes .../__pycache__/base.cpython-37.pyc | Bin 0 -> 4064 bytes .../__pycache__/cartpole.cpython-37.pyc | Bin 0 -> 7949 bytes .../__pycache__/cheetah.cpython-37.pyc | Bin 0 -> 2991 bytes .../__pycache__/finger.cpython-37.pyc | Bin 0 -> 7797 bytes .../__pycache__/fish.cpython-37.pyc | Bin 0 -> 6536 bytes .../__pycache__/hopper.cpython-37.pyc | Bin 0 -> 4590 bytes .../__pycache__/humanoid.cpython-37.pyc | Bin 0 -> 7041 bytes .../__pycache__/humanoid_CMU.cpython-37.pyc | Bin 0 -> 6208 bytes .../__pycache__/lqr.cpython-37.pyc | Bin 0 -> 8919 bytes .../__pycache__/manipulator.cpython-37.pyc | Bin 0 -> 9098 bytes .../__pycache__/pendulum.cpython-37.pyc | Bin 0 -> 3899 bytes .../__pycache__/point_mass.cpython-37.pyc | Bin 0 -> 4526 bytes .../__pycache__/quadruped.cpython-37.pyc | Bin 0 -> 15726 bytes .../__pycache__/reacher.cpython-37.pyc | Bin 0 -> 4303 bytes .../__pycache__/stacker.cpython-37.pyc | Bin 0 -> 6949 bytes .../__pycache__/swimmer.cpython-37.pyc | Bin 0 -> 7546 bytes .../__pycache__/walker.cpython-37.pyc | Bin 0 -> 5282 bytes Dreamer/local_dm_control_suite/acrobot.py | 127 +++ Dreamer/local_dm_control_suite/acrobot.xml | 43 + Dreamer/local_dm_control_suite/ball_in_cup.py | 100 +++ .../local_dm_control_suite/ball_in_cup.xml | 54 ++ Dreamer/local_dm_control_suite/base.py | 112 +++ Dreamer/local_dm_control_suite/cartpole.py | 230 ++++++ Dreamer/local_dm_control_suite/cartpole.xml | 37 + Dreamer/local_dm_control_suite/cheetah.py | 97 +++ Dreamer/local_dm_control_suite/cheetah.xml | 73 ++ .../local_dm_control_suite/common/__init__.py | 39 + .../__pycache__/__init__.cpython-37.pyc | Bin 0 -> 1008 bytes .../common/materials.xml | 23 + .../common/materials_white_floor.xml | 23 + .../local_dm_control_suite/common/skybox.xml | 6 + .../local_dm_control_suite/common/visual.xml | 7 + .../demos/mocap_demo.py | 84 ++ .../local_dm_control_suite/demos/zeros.amc | 213 +++++ Dreamer/local_dm_control_suite/explore.py | 84 ++ Dreamer/local_dm_control_suite/finger.py | 217 +++++ Dreamer/local_dm_control_suite/finger.xml | 72 ++ Dreamer/local_dm_control_suite/fish.py | 176 +++++ Dreamer/local_dm_control_suite/fish.xml | 85 ++ Dreamer/local_dm_control_suite/hopper.py | 138 ++++ Dreamer/local_dm_control_suite/hopper.xml | 66 ++ 
Dreamer/local_dm_control_suite/humanoid.py | 211 +++++ Dreamer/local_dm_control_suite/humanoid.xml | 202 +++++ .../local_dm_control_suite/humanoid_CMU.py | 179 +++++ .../local_dm_control_suite/humanoid_CMU.xml | 289 +++++++ Dreamer/local_dm_control_suite/lqr.py | 272 +++++++ Dreamer/local_dm_control_suite/lqr.xml | 26 + Dreamer/local_dm_control_suite/lqr_solver.py | 142 ++++ Dreamer/local_dm_control_suite/manipulator.py | 290 +++++++ .../local_dm_control_suite/manipulator.xml | 211 +++++ Dreamer/local_dm_control_suite/pendulum.py | 114 +++ Dreamer/local_dm_control_suite/pendulum.xml | 26 + Dreamer/local_dm_control_suite/point_mass.py | 130 +++ Dreamer/local_dm_control_suite/point_mass.xml | 49 ++ Dreamer/local_dm_control_suite/quadruped.py | 480 ++++++++++++ Dreamer/local_dm_control_suite/quadruped.xml | 329 ++++++++ Dreamer/local_dm_control_suite/reacher.py | 116 +++ Dreamer/local_dm_control_suite/reacher.xml | 47 ++ Dreamer/local_dm_control_suite/stacker.py | 208 +++++ Dreamer/local_dm_control_suite/stacker.xml | 193 +++++ Dreamer/local_dm_control_suite/swimmer.py | 215 +++++ Dreamer/local_dm_control_suite/swimmer.xml | 57 ++ .../tests/domains_test.py | 292 +++++++ .../tests/loader_test.py | 52 ++ .../local_dm_control_suite/tests/lqr_test.py | 88 +++ .../local_dm_control_suite/utils/__init__.py | 16 + .../local_dm_control_suite/utils/parse_amc.py | 251 ++++++ .../utils/parse_amc_test.py | 68 ++ .../utils/randomizers.py | 91 +++ .../utils/randomizers_test.py | 164 ++++ Dreamer/local_dm_control_suite/walker.py | 158 ++++ Dreamer/local_dm_control_suite/walker.xml | 70 ++ .../wrappers/__init__.py | 16 + .../wrappers/action_noise.py | 74 ++ .../wrappers/action_noise_test.py | 136 ++++ .../local_dm_control_suite/wrappers/pixels.py | 120 +++ .../wrappers/pixels_test.py | 133 ++++ Dreamer/models.py | 297 +++++++ Dreamer/run.py | 121 +++ Dreamer/tools.py | 474 +++++++++++ Dreamer/train_configs/dreamer.yaml | 74 ++ Dreamer/train_configs/inverse.yaml | 66 ++ Dreamer/train_configs/tia.yaml | 76 ++ Dreamer/wrappers.py | 540 +++++++++++++ 104 files changed, 10643 insertions(+) create mode 100644 Dreamer/__pycache__/dreamers.cpython-37.pyc create mode 100644 Dreamer/__pycache__/env_tools.cpython-37.pyc create mode 100644 Dreamer/__pycache__/models.cpython-37.pyc create mode 100644 Dreamer/__pycache__/tools.cpython-37.pyc create mode 100644 Dreamer/__pycache__/tools.cpython-38.pyc create mode 100644 Dreamer/__pycache__/wrappers.cpython-37.pyc create mode 100644 Dreamer/__pycache__/wrappers.cpython-38.pyc create mode 100644 Dreamer/dmc2gym/__init__.py create mode 100644 Dreamer/dmc2gym/__pycache__/__init__.cpython-37.pyc create mode 100644 Dreamer/dmc2gym/__pycache__/natural_imgsource.cpython-37.pyc create mode 100644 Dreamer/dmc2gym/__pycache__/wrappers.cpython-37.pyc create mode 100644 Dreamer/dmc2gym/natural_imgsource.py create mode 100644 Dreamer/dmc2gym/wrappers.py create mode 100644 Dreamer/dreamers.py create mode 100644 Dreamer/env_tools.py create mode 100755 Dreamer/local_dm_control_suite/README.md create mode 100755 Dreamer/local_dm_control_suite/__init__.py create mode 100644 Dreamer/local_dm_control_suite/__pycache__/__init__.cpython-37.pyc create mode 100644 Dreamer/local_dm_control_suite/__pycache__/acrobot.cpython-37.pyc create mode 100644 Dreamer/local_dm_control_suite/__pycache__/ball_in_cup.cpython-37.pyc create mode 100644 Dreamer/local_dm_control_suite/__pycache__/base.cpython-37.pyc create mode 100644 Dreamer/local_dm_control_suite/__pycache__/cartpole.cpython-37.pyc create mode 
100644 Dreamer/local_dm_control_suite/__pycache__/cheetah.cpython-37.pyc create mode 100644 Dreamer/local_dm_control_suite/__pycache__/finger.cpython-37.pyc create mode 100644 Dreamer/local_dm_control_suite/__pycache__/fish.cpython-37.pyc create mode 100644 Dreamer/local_dm_control_suite/__pycache__/hopper.cpython-37.pyc create mode 100644 Dreamer/local_dm_control_suite/__pycache__/humanoid.cpython-37.pyc create mode 100644 Dreamer/local_dm_control_suite/__pycache__/humanoid_CMU.cpython-37.pyc create mode 100644 Dreamer/local_dm_control_suite/__pycache__/lqr.cpython-37.pyc create mode 100644 Dreamer/local_dm_control_suite/__pycache__/manipulator.cpython-37.pyc create mode 100644 Dreamer/local_dm_control_suite/__pycache__/pendulum.cpython-37.pyc create mode 100644 Dreamer/local_dm_control_suite/__pycache__/point_mass.cpython-37.pyc create mode 100644 Dreamer/local_dm_control_suite/__pycache__/quadruped.cpython-37.pyc create mode 100644 Dreamer/local_dm_control_suite/__pycache__/reacher.cpython-37.pyc create mode 100644 Dreamer/local_dm_control_suite/__pycache__/stacker.cpython-37.pyc create mode 100644 Dreamer/local_dm_control_suite/__pycache__/swimmer.cpython-37.pyc create mode 100644 Dreamer/local_dm_control_suite/__pycache__/walker.cpython-37.pyc create mode 100755 Dreamer/local_dm_control_suite/acrobot.py create mode 100755 Dreamer/local_dm_control_suite/acrobot.xml create mode 100755 Dreamer/local_dm_control_suite/ball_in_cup.py create mode 100755 Dreamer/local_dm_control_suite/ball_in_cup.xml create mode 100755 Dreamer/local_dm_control_suite/base.py create mode 100755 Dreamer/local_dm_control_suite/cartpole.py create mode 100755 Dreamer/local_dm_control_suite/cartpole.xml create mode 100755 Dreamer/local_dm_control_suite/cheetah.py create mode 100755 Dreamer/local_dm_control_suite/cheetah.xml create mode 100755 Dreamer/local_dm_control_suite/common/__init__.py create mode 100644 Dreamer/local_dm_control_suite/common/__pycache__/__init__.cpython-37.pyc create mode 100755 Dreamer/local_dm_control_suite/common/materials.xml create mode 100755 Dreamer/local_dm_control_suite/common/materials_white_floor.xml create mode 100755 Dreamer/local_dm_control_suite/common/skybox.xml create mode 100755 Dreamer/local_dm_control_suite/common/visual.xml create mode 100755 Dreamer/local_dm_control_suite/demos/mocap_demo.py create mode 100755 Dreamer/local_dm_control_suite/demos/zeros.amc create mode 100755 Dreamer/local_dm_control_suite/explore.py create mode 100755 Dreamer/local_dm_control_suite/finger.py create mode 100755 Dreamer/local_dm_control_suite/finger.xml create mode 100755 Dreamer/local_dm_control_suite/fish.py create mode 100755 Dreamer/local_dm_control_suite/fish.xml create mode 100755 Dreamer/local_dm_control_suite/hopper.py create mode 100755 Dreamer/local_dm_control_suite/hopper.xml create mode 100755 Dreamer/local_dm_control_suite/humanoid.py create mode 100755 Dreamer/local_dm_control_suite/humanoid.xml create mode 100755 Dreamer/local_dm_control_suite/humanoid_CMU.py create mode 100755 Dreamer/local_dm_control_suite/humanoid_CMU.xml create mode 100755 Dreamer/local_dm_control_suite/lqr.py create mode 100755 Dreamer/local_dm_control_suite/lqr.xml create mode 100755 Dreamer/local_dm_control_suite/lqr_solver.py create mode 100755 Dreamer/local_dm_control_suite/manipulator.py create mode 100755 Dreamer/local_dm_control_suite/manipulator.xml create mode 100755 Dreamer/local_dm_control_suite/pendulum.py create mode 100755 Dreamer/local_dm_control_suite/pendulum.xml create mode 100755 
Dreamer/local_dm_control_suite/point_mass.py create mode 100755 Dreamer/local_dm_control_suite/point_mass.xml create mode 100755 Dreamer/local_dm_control_suite/quadruped.py create mode 100755 Dreamer/local_dm_control_suite/quadruped.xml create mode 100755 Dreamer/local_dm_control_suite/reacher.py create mode 100755 Dreamer/local_dm_control_suite/reacher.xml create mode 100755 Dreamer/local_dm_control_suite/stacker.py create mode 100755 Dreamer/local_dm_control_suite/stacker.xml create mode 100755 Dreamer/local_dm_control_suite/swimmer.py create mode 100755 Dreamer/local_dm_control_suite/swimmer.xml create mode 100755 Dreamer/local_dm_control_suite/tests/domains_test.py create mode 100755 Dreamer/local_dm_control_suite/tests/loader_test.py create mode 100755 Dreamer/local_dm_control_suite/tests/lqr_test.py create mode 100755 Dreamer/local_dm_control_suite/utils/__init__.py create mode 100755 Dreamer/local_dm_control_suite/utils/parse_amc.py create mode 100755 Dreamer/local_dm_control_suite/utils/parse_amc_test.py create mode 100755 Dreamer/local_dm_control_suite/utils/randomizers.py create mode 100755 Dreamer/local_dm_control_suite/utils/randomizers_test.py create mode 100755 Dreamer/local_dm_control_suite/walker.py create mode 100755 Dreamer/local_dm_control_suite/walker.xml create mode 100755 Dreamer/local_dm_control_suite/wrappers/__init__.py create mode 100755 Dreamer/local_dm_control_suite/wrappers/action_noise.py create mode 100755 Dreamer/local_dm_control_suite/wrappers/action_noise_test.py create mode 100755 Dreamer/local_dm_control_suite/wrappers/pixels.py create mode 100755 Dreamer/local_dm_control_suite/wrappers/pixels_test.py create mode 100644 Dreamer/models.py create mode 100644 Dreamer/run.py create mode 100644 Dreamer/tools.py create mode 100644 Dreamer/train_configs/dreamer.yaml create mode 100644 Dreamer/train_configs/inverse.yaml create mode 100644 Dreamer/train_configs/tia.yaml create mode 100644 Dreamer/wrappers.py
hSEOP+BkIB`+2s@E50qamKa2Z(`FQ!k@+stQ{XcT}+EM@j literal 0 HcmV?d00001 diff --git a/Dreamer/dmc2gym/__init__.py b/Dreamer/dmc2gym/__init__.py new file mode 100644 index 0000000..727957a --- /dev/null +++ b/Dreamer/dmc2gym/__init__.py @@ -0,0 +1,52 @@ +import gym +from gym.envs.registration import register + + +def make( + domain_name, + task_name, + resource_files, + img_source, + total_frames, + seed=1, + visualize_reward=True, + from_pixels=False, + height=84, + width=84, + camera_id=0, + frame_skip=1, + episode_length=1000, + environment_kwargs=None +): + env_id = 'dmc_%s_%s_%s-v1' % (domain_name, task_name, seed) + + if from_pixels: + assert not visualize_reward, 'cannot use visualize reward when learning from pixels' + + # shorten episode length + max_episode_steps = (episode_length + frame_skip - 1) // frame_skip + + if not env_id in gym.envs.registry.env_specs: + register( + id=env_id, + entry_point='dmc2gym.wrappers:DMCWrapper', + kwargs={ + 'domain_name': domain_name, + 'task_name': task_name, + 'resource_files': resource_files, + 'img_source': img_source, + 'total_frames': total_frames, + 'task_kwargs': { + 'random': seed + }, + 'environment_kwargs': environment_kwargs, + 'visualize_reward': visualize_reward, + 'from_pixels': from_pixels, + 'height': height, + 'width': width, + 'camera_id': camera_id, + 'frame_skip': frame_skip, + }, + # max_episode_steps=max_episode_steps + ) + return gym.make(env_id) diff --git a/Dreamer/dmc2gym/__pycache__/__init__.cpython-37.pyc b/Dreamer/dmc2gym/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2a634fa945295e0b6ca0772db6b49d1c8ff2f42c GIT binary patch literal 991 zcmY*YOK%e~5Vm(Wo5v>Gv;vB#R3an}kxC`P;}X)5;u zT=@qaK%DqXzH*^|fdgW^+w$pktrYH)I%HVjMXfQriMCl}Zb^r06+1xI98}`NJn<61x~2 zeVpq!_i!qWz^NgIn#Qqzi(JH}3EL)wZH>#lr|+ognrM@7|28qv9dfZ|Vom4X+@A-F zCd_ri#5}0-J>#2zH%voP(=(xInwDvsj){2qsmGfieM&U_9Gduxque??U*p+?-B3!S9w7%)eTqbg~EoTkcdi2PJSk!K>;?8?{wL!H&sZRFUpnyn3ILQZGeHkOj> znQc$7DkMyD+o@JyY9Y&^Z$0!xSQb{SEM%^2Sl1smZTvzh0a_OMD}Vy5kNdU(Qc>6U z4=dY7o2gPv6nJ<*Hpr5N81|uw2DBWwE(6zu5Fyw=3BjS~Y}YEV=D;-zxNdABxq>ZB!52Z)4g%@NUO>Bk-;1f|KCi+`3;*h9cY`W(*sdDl=l}xDuED*9 z$_uXU;V^(rl=i0U7k6yC^{auzooRJkRjG>nKQM4ft*2SRPg3#B6;(G7TD03H^#AWS DET#&y literal 0 HcmV?d00001 diff --git a/Dreamer/dmc2gym/__pycache__/natural_imgsource.cpython-37.pyc b/Dreamer/dmc2gym/__pycache__/natural_imgsource.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..99eabcffdd1ed7c6fecca063c877cd587a3fecd6 GIT binary patch literal 2532 zcma)8&2QX96rUM?uJb`6+O(li`9i@_*+oSNsj8qfB^3y*P>N_VD#rE9ZoKu{+Zk`N zSvjYc3ljfBa)dMdD;#{~LjDC#yf@Bn)1(5tnm1o>-kW*9-<#PxOG_Ss&)NKq{|>D` zu`~NjXxxUP?t}7aZHMuhQW-i%aN=d^eRvJ`{P`)a%O(_sG0drW;Bj7d$~5lWl_DMr+2OlyaC{+a)5IL^eepl}_pb{hW zV(Zd^Zf!}SM0pvS%_i7%_4Fq6Y&WoLH;QsNRut7<6b%bL&ak~0MNh|Zb~58c5igP` zk~TVWUiPL#hAt%@b`gp~HP*%ory7Ld!Z$nXS5Oc0Aa)X=Ck#WGW=L8t%a2INe zx3~wj%@=@qOZDn@obzJ%P0B^lP;Tp(gP{an;)gZ5*o30CK~z>nB2rP1v|=DxX&jmr z8L%Tt3D%re=8tS0+7YW@CbB96q|H&=fmsr{6*+7j0VOpLjp?H~?LL+LGbGgPy4i5= zg&q>Emu4bMRe95oeXNn9yLb{kqvkfd61g9EKqeXQt0c}uZE3T$JF`*SQb(e8)Xuoq z0}XE&Pa6#&FpO1befJ23K<`6%3NI8s`Nc4Z74ZX<-JYGw@&Q9`kB2RJ7U~}zFAqf$kW|_*PLlR0DWNXLU-!E zH%cc_Z&=nQLK@J}E5_w$T=Fz2)4w3ba07Yyco_90B%{&E?66 zj^@Bm(5T1{q@)9OXmDC$np)535hkJyDd<4oKEi|~ldb)Y|D*R6c|e|Sn51-{cyd4? 
zg(=zF_!h<+KjRydG#f&Gw>78S8E^$HR&2%rVM^q+fWwi^wRl2>dSi`L(C`4O&!B6 zXh4*Ry!z_sX8`kdj7emTAb9i{*4z}YUdWjN*Z%K6jH-^XrXY{?~9v=ulKSt45~(1OH( z=}-jNap^xb@m%`6jS^WoH-7g~m literal 0 HcmV?d00001 diff --git a/Dreamer/dmc2gym/__pycache__/wrappers.cpython-37.pyc b/Dreamer/dmc2gym/__pycache__/wrappers.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..62e27d3c0b0c65a918be437d74b88da95beb66a5 GIT binary patch literal 5821 zcma)A&2JmW72nw}Ee#8%CTS6*jbl3v+6Y0?#=zF?V#QffOOdbm-ZBpKn^XCOVOf-UJCTqe_;;2^yFJFz2wsV-Yi8?iUWkiyq%qS zGxO%XH^290Z_dqC4E+A}-@oubpEHbq)62onM&)}b@ z)AnusbbJ?2t6v&;zBl11`xV_+^=o+A{kcKiucOZi=ef&E4-LP;JzmChfme7H&&4+k zUgL8Q4L%pK-ibZSU*hJwMyvh^au_XB+3i$B%9X>g9m$mzQ;r-)ZHdZ45@s7B><96n zE7J|pj_$M>jlz$fR9;7s9!PG4*ajwMG)HAE?ydz%8q27~{JM;?;IQS7hA3gg=U1<< zrGx1DX2ioJyPn12^;;qe2a&kW2kjf(?ZNd2A{-7QA+HU$m7b*APgfz(W+>)pVH3nK zMHzG|8NE0tTC$&`!WlOo8oR7(bPdzkHFvG8tGRK@_~}!-cFuBpkKHqOon1FKM@H_9 z88dPVHJ57lPv12%)-!XHTio6^cT4)sn!I^=sb}S0&ssNSKeIn%nbULgQtpAP#GQQ$ z`fyjKqLB}fXz!xPk3n*yr>Dp8^OHJFbzV=!Moz{?F4{|DHfFml^KxU4W#wLF-4yq* zL=~l$Gf<1OeRBz$%gw5hn;lqq$~_0}--4%p$hm7_*7@8zMG`dVNh7rROzi?Z+a5-( z1wsC-%w(uaoqif-FMe}UdEthtzb0iQvN%m{3y}(C#Yv{@p-4GAXe}H@%BD#KZCW|6 z2@!4+Pe<8l5=p;yGu=pd#ETARClaYtr9qs;gN*?y;TBa|wqjCQJRT@#l?I4qOsCza zAmn;#KHADe*v`p(-zNi3dg_;8}KZC(iLoBng>5C}hd9<}rra$tC@1e+tAd%rSZull=F(4MF z7N?jyoOL`6mkf|wz@30<%7C4LeRTjk6A z6rMGHnxDaQj$h#C`1ywbKF*-7i>h+#t=I1sShjNK(T^aQR^TJ@vo`q+{m5Yn7q!j) ziZ0G_ciokZ45II`4_S`$AF#X(YUUMCE3bmuc@5ObZBREW^*n%@nG+1nackc}YdHgV zuRN-bjV{|`XsKm$y*jA^?X})~*61y8cb{fh%sugY-CH8nV6{ruLjQ?L|5CCvneF7* z7%h*XAdFg~5#TyiHHgw`z0;#JW1!5;+-Gw{w)OHyOngG~cFf&6S_sWP&(4j`=k+no zbs_l_t<`lCe2ZwmIBow;c4>4eYxcf2dV+itm^h!$W5km=?eeL-4thCzI-lQTA2GQC zz12p~5U19j&4GlNbC{j$T>s49krK(Yj}P?fV#t{2s+uC6}_MOUiEW-K?te!LR}B0>{ai&3%} zi!>QTNfs1bUGgi#_|M-DD&K9{;%T%9I!Xm>(RnJo@qeg2vOtv6PWym#M+fkoC*P(M zD3a#;yG@=(vYDh=a}Z|jwPu)XH#>132iT1VVKp-9MRpMs=|%H$F*lDoaT4)s;su%uI80h=J*N>?)q;*l2f&T3sE>#d ztVO`hOu4~>m}hIM60|9W2!oibDp>In$n|)Lm&1h+p*G?S5eh+#D?5PZG=8coq6i|I zMqRZSWMU%Eq zd6`(Nv$l69rgsXKZMY_>K{0$X|V2IDOAO=!l11Yc3vypLH$Ty9iLzs@3;f_j3lYxQ1 zJfLRBI)iNVNSj4%>-MqM$8p*P=NJh)R`Et<%Y+#aJ$7%FpCT9$~g2hkxyF!+d+tdXTiV(Nl63dlxk7YT8TdT$fCL*)BJiiEG9 zc4OdKfa4;5jAmaSPYruOY2cb1rV^OaPKVn%Em75(L*7CbghAkp36ZjBD4wHtq=Rx@ znXBF7=JUvUgdD#Hn>-C-Xg?#rtH7h~=>%{IwWB^Y88tPhje%32qH}=MPG36cNUX4px&?i?bq*;cr4~!%5a2(h-}PL$pip1oqm(F1o%(BZBsUgs z5z#S6N1qjXr5hZ=k=BY^fUIB9h|8mX7Wx$psJeb!H}e6q?@io3CV2ZhOeJ+5g=5&5 z{A+CI?5wLF9WqUok!T&>WL>;qcL(=a@ixiGw2a4PSeP+xfyN^+Pp+gp=Vw;_0^?U( zo6^KDS$pYm!Qj)GSCIOl0OC+exM#VqOtbA%_II` z%^LtSQ@0N42VWNK!jo=dn9xq74OF0@jxooqq}x6t@yN0Ufi&BhpAnHVi>YXYdURYg z@_mBJp=ij$P0IOjQ_^>F^w@=P@>%NPKpd{oum>DOX6%iPnBh~ycQWMby~-Zbw=Ozr z&Zw&AaFC@tz1nb%vzO@TIiopXn47y|#I+ zvx|F}Uq5PMd_O}5cfe8fGX)m;ti_X z1XP5LI>WU4IM`y7ICnu>UU4wCsNomH?&yoOVu6tKYI9umHOw71%LnNG6N;pZv4I0y zL9yAA*?@uSh@liy>56;QSP*9FxrAv`Tzdj9g|VPQ!Yp{v7{_{Qup%5Fg4#N8Vv`X* zN@*O|8$y^MAkuO_R^nfw_a7+wW0yg)5%M->TuINU%A&g(=zDLQ0xA9INkzNW9aQKl zjn6zwbg|3X(RF*^x~Rg=`Tw#KgEWf%W2Zz zZpw|}Fcn#Ix_)zz^5|-_8*fGlGV3n{!d9v9hFh3pCgL5sM1qckuw~O^$0F%*qZ0o= zQ6xdW!IrcEDJ9xjIHD$sv#v3)}NIEKAjm# bQmFD(igG%MKsu`bHKJzCv`x?UmObzPYOjtb literal 0 HcmV?d00001 diff --git a/Dreamer/dmc2gym/natural_imgsource.py b/Dreamer/dmc2gym/natural_imgsource.py new file mode 100644 index 0000000..6205815 --- /dev/null +++ b/Dreamer/dmc2gym/natural_imgsource.py @@ -0,0 +1,82 @@ +# This code provides the class that is used to generate backgrounds for the natural background setting +# the class is used inside an environment wrapper and will be called each time the env generates an 
observation +# the code is largely based on https://github.com/facebookresearch/deep_bisim4control + +import random + +import cv2 +import numpy as np +import skvideo.io + + +class ImageSource(object): + """ + Source of natural images to be added to a simulated environment. + """ + + def get_image(self): + """ + Returns: + an RGB image of [h, w, 3] with a fixed shape. + """ + pass + + def reset(self): + """ Called when an episode ends. """ + pass + + +class RandomVideoSource(ImageSource): + def __init__(self, shape, filelist, random_bg=False, max_videos=100, grayscale=False): + """ + Args: + shape: [h, w] + filelist: a list of video files + """ + self.grayscale = grayscale + self.shape = shape + self.filelist = filelist + random.shuffle(self.filelist) + self.filelist = self.filelist[:max_videos] + self.max_videos = max_videos + self.random_bg = random_bg + self.current_idx = 0 + self._current_vid = None + self.reset() + + def load_video(self, vid_id): + fname = self.filelist[vid_id] + + if self.grayscale: + frames = skvideo.io.vread(fname, outputdict={"-pix_fmt": "gray"}) + else: + frames = skvideo.io.vread(fname, num_frames=1000) + + img_arr = np.zeros((frames.shape[0], self.shape[0], self.shape[1]) + ((3,) if not self.grayscale else (1,))) + for i in range(frames.shape[0]): + if self.grayscale: + img_arr[i] = cv2.resize(frames[i], (self.shape[1], self.shape[0]))[..., None] # THIS IS NOT A BUG! cv2 uses (width, height) + else: + img_arr[i] = cv2.resize(frames[i], (self.shape[1], self.shape[0])) + return img_arr + + def reset(self): + del self._current_vid + self._video_id = np.random.randint(0, len(self.filelist)) + self._current_vid = self.load_video(self._video_id) + while True: + try: + self._video_id = np.random.randint(0, len(self.filelist)) + self._current_vid = self.load_video(self._video_id) + break + except Exception: + continue + self._loc = np.random.randint(0, len(self._current_vid)) + + def get_image(self): + if self.random_bg: + self._loc = np.random.randint(0, len(self._current_vid)) + else: + self._loc += 1 + img = self._current_vid[self._loc % len(self._current_vid)] + return img \ No newline at end of file diff --git a/Dreamer/dmc2gym/wrappers.py b/Dreamer/dmc2gym/wrappers.py new file mode 100644 index 0000000..855729d --- /dev/null +++ b/Dreamer/dmc2gym/wrappers.py @@ -0,0 +1,208 @@ +from gym import core, spaces +import glob +import os +import local_dm_control_suite as suite +from dm_env import specs +import numpy as np +import skimage.io + +from dmc2gym import natural_imgsource + +high_noise = False + +def set_global_var(set_high_noise): + global high_noise + high_noise = set_high_noise + +def _spec_to_box(spec): + def extract_min_max(s): + assert s.dtype == np.float64 or s.dtype == np.float32 + dim = np.int(np.prod(s.shape)) + if type(s) == specs.Array: + bound = np.inf * np.ones(dim, dtype=np.float32) + return -bound, bound + elif type(s) == specs.BoundedArray: + zeros = np.zeros(dim, dtype=np.float32) + return s.minimum + zeros, s.maximum + zeros + + mins, maxs = [], [] + for s in spec: + mn, mx = extract_min_max(s) + mins.append(mn) + maxs.append(mx) + low = np.concatenate(mins, axis=0) + high = np.concatenate(maxs, axis=0) + assert low.shape == high.shape + return spaces.Box(low, high, dtype=np.float32) + + +def _flatten_obs(obs): + obs_pieces = [] + for v in obs.values(): + flat = np.array([v]) if np.isscalar(v) else v.ravel() + obs_pieces.append(flat) + return np.concatenate(obs_pieces, axis=0) + + +class DMCWrapper(core.Env): + def __init__( + self, + 
domain_name, + task_name, + resource_files, + img_source, + total_frames, + task_kwargs=None, + visualize_reward={}, + from_pixels=False, + height=84, + width=84, + camera_id=0, + frame_skip=1, + environment_kwargs=None + ): + assert 'random' in task_kwargs, 'please specify a seed, for deterministic behaviour' + self._from_pixels = from_pixels + self._height = height + self._width = width + self._camera_id = camera_id + self._frame_skip = frame_skip + self._img_source = img_source + + # create task + self._env = suite.load( + domain_name=domain_name, + task_name=task_name, + task_kwargs=task_kwargs, + visualize_reward=visualize_reward, + environment_kwargs=environment_kwargs + ) + + # true and normalized action spaces + self._true_action_space = _spec_to_box([self._env.action_spec()]) + self._norm_action_space = spaces.Box( + low=-1.0, + high=1.0, + shape=self._true_action_space.shape, + dtype=np.float32 + ) + + # create observation space + if from_pixels: + self._observation_space = spaces.Box( + low=0, high=255, shape=[3, height, width], dtype=np.uint8 + ) + else: + self._observation_space = _spec_to_box( + self._env.observation_spec().values() + ) + + self._internal_state_space = spaces.Box( + low=-np.inf, + high=np.inf, + shape=self._env.physics.get_state().shape, + dtype=np.float32 + ) + + # background + if img_source is not None: + shape2d = (height, width) + if img_source == "color": + self._bg_source = natural_imgsource.RandomColorSource(shape2d) + elif img_source == "noise": + self._bg_source = natural_imgsource.NoiseSource(shape2d) + else: + files = glob.glob(os.path.expanduser(resource_files)) + self.files = files + self.total_frames = total_frames + self.shape2d = shape2d + assert len(files), "Pattern {} does not match any files".format( + resource_files + ) + if img_source == "images": + self._bg_source = natural_imgsource.RandomImageSource(shape2d, files, grayscale=False, max_videos=100, random_bg=False) + elif img_source == "video": + self._bg_source = natural_imgsource.RandomVideoSource(shape2d, files, grayscale=False, max_videos=100, random_bg=False) + else: + raise Exception("img_source %s not defined." 
% img_source) + + # set seed + self.seed(seed=task_kwargs.get('random', 1)) + + def __getattr__(self, name): + return getattr(self._env, name) + + def _get_obs(self, time_step): + if self._from_pixels: + obs = self.render( + height=self._height, + width=self._width, + camera_id=self._camera_id + ) + if self._img_source is not None: + mask = np.logical_and((obs[:, :, 2] > obs[:, :, 1]), (obs[:, :, 2] > obs[:, :, 0])) # hardcoded for dmc + bg = self._bg_source.get_image() + obs[mask] = bg[mask] + obs = obs.transpose(2, 0, 1).copy() + else: + obs = _flatten_obs(time_step.observation) + return obs + + def _convert_action(self, action): + action = action.astype(np.float64) + true_delta = self._true_action_space.high - self._true_action_space.low + norm_delta = self._norm_action_space.high - self._norm_action_space.low + action = (action - self._norm_action_space.low) / norm_delta + action = action * true_delta + self._true_action_space.low + action = action.astype(np.float32) + return action + + @property + def observation_space(self): + return self._observation_space + + @property + def internal_state_space(self): + return self._internal_state_space + + @property + def action_space(self): + return self._norm_action_space + + def seed(self, seed): + self._true_action_space.seed(seed) + self._norm_action_space.seed(seed) + self._observation_space.seed(seed) + + def step(self, action): + assert self._norm_action_space.contains(action) + action = self._convert_action(action) + assert self._true_action_space.contains(action) + reward = 0 + extra = {'internal_state': self._env.physics.get_state().copy()} + + for _ in range(self._frame_skip): + time_step = self._env.step(action) + reward += time_step.reward or 0 + done = time_step.last() + if done: + break + obs = self._get_obs(time_step) + extra['discount'] = time_step.discount + return obs, reward, done, extra + + def reset(self): + time_step = self._env.reset() + self._bg_source.reset() + #self._bg_source = natural_imgsource.RandomVideoSource(self.shape2d, self.files, grayscale=True, total_frames=self.total_frames, high_noise=high_noise) + obs = self._get_obs(time_step) + return obs + + def render(self, mode='rgb_array', height=None, width=None, camera_id=0): + assert mode == 'rgb_array', 'only support rgb_array mode, given %s' % mode + height = height or self._height + width = width or self._width + camera_id = camera_id or self._camera_id + return self._env.physics.render( + height=height, width=width, camera_id=camera_id + ) \ No newline at end of file diff --git a/Dreamer/dreamers.py b/Dreamer/dreamers.py new file mode 100644 index 0000000..144ad4c --- /dev/null +++ b/Dreamer/dreamers.py @@ -0,0 +1,741 @@ +import tools +import models +from tensorflow_probability import distributions as tfd +from tensorflow.keras.mixed_precision import experimental as prec +import tensorflow as tf +import numpy as np +import collections +import functools +import json +import time + +from env_tools import preprocess, count_steps + +def load_dataset(directory, config): + episode = next(tools.load_episodes(directory, 1)) + types = {k: v.dtype for k, v in episode.items()} + shapes = {k: (None,) + v.shape[1:] for k, v in episode.items()} + + def generator(): return tools.load_episodes( + directory, config.train_steps, config.batch_length, + config.dataset_balance) + dataset = tf.data.Dataset.from_generator(generator, types, shapes) + dataset = dataset.batch(config.batch_size, drop_remainder=True) + dataset = dataset.map(functools.partial(preprocess, config=config)) 
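+    # Descriptive note (added): each generator element is an episode chunk of
+    # `batch_length` steps (see tools.load_episodes), so after batching the
+    # tensors have shape [batch_size, batch_length, ...]; preprocess() (defined
+    # in env_tools.py) rescales images to [-0.5, 0.5] and applies the
+    # configured reward clipping before the data reaches the model.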
+ dataset = dataset.prefetch(10) + return dataset + +class Dreamer(tools.Module): + + def __init__(self, config, datadir, actspace, writer): + self._c = config + self._actspace = actspace + self._actdim = actspace.n if hasattr( + actspace, 'n') else actspace.shape[0] + self._writer = writer + self._random = np.random.RandomState(config.seed) + self._should_pretrain = tools.Once() + self._should_train = tools.Every(config.train_every) + self._should_log = tools.Every(config.log_every) + self._last_log = None + self._last_time = time.time() + self._metrics = collections.defaultdict(tf.metrics.Mean) + self._metrics['expl_amount'] # Create variable for checkpoint. + self._float = prec.global_policy().compute_dtype + self._strategy = tf.distribute.MirroredStrategy() + with tf.device('cpu:0'): + self._step = tf.Variable(count_steps( + datadir, config), dtype=tf.int64) + with self._strategy.scope(): + self._dataset = iter(self._strategy.experimental_distribute_dataset( + load_dataset(datadir, self._c))) + self._build_model() + + def __call__(self, obs, reset, state=None, training=True): + step = self._step.numpy().item() + tf.summary.experimental.set_step(step) + if state is not None and reset.any(): + mask = tf.cast(1 - reset, self._float)[:, None] + state = tf.nest.map_structure(lambda x: x * mask, state) + if self._should_train(step): + log = self._should_log(step) + n = self._c.pretrain if self._should_pretrain() else self._c.train_steps + print(f'Training for {n} steps.') + with self._strategy.scope(): + for train_step in range(n): + log_images = self._c.log_images and log and train_step == 0 + self.train(next(self._dataset), log_images) + if log: + self._write_summaries() + action, state = self.policy(obs, state, training) + if training: + self._step.assign_add(len(reset) * self._c.action_repeat) + return action, state + + @tf.function + def policy(self, obs, state, training): + if state is None: + latent = self._dynamics.initial(len(obs['image'])) + action = tf.zeros((len(obs['image']), self._actdim), self._float) + else: + latent, action = state + embed = self._encode(preprocess(obs, self._c)) + latent, _ = self._dynamics.obs_step(latent, action, embed) + feat = self._dynamics.get_feat(latent) + if training: + action = self._actor(feat).sample() + else: + action = self._actor(feat).mode() + action = self._exploration(action, training) + state = (latent, action) + return action, state + + def load(self, filename): + super().load(filename) + self._should_pretrain() + + @tf.function() + def train(self, data, log_images=False): + self._strategy.run( + self._train, args=(data, log_images)) + + def _train(self, data, log_images): + with tf.GradientTape() as model_tape: + data["image"] = tf.transpose(data["image"], perm=[0, 1, 3, 4, 2]) + embed = self._encode(data) + post, prior = self._dynamics.observe(embed, data['action']) + feat = self._dynamics.get_feat(post) + + image_pred = self._decode(feat) + reward_pred = self._reward(feat) + + likes = tools.AttrDict() + likes.image = tf.reduce_mean(image_pred.log_prob(data['image'])) + likes.reward = tf.reduce_mean(reward_pred.log_prob(data['reward'])) + if self._c.pcont: + pcont_pred = self._pcont(feat) + pcont_target = self._c.discount * data['discount'] + likes.pcont = tf.reduce_mean(pcont_pred.log_prob(pcont_target)) + likes.pcont *= self._c.pcont_scale + + prior_dist = self._dynamics.get_dist(prior) + post_dist = self._dynamics.get_dist(post) + div = tf.reduce_mean(tfd.kl_divergence(post_dist, prior_dist)) + div = tf.maximum(div, 
self._c.free_nats) + + model_loss = self._c.kl_scale * div - sum(likes.values()) + model_loss /= float(self._strategy.num_replicas_in_sync) + + with tf.GradientTape() as actor_tape: + imag_feat = self._imagine_ahead(post) + reward = self._reward(imag_feat).mode() + if self._c.pcont: + pcont = self._pcont(imag_feat).mean() + else: + pcont = self._c.discount * tf.ones_like(reward) + value = self._value(imag_feat).mode() + returns = tools.lambda_return( + reward[:-1], value[:-1], pcont[:-1], + bootstrap=value[-1], lambda_=self._c.disclam, axis=0) + discount = tf.stop_gradient(tf.math.cumprod(tf.concat( + [tf.ones_like(pcont[:1]), pcont[:-2]], 0), 0)) + actor_loss = -tf.reduce_mean(discount * returns) + actor_loss /= float(self._strategy.num_replicas_in_sync) + + with tf.GradientTape() as value_tape: + value_pred = self._value(imag_feat)[:-1] + target = tf.stop_gradient(returns) + value_loss = - \ + tf.reduce_mean(discount * value_pred.log_prob(target)) + value_loss /= float(self._strategy.num_replicas_in_sync) + + model_norm = self._model_opt(model_tape, model_loss) + actor_norm = self._actor_opt(actor_tape, actor_loss) + value_norm = self._value_opt(value_tape, value_loss) + + if tf.distribute.get_replica_context().replica_id_in_sync_group == 0: + if self._c.log_scalars: + self._scalar_summaries( + data, feat, prior_dist, post_dist, likes, div, + model_loss, value_loss, actor_loss, model_norm, value_norm, + actor_norm) + if tf.equal(log_images, True): + self._image_summaries(data, embed, image_pred) + + def _build_model(self): + acts = dict( + elu=tf.nn.elu, relu=tf.nn.relu, swish=tf.nn.swish, + leaky_relu=tf.nn.leaky_relu) + cnn_act = acts[self._c.cnn_act] + act = acts[self._c.dense_act] + self._encode = models.ConvEncoder( + self._c.cnn_depth, cnn_act, self._c.image_size) + self._dynamics = models.RSSM( + self._c.stoch_size, self._c.deter_size, self._c.deter_size) + self._decode = models.ConvDecoder( + self._c.cnn_depth, cnn_act, (self._c.image_size, self._c.image_size, 3)) + self._reward = models.DenseDecoder((), 2, self._c.num_units, act=act) + if self._c.pcont: + self._pcont = models.DenseDecoder( + (), 3, self._c.num_units, 'binary', act=act) + self._value = models.DenseDecoder((), 3, self._c.num_units, act=act) + self._actor = models.ActionDecoder( + self._actdim, 4, self._c.num_units, self._c.action_dist, + init_std=self._c.action_init_std, act=act) + model_modules = [self._encode, self._dynamics, + self._decode, self._reward] + if self._c.pcont: + model_modules.append(self._pcont) + Optimizer = functools.partial( + tools.Adam, wd=self._c.weight_decay, clip=self._c.grad_clip, + wdpattern=self._c.weight_decay_pattern) + self._model_opt = Optimizer('model', model_modules, self._c.model_lr) + self._value_opt = Optimizer('value', [self._value], self._c.value_lr) + self._actor_opt = Optimizer('actor', [self._actor], self._c.actor_lr) + self.train(next(self._dataset)) + + def _exploration(self, action, training): + if training: + amount = self._c.expl_amount + if self._c.expl_decay: + amount *= 0.5 ** (tf.cast(self._step, + tf.float32) / self._c.expl_decay) + if self._c.expl_min: + amount = tf.maximum(self._c.expl_min, amount) + self._metrics['expl_amount'].update_state(amount) + elif self._c.eval_noise: + amount = self._c.eval_noise + else: + return action + if self._c.expl == 'additive_gaussian': + return tf.clip_by_value(tfd.Normal(action, amount).sample(), -1, 1) + if self._c.expl == 'completely_random': + return tf.random.uniform(action.shape, -1, 1) + if self._c.expl == 
'epsilon_greedy': + indices = tfd.Categorical(0 * action).sample() + # pylint: disable=unexpected-keyword-arg, no-value-for-parameter + return tf.where( + tf.random.uniform(action.shape[:1], 0, 1) < amount, + tf.one_hot(indices, action.shape[-1], dtype=self._float), + action) + raise NotImplementedError(self._c.expl) + + def _imagine_ahead(self, post): + if self._c.pcont: # Last step could be terminal. + post = {k: v[:, :-1] for k, v in post.items()} + + def flatten(x): return tf.reshape(x, [-1] + list(x.shape[2:])) + start = {k: flatten(v) for k, v in post.items()} + + def policy(state): return self._actor( + tf.stop_gradient(self._dynamics.get_feat(state))).sample() + states = tools.static_scan( + lambda prev, _: self._dynamics.img_step(prev, policy(prev)), + tf.range(self._c.horizon), start) + imag_feat = self._dynamics.get_feat(states) + return imag_feat + + def _scalar_summaries( + self, data, feat, prior_dist, post_dist, likes, div, + model_loss, value_loss, actor_loss, model_norm, value_norm, + actor_norm): + self._metrics['model_grad_norm'].update_state(model_norm) + self._metrics['value_grad_norm'].update_state(value_norm) + self._metrics['actor_grad_norm'].update_state(actor_norm) + self._metrics['prior_ent'].update_state(prior_dist.entropy()) + self._metrics['post_ent'].update_state(post_dist.entropy()) + for name, logprob in likes.items(): + self._metrics[name + '_loss'].update_state(-logprob) + self._metrics['div'].update_state(div) + self._metrics['model_loss'].update_state(model_loss) + self._metrics['value_loss'].update_state(value_loss) + self._metrics['actor_loss'].update_state(actor_loss) + self._metrics['action_ent'].update_state(self._actor(feat).entropy()) + + def _image_summaries(self, data, embed, image_pred): + truth = data['image'][:6] + 0.5 + recon = image_pred.mode()[:6] + init, _ = self._dynamics.observe(embed[:6, :5], data['action'][:6, :5]) + init = {k: v[:, -1] for k, v in init.items()} + prior = self._dynamics.imagine(data['action'][:6, 5:], init) + openl = self._decode(self._dynamics.get_feat(prior)).mode() + model = tf.concat([recon[:, :5] + 0.5, openl + 0.5], 1) + error = (model - truth + 1) / 2 + openl = tf.concat([truth, model, error], 2) + tools.graph_summary( + self._writer, tools.video_summary, self._step, 'agent/openl', openl) + + def image_summary_from_data(self, data): + truth = data['image'][:6] + 0.5 + embed = self._encode(data) + post, _ = self._dynamics.observe( + embed[:6, :5], data['action'][:6, :5]) + feat = self._dynamics.get_feat(post) + init = {k: v[:, -1] for k, v in post.items()} + recon = self._decode(feat).mode()[:6] + prior = self._dynamics.imagine(data['action'][:6, 5:], init) + openl = self._decode(self._dynamics.get_feat(prior)).mode() + model = tf.concat([recon[:, :5] + 0.5, openl + 0.5], 1) + error = (model - truth + 1) / 2 + openl = tf.concat([truth, model, error], 2) + tools.graph_summary( + self._writer, tools.video_summary, self._step, 'agent/eval_openl', openl) + + def _write_summaries(self): + step = int(self._step.numpy()) + metrics = [(k, float(v.result())) for k, v in self._metrics.items()] + if self._last_log is not None: + duration = time.time() - self._last_time + self._last_time += duration + metrics.append(('fps', (step - self._last_log) / duration)) + self._last_log = step + [m.reset_states() for m in self._metrics.values()] + with (self._c.logdir / 'metrics.jsonl').open('a') as f: + f.write(json.dumps({'step': step, **dict(metrics)}) + '\n') + [tf.summary.scalar('agent/' + k, m) for k, m in metrics] + 
print(f'[{step}]', ' / '.join(f'{k} {v:.1f}' for k, v in metrics)) + self._writer.flush() + + +class SeparationDreamer(Dreamer): + + def __init__(self, config, datadir, actspace, writer): + self._metrics_disen = collections.defaultdict(tf.metrics.Mean) + self._metrics_disen['expl_amount'] + super().__init__(config, datadir, actspace, writer) + + def _train(self, data, log_images): + with tf.GradientTape(persistent=True) as model_tape: + + # main + data["image"] = tf.transpose(data["image"], perm=[0, 1, 3, 4, 2]) + embed = self._encode(data) + post, prior = self._dynamics.observe(embed, data['action']) + feat = self._dynamics.get_feat(post) + + # disen + embed_disen = self._disen_encode(data) + post_disen, prior_disen = self._disen_dynamics.observe( + embed_disen, data['action']) + feat_disen = self._disen_dynamics.get_feat(post_disen) + + # disen image pred + image_pred_disen = self._disen_only_decode(feat_disen) + + # joint image pred + image_pred_joint, image_pred_joint_main, image_pred_joint_disen, mask_pred = self._joint_decode( + feat, feat_disen) + + # reward pred + reward_pred = self._reward(feat) + + # optimize disen reward predictor till optimal + for _ in range(self._c.num_reward_opt_iters): + with tf.GradientTape() as disen_reward_tape: + reward_pred_disen = self._disen_reward( + tf.stop_gradient(feat_disen)) + reward_like_disen = reward_pred_disen.log_prob( + data['reward']) + reward_loss_disen = -tf.reduce_mean(reward_like_disen) + reward_loss_disen /= float( + self._strategy.num_replicas_in_sync) + reward_disen_norm = self._disen_reward_opt( + disen_reward_tape, reward_loss_disen) + + # disen reward pred with optimal reward predictor + reward_pred_disen = self._disen_reward(feat_disen) + reward_like_disen = tf.reduce_mean( + reward_pred_disen.log_prob(data['reward'])) + + # main model loss + likes = tools.AttrDict() + likes.image = tf.reduce_mean( + image_pred_joint.log_prob(data['image'])) + likes.reward = tf.reduce_mean(reward_pred.log_prob( + data['reward'])) * self._c.reward_scale + if self._c.pcont: + pcont_pred = self._pcont(feat) + pcont_target = self._c.discount * data['discount'] + likes.pcont = tf.reduce_mean(pcont_pred.log_prob(pcont_target)) + likes.pcont *= self._c.pcont_scale + + prior_dist = self._dynamics.get_dist(prior) + post_dist = self._dynamics.get_dist(post) + div = tf.reduce_mean(tfd.kl_divergence(post_dist, prior_dist)) + div = tf.maximum(div, self._c.free_nats) + + model_loss = self._c.kl_scale * div - sum(likes.values()) + model_loss /= float(self._strategy.num_replicas_in_sync) + + # disen model loss with reward negative gradient + likes_disen = tools.AttrDict() + likes_disen.image = tf.reduce_mean( + image_pred_joint.log_prob(data['image'])) + likes_disen.disen_only = tf.reduce_mean( + image_pred_disen.log_prob(data['image'])) + + reward_like_disen = reward_pred_disen.log_prob(data['reward']) + reward_like_disen = tf.reduce_mean(reward_like_disen) + reward_loss_disen = -reward_like_disen + + prior_dist_disen = self._disen_dynamics.get_dist(prior_disen) + post_dist_disen = self._disen_dynamics.get_dist(post_disen) + div_disen = tf.reduce_mean(tfd.kl_divergence( + post_dist_disen, prior_dist_disen)) + div_disen = tf.maximum(div_disen, self._c.free_nats) + + model_loss_disen = div_disen * self._c.disen_kl_scale + \ + reward_like_disen * self._c.disen_neg_rew_scale - \ + likes_disen.image - likes_disen.disen_only * self._c.disen_rec_scale + model_loss_disen /= float(self._strategy.num_replicas_in_sync) + + decode_loss = model_loss_disen + model_loss + 
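+            # Descriptive note (added): the task branch minimises `model_loss`
+            # as in vanilla Dreamer; the distractor branch minimises
+            # `model_loss_disen`, which adds the distractor reward
+            # log-likelihood (scaled by disen_neg_rew_scale) so its features
+            # are pushed to carry no reward information; the joint mask decoder
+            # is trained on `decode_loss`, the sum of the two model losses.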
+ with tf.GradientTape() as actor_tape: + imag_feat = self._imagine_ahead(post) + reward = self._reward(imag_feat).mode() + if self._c.pcont: + pcont = self._pcont(imag_feat).mean() + else: + pcont = self._c.discount * tf.ones_like(reward) + value = self._value(imag_feat).mode() + returns = tools.lambda_return( + reward[:-1], value[:-1], pcont[:-1], + bootstrap=value[-1], lambda_=self._c.disclam, axis=0) + discount = tf.stop_gradient(tf.math.cumprod(tf.concat( + [tf.ones_like(pcont[:1]), pcont[:-2]], 0), 0)) + actor_loss = -tf.reduce_mean(discount * returns) + actor_loss /= float(self._strategy.num_replicas_in_sync) + + with tf.GradientTape() as value_tape: + value_pred = self._value(imag_feat)[:-1] + target = tf.stop_gradient(returns) + value_loss = - \ + tf.reduce_mean(discount * value_pred.log_prob(target)) + value_loss /= float(self._strategy.num_replicas_in_sync) + + model_norm = self._model_opt(model_tape, model_loss) + model_disen_norm = self._disen_opt(model_tape, model_loss_disen) + decode_norm = self._decode_opt(model_tape, decode_loss) + actor_norm = self._actor_opt(actor_tape, actor_loss) + value_norm = self._value_opt(value_tape, value_loss) + + if tf.distribute.get_replica_context().replica_id_in_sync_group == 0: + if self._c.log_scalars: + self._scalar_summaries( + data, feat, prior_dist, post_dist, likes, div, + model_loss, value_loss, actor_loss, model_norm, value_norm, actor_norm) + self._scalar_summaries_disen( + prior_dist_disen, post_dist_disen, likes_disen, div_disen, + model_loss_disen, reward_loss_disen, + model_disen_norm, reward_disen_norm) + if tf.equal(log_images, True): + self._image_summaries_joint( + data, embed, embed_disen, image_pred_joint, mask_pred) + self._image_summaries( + self._disen_dynamics, self._disen_decode, data, embed_disen, image_pred_joint_disen, tag='disen/openl_joint_disen') + self._image_summaries( + self._disen_dynamics, self._disen_only_decode, data, embed_disen, image_pred_disen, tag='disen_only/openl_disen_only') + self._image_summaries( + self._dynamics, self._main_decode, data, embed, image_pred_joint_main, tag='main/openl_joint_main') + + def _build_model(self): + acts = dict( + elu=tf.nn.elu, relu=tf.nn.relu, swish=tf.nn.swish, + leaky_relu=tf.nn.leaky_relu) + cnn_act = acts[self._c.cnn_act] + act = acts[self._c.dense_act] + + # Distractor dynamic model + self._disen_encode = models.ConvEncoder( + self._c.disen_cnn_depth, cnn_act, self._c.image_size) + self._disen_dynamics = models.RSSM( + self._c.disen_stoch_size, self._c.disen_deter_size, self._c.disen_deter_size) + self._disen_only_decode = models.ConvDecoder( + self._c.disen_cnn_depth, cnn_act, (self._c.image_size, self._c.image_size, 3)) + self._disen_reward = models.DenseDecoder( + (), 2, self._c.num_units, act=act) + + # Task dynamic model + self._encode = models.ConvEncoder( + self._c.cnn_depth, cnn_act, self._c.image_size) + self._dynamics = models.RSSM( + self._c.stoch_size, self._c.deter_size, self._c.deter_size) + self._reward = models.DenseDecoder((), 2, self._c.num_units, act=act) + if self._c.pcont: + self._pcont = models.DenseDecoder( + (), 3, self._c.num_units, 'binary', act=act) + self._value = models.DenseDecoder((), 3, self._c.num_units, act=act) + self._actor = models.ActionDecoder( + self._actdim, 4, self._c.num_units, self._c.action_dist, + init_std=self._c.action_init_std, act=act) + + # Joint decode + self._main_decode = models.ConvDecoderMask( + self._c.cnn_depth, cnn_act, (self._c.image_size, self._c.image_size, 3)) + self._disen_decode = 
models.ConvDecoderMask( + self._c.disen_cnn_depth, cnn_act, (self._c.image_size, self._c.image_size, 3)) + self._joint_decode = models.ConvDecoderMaskEnsemble( + self._main_decode, self._disen_decode, self._c.precision + ) + + disen_modules = [self._disen_encode, + self._disen_dynamics, self._disen_only_decode] + model_modules = [self._encode, self._dynamics, self._reward] + if self._c.pcont: + model_modules.append(self._pcont) + + Optimizer = functools.partial( + tools.Adam, wd=self._c.weight_decay, clip=self._c.grad_clip, + wdpattern=self._c.weight_decay_pattern) + self._model_opt = Optimizer('model', model_modules, self._c.model_lr) + self._disen_opt = Optimizer('disen', disen_modules, self._c.model_lr) + self._decode_opt = Optimizer( + 'decode', [self._joint_decode], self._c.model_lr) + self._disen_reward_opt = Optimizer( + 'disen_reward', [self._disen_reward], self._c.disen_reward_lr) + self._value_opt = Optimizer('value', [self._value], self._c.value_lr) + self._actor_opt = Optimizer('actor', [self._actor], self._c.actor_lr) + self.train(next(self._dataset)) + + def _scalar_summaries_disen( + self, prior_dist_disen, post_dist_disen, likes_disen, div_disen, + model_loss_disen, reward_loss_disen, + model_disen_norm, reward_disen_norm): + self._metrics_disen['model_grad_norm'].update_state(model_disen_norm) + self._metrics_disen['reward_grad_norm'].update_state(reward_disen_norm) + self._metrics_disen['prior_ent'].update_state( + prior_dist_disen.entropy()) + self._metrics_disen['post_ent'].update_state(post_dist_disen.entropy()) + for name, logprob in likes_disen.items(): + self._metrics_disen[name + '_loss'].update_state(-logprob) + self._metrics_disen['div'].update_state(div_disen) + self._metrics_disen['model_loss'].update_state(model_loss_disen) + self._metrics_disen['reward_loss'].update_state( + reward_loss_disen) + + def _image_summaries(self, dynamics, decoder, data, embed, image_pred, tag='agent/openl'): + truth = data['image'][:6] + 0.5 + recon = image_pred.mode()[:6] + init, _ = dynamics.observe(embed[:6, :5], data['action'][:6, :5]) + init = {k: v[:, -1] for k, v in init.items()} + prior = dynamics.imagine(data['action'][:6, 5:], init) + if isinstance(decoder, models.ConvDecoderMask): + openl, _ = decoder(dynamics.get_feat(prior)) + openl = openl.mode() + else: + openl = decoder(dynamics.get_feat(prior)).mode() + model = tf.concat([recon[:, :5] + 0.5, openl + 0.5], 1) + error = (model - truth + 1) / 2 + openl = tf.concat([truth, model, error], 2) + tools.graph_summary( + self._writer, tools.video_summary, self._step, tag, openl) + + def _image_summaries_joint(self, data, embed, embed_disen, image_pred_joint, mask_pred): + truth = data['image'][:6] + 0.5 + recon_joint = image_pred_joint.mode()[:6] + mask_pred = mask_pred[:6] + + init, _ = self._dynamics.observe( + embed[:6, :5], data['action'][:6, :5]) + init_disen, _ = self._disen_dynamics.observe( + embed_disen[:6, :5], data['action'][:6, :5]) + init = {k: v[:, -1] for k, v in init.items()} + init_disen = {k: v[:, -1] for k, v in init_disen.items()} + prior = self._dynamics.imagine( + data['action'][:6, 5:], init) + prior_disen = self._disen_dynamics.imagine( + data['action'][:6, 5:], init_disen) + + feat = self._dynamics.get_feat(prior) + feat_disen = self._disen_dynamics.get_feat(prior_disen) + openl, _, _, openl_mask = self._joint_decode(feat, feat_disen) + + openl = openl.mode() + model = tf.concat([recon_joint[:, :5] + 0.5, openl + 0.5], 1) + error = (model - truth + 1) / 2 + openl = tf.concat([truth, model, error], 
2) + openl_mask = tf.concat([mask_pred[:, :5] + 0.5, openl_mask + 0.5], 1) + + tools.graph_summary( + self._writer, tools.video_summary, self._step, 'joint/openl_joint', openl) + tools.graph_summary( + self._writer, tools.video_summary, self._step, 'mask/openl_mask', openl_mask) + + def image_summary_from_data(self, data): + truth = data['image'][:6] + 0.5 + + # main + embed = self._encode(data) + post, _ = self._dynamics.observe( + embed[:6, :5], data['action'][:6, :5]) + feat = self._dynamics.get_feat(post) + init = {k: v[:, -1] for k, v in post.items()} + + # disen + embed_disen = self._disen_encode(data) + post_disen, _ = self._disen_dynamics.observe( + embed_disen[:6, :5], data['action'][:6, :5]) + feat_disen = self._disen_dynamics.get_feat(post_disen) + init_disen = {k: v[:, -1] for k, v in post_disen.items()} + + # joint image pred + recon_joint, recon_main, recon_disen, recon_mask = self._joint_decode( + feat, feat_disen) + recon_joint = recon_joint.mode()[:6] + recon_main = recon_main.mode()[:6] + recon_disen = recon_disen.mode()[:6] + recon_mask = recon_mask[:6] + + prior = self._dynamics.imagine( + data['action'][:6, 5:], init) + prior_disen = self._disen_dynamics.imagine( + data['action'][:6, 5:], init_disen) + feat = self._dynamics.get_feat(prior) + feat_disen = self._disen_dynamics.get_feat(prior_disen) + + openl_joint, openl_main, openl_disen, openl_mask = self._joint_decode( + feat, feat_disen) + openl_joint = openl_joint.mode() + openl_main = openl_main.mode() + openl_disen = openl_disen.mode() + + model_joint = tf.concat( + [recon_joint[:, :5] + 0.5, openl_joint + 0.5], 1) + error_joint = (model_joint - truth + 1) / 2 + model_main = tf.concat( + [recon_main[:, :5] + 0.5, openl_main + 0.5], 1) + model_disen = tf.concat( + [recon_disen[:, :5] + 0.5, openl_disen + 0.5], 1) + model_mask = tf.concat( + [recon_mask[:, :5] + 0.5, openl_mask + 0.5], 1) + + output_joint = tf.concat( + [truth, model_main, model_disen, model_joint, error_joint], 2) + output_mask = model_mask + + tools.graph_summary( + self._writer, tools.video_summary, self._step, 'summary/openl', output_joint) + tools.graph_summary( + self._writer, tools.video_summary, self._step, 'summary/openl_mask', output_mask) + + def _write_summaries(self): + step = int(self._step.numpy()) + metrics = [(k, float(v.result())) for k, v in self._metrics.items()] + metrics_disen = [(k, float(v.result())) + for k, v in self._metrics_disen.items()] + if self._last_log is not None: + duration = time.time() - self._last_time + self._last_time += duration + metrics.append(('fps', (step - self._last_log) / duration)) + self._last_log = step + [m.reset_states() for m in self._metrics.values()] + [m.reset_states() for m in self._metrics_disen.values()] + with (self._c.logdir / 'metrics.jsonl').open('a') as f: + f.write(json.dumps({'step': step, **dict(metrics)}) + '\n') + [tf.summary.scalar('agent/' + k, m) for k, m in metrics] + [tf.summary.scalar('disen/' + k, m) for k, m in metrics_disen] + print('#'*30 + ' Main ' + '#'*30) + print(f'[{step}]', ' / '.join(f'{k} {v:.1f}' for k, v in metrics)) + print('#'*30 + ' Disen ' + '#'*30) + print(f'[{step}]', ' / '.join(f'{k} {v:.1f}' for k, v in metrics_disen)) + self._writer.flush() + +class InverseDreamer(Dreamer): + + def __init__(self, config, datadir, actspace, writer): + super().__init__(config, datadir, actspace, writer) + + def _train(self, data, log_images): + with tf.GradientTape() as model_tape: + embed = self._encode(data) + post, prior = self._dynamics.observe(embed, 
data['action']) + feat = self._dynamics.get_feat(post) + + action_pred = self._decode(feat) + reward_pred = self._reward(feat) + + likes = tools.AttrDict() + likes.action = tf.reduce_mean( + action_pred.log_prob(data['action'][:, :-1])) + likes.reward = tf.reduce_mean( + reward_pred.log_prob(data['reward'])) + if self._c.pcont: + pcont_pred = self._pcont(feat) + pcont_target = self._c.discount * data['discount'] + likes.pcont = tf.reduce_mean(pcont_pred.log_prob(pcont_target)) + likes.pcont *= self._c.pcont_scale + + prior_dist = self._dynamics.get_dist(prior) + post_dist = self._dynamics.get_dist(post) + div = tf.reduce_mean(tfd.kl_divergence(post_dist, prior_dist)) + div = tf.maximum(div, self._c.free_nats) + + model_loss = self._c.kl_scale * div - sum(likes.values()) + model_loss /= float(self._strategy.num_replicas_in_sync) + + with tf.GradientTape() as actor_tape: + imag_feat = self._imagine_ahead(post) + reward = self._reward(imag_feat).mode() + if self._c.pcont: + pcont = self._pcont(imag_feat).mean() + else: + pcont = self._c.discount * tf.ones_like(reward) + value = self._value(imag_feat).mode() + returns = tools.lambda_return( + reward[:-1], value[:-1], pcont[:-1], + bootstrap=value[-1], lambda_=self._c.disclam, axis=0) + discount = tf.stop_gradient(tf.math.cumprod(tf.concat( + [tf.ones_like(pcont[:1]), pcont[:-2]], 0), 0)) + actor_loss = -tf.reduce_mean(discount * returns) + actor_loss /= float(self._strategy.num_replicas_in_sync) + + with tf.GradientTape() as value_tape: + value_pred = self._value(imag_feat)[:-1] + target = tf.stop_gradient(returns) + value_loss = - \ + tf.reduce_mean(discount * value_pred.log_prob(target)) + value_loss /= float(self._strategy.num_replicas_in_sync) + + model_norm = self._model_opt(model_tape, model_loss) + actor_norm = self._actor_opt(actor_tape, actor_loss) + value_norm = self._value_opt(value_tape, value_loss) + + if tf.distribute.get_replica_context().replica_id_in_sync_group == 0: + if self._c.log_scalars: + self._scalar_summaries( + data, feat, prior_dist, post_dist, likes, div, + model_loss, value_loss, actor_loss, model_norm, value_norm, + actor_norm) + + def _build_model(self): + acts = dict( + elu=tf.nn.elu, relu=tf.nn.relu, swish=tf.nn.swish, + leaky_relu=tf.nn.leaky_relu) + cnn_act = acts[self._c.cnn_act] + act = acts[self._c.dense_act] + self._encode = models.ConvEncoder( + self._c.cnn_depth, cnn_act, self._c.image_size) + self._dynamics = models.RSSM( + self._c.stoch_size, self._c.deter_size, self._c.deter_size) + self._decode = models.InverseDecoder( + self._actdim, 4, self._c.num_units, act=act) + self._reward = models.DenseDecoder((), 2, self._c.num_units, act=act) + if self._c.pcont: + self._pcont = models.DenseDecoder( + (), 3, self._c.num_units, 'binary', act=act) + self._value = models.DenseDecoder((), 3, self._c.num_units, act=act) + self._actor = models.ActionDecoder( + self._actdim, 4, self._c.num_units, self._c.action_dist, + init_std=self._c.action_init_std, act=act) + model_modules = [self._encode, self._dynamics, + self._decode, self._reward] + if self._c.pcont: + model_modules.append(self._pcont) + Optimizer = functools.partial( + tools.Adam, wd=self._c.weight_decay, clip=self._c.grad_clip, + wdpattern=self._c.weight_decay_pattern) + self._model_opt = Optimizer('model', model_modules, self._c.model_lr) + self._value_opt = Optimizer('value', [self._value], self._c.value_lr) + self._actor_opt = Optimizer('actor', [self._actor], self._c.actor_lr) + self.train(next(self._dataset)) \ No newline at end of file diff --git 
a/Dreamer/env_tools.py b/Dreamer/env_tools.py new file mode 100644 index 0000000..dd1d551 --- /dev/null +++ b/Dreamer/env_tools.py @@ -0,0 +1,84 @@ +import os +import json +import dmc2gym +import tensorflow as tf +from tensorflow.keras.mixed_precision import experimental as prec + +import tools +import wrappers + +def preprocess(obs, config): + dtype = prec.global_policy().compute_dtype + obs = obs.copy() + with tf.device('cpu:0'): + obs['image'] = tf.cast(obs['image'], dtype) / 255.0 - 0.5 + clip_rewards = dict(none=lambda x: x, tanh=tf.tanh)[ + config.clip_rewards] + obs['reward'] = clip_rewards(obs['reward']) + return obs + +def count_steps(datadir, config): + return tools.count_episodes(datadir)[1] * config.action_repeat + +def summarize_episode(episode, config, datadir, writer, prefix): + episodes, steps = tools.count_episodes(datadir) + length = (len(episode['reward']) - 1) * config.action_repeat + ret = episode['reward'].sum() + print(f'{prefix.title()} episode of length {length} with return {ret:.1f}.') + metrics = [ + (f'{prefix}/return', float(episode['reward'].sum())), + (f'{prefix}/length', len(episode['reward']) - 1), + ('episodes', episodes)] + + step = count_steps(datadir, config) + with (config.logdir / 'metrics.jsonl').open('a') as f: + f.write(json.dumps(dict([('step', step)] + metrics)) + '\n') + with writer.as_default(): # Env might run in a different thread. + tf.summary.experimental.set_step(step) + [tf.summary.scalar('sim/' + k, v) for k, v in metrics] + if prefix == 'test': + tools.video_summary(f'sim/{prefix}/video', episode['image'][None]) + +def make_env(config, writer, prefix, datadir, video_dir, store): + suite, domain_task_distractor = config.task.split('_', 1) + domain, task_distractor = domain_task_distractor.split('_', 1) + task, distractor = task_distractor.split('_', 1) + + if distractor == 'driving': + img_source = 'video' + total_frames = 1000 + resource_files = os.path.join(video_dir, '*.mp4') + elif distractor == 'noise': + img_source = 'noise' + total_frames = None + resource_files = None + elif distractor == 'none': + img_source = None + total_frames = None + resource_files = None + else: + raise NotImplementedError + + env = dmc2gym.make( + domain_name=domain, + task_name=task, + resource_files=resource_files, + img_source=img_source, + total_frames=total_frames, + seed=config.seed, + visualize_reward=False, + from_pixels=True, + height=config.image_size, + width=config.image_size, + frame_skip=config.action_repeat + ) + env = wrappers.DMC2GYMWrapper(env) + env = wrappers.TimeLimit(env, config.time_limit / config.action_repeat) + callbacks = [] + if store: + callbacks.append(lambda ep: tools.save_episodes(datadir, [ep])) + callbacks.append( + lambda ep: summarize_episode(ep, config, datadir, writer, prefix)) + env = wrappers.Collect(env, callbacks, config.precision) + env = wrappers.RewardObs(env) + return env diff --git a/Dreamer/local_dm_control_suite/README.md b/Dreamer/local_dm_control_suite/README.md new file mode 100755 index 0000000..135ab42 --- /dev/null +++ b/Dreamer/local_dm_control_suite/README.md @@ -0,0 +1,56 @@ +# DeepMind Control Suite. + +This submodule contains the domains and tasks described in the +[DeepMind Control Suite tech report](https://arxiv.org/abs/1801.00690). 
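+
+Note: in this repository the suite is vendored as the `local_dm_control_suite`
+package rather than installed as `dm_control.suite` (the Dreamer wrappers use
+`import local_dm_control_suite as suite`), so a minimal local equivalent of the
+quickstart below, run from the `Dreamer/` directory so the package is
+importable, would be:
+
+```python
+import local_dm_control_suite as suite
+
+env = suite.load(domain_name="cheetah", task_name="run")
+time_step = env.reset()
+```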
+ +## Quickstart + +```python +from dm_control import suite +import numpy as np + +# Load one task: +env = suite.load(domain_name="cartpole", task_name="swingup") + +# Iterate over a task set: +for domain_name, task_name in suite.BENCHMARKING: + env = suite.load(domain_name, task_name) + +# Step through an episode and print out reward, discount and observation. +action_spec = env.action_spec() +time_step = env.reset() +while not time_step.last(): + action = np.random.uniform(action_spec.minimum, + action_spec.maximum, + size=action_spec.shape) + time_step = env.step(action) + print(time_step.reward, time_step.discount, time_step.observation) +``` + +## Illustration video + +Below is a video montage of solved Control Suite tasks, with reward +visualisation enabled. + +[![Video montage](https://img.youtube.com/vi/rAai4QzcYbs/0.jpg)](https://www.youtube.com/watch?v=rAai4QzcYbs) + + +### Quadruped domain [April 2019] + +Roughly based on the 'ant' model introduced by [Schulman et al. 2015](https://arxiv.org/abs/1506.02438). Main modifications to the body are: + +- 4 DoFs per leg, 1 constraining tendon. +- 3 actuators per leg: 'yaw', 'lift', 'extend'. +- Filtered position actuators with timescale of 100ms. +- Sensors include an IMU, force/torque sensors, and rangefinders. + +Four tasks: + +- `walk` and `run`: self-right the body then move forward at a desired speed. +- `escape`: escape a bowl-shaped random terrain (uses rangefinders). +- `fetch`, go to a moving ball and bring it to a target. + +All behaviors in the video below were trained with [Abdolmaleki et al's +MPO](https://arxiv.org/abs/1806.06920). + +[![Video montage](https://img.youtube.com/vi/RhRLjbb7pBE/0.jpg)](https://www.youtube.com/watch?v=RhRLjbb7pBE) diff --git a/Dreamer/local_dm_control_suite/__init__.py b/Dreamer/local_dm_control_suite/__init__.py new file mode 100755 index 0000000..c4d7cb9 --- /dev/null +++ b/Dreamer/local_dm_control_suite/__init__.py @@ -0,0 +1,151 @@ +# Copyright 2017 The dm_control Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""A collection of MuJoCo-based Reinforcement Learning environments.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections +import inspect +import itertools + +from dm_control.rl import control + +from local_dm_control_suite import acrobot +from local_dm_control_suite import ball_in_cup +from local_dm_control_suite import cartpole +from local_dm_control_suite import cheetah +from local_dm_control_suite import finger +from local_dm_control_suite import fish +from local_dm_control_suite import hopper +from local_dm_control_suite import humanoid +from local_dm_control_suite import humanoid_CMU +from local_dm_control_suite import lqr +from local_dm_control_suite import manipulator +from local_dm_control_suite import pendulum +from local_dm_control_suite import point_mass +from local_dm_control_suite import quadruped +from local_dm_control_suite import reacher +from local_dm_control_suite import stacker +from local_dm_control_suite import swimmer +from local_dm_control_suite import walker + +# Find all domains imported. +_DOMAINS = {name: module for name, module in locals().items() + if inspect.ismodule(module) and hasattr(module, 'SUITE')} + + +def _get_tasks(tag): + """Returns a sequence of (domain name, task name) pairs for the given tag.""" + result = [] + + for domain_name in sorted(_DOMAINS.keys()): + domain = _DOMAINS[domain_name] + + if tag is None: + tasks_in_domain = domain.SUITE + else: + tasks_in_domain = domain.SUITE.tagged(tag) + + for task_name in tasks_in_domain.keys(): + result.append((domain_name, task_name)) + + return tuple(result) + + +def _get_tasks_by_domain(tasks): + """Returns a dict mapping from task name to a tuple of domain names.""" + result = collections.defaultdict(list) + + for domain_name, task_name in tasks: + result[domain_name].append(task_name) + + return {k: tuple(v) for k, v in result.items()} + + +# A sequence containing all (domain name, task name) pairs. +ALL_TASKS = _get_tasks(tag=None) + +# Subsets of ALL_TASKS, generated via the tag mechanism. +BENCHMARKING = _get_tasks('benchmarking') +EASY = _get_tasks('easy') +HARD = _get_tasks('hard') +EXTRA = tuple(sorted(set(ALL_TASKS) - set(BENCHMARKING))) + +# A mapping from each domain name to a sequence of its task names. +TASKS_BY_DOMAIN = _get_tasks_by_domain(ALL_TASKS) + + +def load(domain_name, task_name, task_kwargs=None, environment_kwargs=None, + visualize_reward=False): + """Returns an environment from a domain name, task name and optional settings. + + ```python + env = suite.load('cartpole', 'balance') + ``` + + Args: + domain_name: A string containing the name of a domain. + task_name: A string containing the name of a task. + task_kwargs: Optional `dict` of keyword arguments for the task. + environment_kwargs: Optional `dict` specifying keyword arguments for the + environment. + visualize_reward: Optional `bool`. If `True`, object colours in rendered + frames are set to indicate the reward at each step. Default `False`. + + Returns: + The requested environment. + """ + return build_environment(domain_name, task_name, task_kwargs, + environment_kwargs, visualize_reward) + + +def build_environment(domain_name, task_name, task_kwargs=None, + environment_kwargs=None, visualize_reward=False): + """Returns an environment from the suite given a domain name and a task name. 
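+
+  For example, mirroring how the dmc2gym wrapper in this repository seeds the
+  task through `task_kwargs`:
+
+  ```python
+  env = build_environment('walker', 'walk', task_kwargs={'random': 1})
+  ```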
+ + Args: + domain_name: A string containing the name of a domain. + task_name: A string containing the name of a task. + task_kwargs: Optional `dict` specifying keyword arguments for the task. + environment_kwargs: Optional `dict` specifying keyword arguments for the + environment. + visualize_reward: Optional `bool`. If `True`, object colours in rendered + frames are set to indicate the reward at each step. Default `False`. + + Raises: + ValueError: If the domain or task doesn't exist. + + Returns: + An instance of the requested environment. + """ + if domain_name not in _DOMAINS: + raise ValueError('Domain {!r} does not exist.'.format(domain_name)) + + domain = _DOMAINS[domain_name] + + if task_name not in domain.SUITE: + raise ValueError('Level {!r} does not exist in domain {!r}.'.format( + task_name, domain_name)) + + task_kwargs = task_kwargs or {} + if environment_kwargs is not None: + task_kwargs = task_kwargs.copy() + task_kwargs['environment_kwargs'] = environment_kwargs + env = domain.SUITE[task_name](**task_kwargs) + env.task.visualize_reward = visualize_reward + return env diff --git a/Dreamer/local_dm_control_suite/__pycache__/__init__.cpython-37.pyc b/Dreamer/local_dm_control_suite/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..92bd50cdd8bfcc514b7f0b72c833764168916645 GIT binary patch literal 4122 zcmdT{OLG(174DZ>viw9g#s&tiVZau{$Wy6QWx^0D0>eNCJXP2en66OmYxR}YxF7QE zZiAinW=v(5Z1M+8cJqI->TOo}3$x01Zp*fcfeOnRsdVml-}k(a?v9VwHGKX(yX)mM zn)V;+9DgbRf5b2Om#%4^=4hVo8G#-ej-eAaflbF$*aEg3OW{gTaVnUv1h!+Vw(3;T z+CeQGaYoddYETbHol%8rz+=vs!Xv=r&bY#L;0b3!;Zfj8XHwxY;3;QH;c?(I&KZR# zfE!Lj;Yr|WXIkMY;2CE|;WNNzowEuzfM=aqg{OhfIp-9f0iJW_6g~?)@60Pa3w+)= zukbnG3(f_F=YTId7lG%!^KUfq$)R?1>8-Y+9qP_y?}B&njpi`#6Ymn*1;M)_ys_VIvrZfYqLcb@#NsY{lKm;Z7k|FV6XCI^!jHPK?1)fADSIrqjQnVeiD=iC zaYTbj>%XM@rd~|)%_I)8RJeZFk7bIXn&oOD?!719p>GB|v}qIz{C*bjG^TE?FCs4svJkl5kLjTyPZIP-b~5hCtS{)WsuaA_ zQ+HI8ly|lV@Rc8i3fixDfWZ!|Uwuqa@`-!+B{x8PE!SRL`HOL&9~h~2r02$4U0SJ; z8=iJ(NGmsT{h!8;1>NXHW$n91>-U>xQT3yw4;2+PKM7+zR20=7Pk5S2%o?pP3M=BF zDC~0bM@?SFT|a%b+>1l8yem8&rOT<$msh|5Ub-B_9Uiz|=ngdCCYhg#Wfx-iQ`c?v z_kOD1_WVxTiNpS#FX;6ah^^VWTmU#>&{m@Z2D|8NX$Si2=c$(JN5)&2;fnU+Ay@^Y z^V)%#BM5BQWGgpcng>=iM(vT6Yj5=f>-DAFqCMErIH=_MQ3b-NbRi&J+gW+0?KGZg zshz{HAsl1>>!%{kWRx(@60wtss3XXHm%KRSe#A%~H(AP)Z3UaG&wZIN7&A+If^GS` zBEs}mYqeP|>;(2IyrSl=eEX#RXmzcyw#8l|=fIF-W7QTWZ4UE_yrPn3eORMin3yPR zDUvKmi_!94x70(~aVF{hglxGSC`M>Ntx*~(3BxT#-Q5zYtJeP({iFe+8J5w|Yeo&f zF=IlXHRMfc^#{oro4!z(6rGYFRKhk^`zk|6mWxd>U#@iSX$ z>xpN4%tl%~v~Bwprzfy)k9pf5)o#!w@Z(pp^cWUoDqo$1fYURE(wY0AqbUCR-96&r zgGVYpl>CmfWBEqpz>Ej}zHp_$OTFQTZpLx2(PEFfY-3$!V&f)@H(w&uQ&x^ML@dNI ziLfK2@CN61B?O4LCk5${+|iG~FHeOMZ+QpfDWhZs9f*F5t&|qeHXiUG5gSV90~0DW zw+_WfilbQX4Vx6PfLwHDGxGz_9THWua_Sq!s8U}kyrMC*Y9P>J>Rlnc>rNJ#+6x%G z$~4+cO_yJxcj6dTbWT!ficSvK*sjXiRQHf;D2E*CKj`Ex$TY|wdF7>v`7beVU|z=x zM4cbtO8q;&b^wR+%+0++?O(zx5%Z*76$o|>JHMn7mFlXgm1n^+WEN~wj($*O3OhtLkE1&EB7y~l(o)Z z7vzCDm__mN$@|U6Vpjz0J#!RN2Ct&o)~ftH+^JcWSJ4#pQyx%c$1>!ps)Guv6Zes> zhzohE+;Ey3zr=t#Momd7bqf;)@s5|v-(e!dPt^q4bWNWyYDQhR^*U8K>N^Fh)-}!5 z)dwZ{yr{ZJ@EzAhp@jbssQiTMN-6}TNrcKExlCjpq!>XamnlMP;*6hGxh)w;3L(Yp z`|DpxiC0pfk(9#a2od_TD@Tcp5g8{kL1dB$WoUVZ2o-E{nh5?BYw|3SSt92^3cIXI zXdJK50Mw=F#fcj&;Ai_Pt|0Y(HI5)sehH zN0|uvPUZgd^`~uTQf+hZem^K&RmfDr8xp*#TNrJv-7fzi+@WVDk3lSBOvkqrK8j4$ bqmd7P%^9oKsEyXmQeAUq&D8YzIZgi``cZe} literal 0 HcmV?d00001 diff --git a/Dreamer/local_dm_control_suite/__pycache__/acrobot.cpython-37.pyc b/Dreamer/local_dm_control_suite/__pycache__/acrobot.cpython-37.pyc new file mode 100644 index 
[GIT binary patches omitted: the commit also adds compiled bytecode caches under Dreamer/local_dm_control_suite/__pycache__/ (__init__ and the acrobot through walker domain modules, *.cpython-37.pyc); the base85 binary literals are not human-readable.]
diff --git a/Dreamer/local_dm_control_suite/acrobot.py b/Dreamer/local_dm_control_suite/acrobot.py
new file mode 100755
index 0000000..a12b892
--- /dev/null
+++ b/Dreamer/local_dm_control_suite/acrobot.py
@@ -0,0 +1,127 @@
+# Copyright 2017 The dm_control Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Acrobot domain.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections + +from dm_control import mujoco +from dm_control.rl import control +from local_dm_control_suite import base +from local_dm_control_suite import common +from dm_control.utils import containers +from dm_control.utils import rewards +import numpy as np + +_DEFAULT_TIME_LIMIT = 10 +SUITE = containers.TaggedTasks() + + +def get_model_and_assets(): + """Returns a tuple containing the model XML string and a dict of assets.""" + return common.read_model('acrobot.xml'), common.ASSETS + + +@SUITE.add('benchmarking') +def swingup(time_limit=_DEFAULT_TIME_LIMIT, random=None, + environment_kwargs=None): + """Returns Acrobot balance task.""" + physics = Physics.from_xml_string(*get_model_and_assets()) + task = Balance(sparse=False, random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, time_limit=time_limit, **environment_kwargs) + + +@SUITE.add('benchmarking') +def swingup_sparse(time_limit=_DEFAULT_TIME_LIMIT, random=None, + environment_kwargs=None): + """Returns Acrobot sparse balance.""" + physics = Physics.from_xml_string(*get_model_and_assets()) + task = Balance(sparse=True, random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, time_limit=time_limit, **environment_kwargs) + + +class Physics(mujoco.Physics): + """Physics simulation with additional features for the Acrobot domain.""" + + def horizontal(self): + """Returns horizontal (x) component of body frame z-axes.""" + return self.named.data.xmat[['upper_arm', 'lower_arm'], 'xz'] + + def vertical(self): + """Returns vertical (z) component of body frame z-axes.""" + return self.named.data.xmat[['upper_arm', 'lower_arm'], 'zz'] + + def to_target(self): + """Returns the distance from the tip to the target.""" + tip_to_target = (self.named.data.site_xpos['target'] - + self.named.data.site_xpos['tip']) + return np.linalg.norm(tip_to_target) + + def orientations(self): + """Returns the sines and cosines of the pole angles.""" + return np.concatenate((self.horizontal(), self.vertical())) + + +class Balance(base.Task): + """An Acrobot `Task` to swing up and balance the pole.""" + + def __init__(self, sparse, random=None): + """Initializes an instance of `Balance`. + + Args: + sparse: A `bool` specifying whether to use a sparse (indicator) reward. + random: Optional, either a `numpy.random.RandomState` instance, an + integer seed for creating a new `RandomState`, or None to select a seed + automatically (default). + """ + self._sparse = sparse + super(Balance, self).__init__(random=random) + + def initialize_episode(self, physics): + """Sets the state of the environment at the start of each episode. + + Shoulder and elbow are set to a random position between [-pi, pi). + + Args: + physics: An instance of `Physics`. 
+ """ + physics.named.data.qpos[ + ['shoulder', 'elbow']] = self.random.uniform(-np.pi, np.pi, 2) + super(Balance, self).initialize_episode(physics) + + def get_observation(self, physics): + """Returns an observation of pole orientation and angular velocities.""" + obs = collections.OrderedDict() + obs['orientations'] = physics.orientations() + obs['velocity'] = physics.velocity() + return obs + + def _get_reward(self, physics, sparse): + target_radius = physics.named.model.site_size['target', 0] + return rewards.tolerance(physics.to_target(), + bounds=(0, target_radius), + margin=0 if sparse else 1) + + def get_reward(self, physics): + """Returns a sparse or a smooth reward, as specified in the constructor.""" + return self._get_reward(physics, sparse=self._sparse) diff --git a/Dreamer/local_dm_control_suite/acrobot.xml b/Dreamer/local_dm_control_suite/acrobot.xml new file mode 100755 index 0000000..79b76d9 --- /dev/null +++ b/Dreamer/local_dm_control_suite/acrobot.xml @@ -0,0 +1,43 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/Dreamer/local_dm_control_suite/ball_in_cup.py b/Dreamer/local_dm_control_suite/ball_in_cup.py new file mode 100755 index 0000000..ac3e47f --- /dev/null +++ b/Dreamer/local_dm_control_suite/ball_in_cup.py @@ -0,0 +1,100 @@ +# Copyright 2017 The dm_control Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""Ball-in-Cup Domain.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections + +from dm_control import mujoco +from dm_control.rl import control +from local_dm_control_suite import base +from local_dm_control_suite import common +from dm_control.utils import containers + +_DEFAULT_TIME_LIMIT = 20 # (seconds) +_CONTROL_TIMESTEP = .02 # (seconds) + + +SUITE = containers.TaggedTasks() + + +def get_model_and_assets(): + """Returns a tuple containing the model XML string and a dict of assets.""" + return common.read_model('ball_in_cup.xml'), common.ASSETS + + +@SUITE.add('benchmarking', 'easy') +def catch(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): + """Returns the Ball-in-Cup task.""" + physics = Physics.from_xml_string(*get_model_and_assets()) + task = BallInCup(random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP, + **environment_kwargs) + + +class Physics(mujoco.Physics): + """Physics with additional features for the Ball-in-Cup domain.""" + + def ball_to_target(self): + """Returns the vector from the ball to the target.""" + target = self.named.data.site_xpos['target', ['x', 'z']] + ball = self.named.data.xpos['ball', ['x', 'z']] + return target - ball + + def in_target(self): + """Returns 1 if the ball is in the target, 0 otherwise.""" + ball_to_target = abs(self.ball_to_target()) + target_size = self.named.model.site_size['target', [0, 2]] + ball_size = self.named.model.geom_size['ball', 0] + return float(all(ball_to_target < target_size - ball_size)) + + +class BallInCup(base.Task): + """The Ball-in-Cup task. Put the ball in the cup.""" + + def initialize_episode(self, physics): + """Sets the state of the environment at the start of each episode. + + Args: + physics: An instance of `Physics`. + + """ + # Find a collision-free random initial position of the ball. + penetrating = True + while penetrating: + # Assign a random ball position. + physics.named.data.qpos['ball_x'] = self.random.uniform(-.2, .2) + physics.named.data.qpos['ball_z'] = self.random.uniform(.2, .5) + # Check for collisions. + physics.after_reset() + penetrating = physics.data.ncon > 0 + super(BallInCup, self).initialize_episode(physics) + + def get_observation(self, physics): + """Returns an observation of the state.""" + obs = collections.OrderedDict() + obs['position'] = physics.position() + obs['velocity'] = physics.velocity() + return obs + + def get_reward(self, physics): + """Returns a sparse reward.""" + return physics.in_target() diff --git a/Dreamer/local_dm_control_suite/ball_in_cup.xml b/Dreamer/local_dm_control_suite/ball_in_cup.xml new file mode 100755 index 0000000..792073f --- /dev/null +++ b/Dreamer/local_dm_control_suite/ball_in_cup.xml @@ -0,0 +1,54 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/Dreamer/local_dm_control_suite/base.py b/Dreamer/local_dm_control_suite/base.py new file mode 100755 index 0000000..fd78318 --- /dev/null +++ b/Dreamer/local_dm_control_suite/base.py @@ -0,0 +1,112 @@ +# Copyright 2017 The dm_control Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Base class for tasks in the Control Suite.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from dm_control import mujoco +from dm_control.rl import control + +import numpy as np + + +class Task(control.Task): + """Base class for tasks in the Control Suite. + + Actions are mapped directly to the states of MuJoCo actuators: each element of + the action array is used to set the control input for a single actuator. The + ordering of the actuators is the same as in the corresponding MJCF XML file. + + Attributes: + random: A `numpy.random.RandomState` instance. This should be used to + generate all random variables associated with the task, such as random + starting states, observation noise* etc. + + *If sensor noise is enabled in the MuJoCo model then this will be generated + using MuJoCo's internal RNG, which has its own independent state. + """ + + def __init__(self, random=None): + """Initializes a new continuous control task. + + Args: + random: Optional, either a `numpy.random.RandomState` instance, an integer + seed for creating a new `RandomState`, or None to select a seed + automatically (default). + """ + if not isinstance(random, np.random.RandomState): + random = np.random.RandomState(random) + self._random = random + self._visualize_reward = False + + @property + def random(self): + """Task-specific `numpy.random.RandomState` instance.""" + return self._random + + def action_spec(self, physics): + """Returns a `BoundedArraySpec` matching the `physics` actuators.""" + return mujoco.action_spec(physics) + + def initialize_episode(self, physics): + """Resets geom colors to their defaults after starting a new episode. + + Subclasses of `base.Task` must delegate to this method after performing + their own initialization. + + Args: + physics: An instance of `mujoco.Physics`. + """ + self.after_step(physics) + + def before_step(self, action, physics): + """Sets the control signal for the actuators to values in `action`.""" + # Support legacy internal code. 
+ action = getattr(action, "continuous_actions", action) + physics.set_control(action) + + def after_step(self, physics): + """Modifies colors according to the reward.""" + if self._visualize_reward: + reward = np.clip(self.get_reward(physics), 0.0, 1.0) + _set_reward_colors(physics, reward) + + @property + def visualize_reward(self): + return self._visualize_reward + + @visualize_reward.setter + def visualize_reward(self, value): + if not isinstance(value, bool): + raise ValueError("Expected a boolean, got {}.".format(type(value))) + self._visualize_reward = value + + +_MATERIALS = ["self", "effector", "target"] +_DEFAULT = [name + "_default" for name in _MATERIALS] +_HIGHLIGHT = [name + "_highlight" for name in _MATERIALS] + + +def _set_reward_colors(physics, reward): + """Sets the highlight, effector and target colors according to the reward.""" + assert 0.0 <= reward <= 1.0 + colors = physics.named.model.mat_rgba + default = colors[_DEFAULT] + highlight = colors[_HIGHLIGHT] + blend_coef = reward ** 4 # Better color distinction near high rewards. + colors[_MATERIALS] = blend_coef * highlight + (1.0 - blend_coef) * default diff --git a/Dreamer/local_dm_control_suite/cartpole.py b/Dreamer/local_dm_control_suite/cartpole.py new file mode 100755 index 0000000..b8fec14 --- /dev/null +++ b/Dreamer/local_dm_control_suite/cartpole.py @@ -0,0 +1,230 @@ +# Copyright 2017 The dm_control Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================
+
+"""Cartpole domain."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+
+from dm_control import mujoco
+from dm_control.rl import control
+from local_dm_control_suite import base
+from local_dm_control_suite import common
+from dm_control.utils import containers
+from dm_control.utils import rewards
+from lxml import etree
+import numpy as np
+from six.moves import range
+
+
+_DEFAULT_TIME_LIMIT = 10
+SUITE = containers.TaggedTasks()
+
+
+def get_model_and_assets(num_poles=1):
+  """Returns a tuple containing the model XML string and a dict of assets."""
+  return _make_model(num_poles), common.ASSETS
+
+
+@SUITE.add('benchmarking')
+def balance(time_limit=_DEFAULT_TIME_LIMIT, random=None,
+            environment_kwargs=None):
+  """Returns the Cartpole Balance task."""
+  physics = Physics.from_xml_string(*get_model_and_assets())
+  task = Balance(swing_up=False, sparse=False, random=random)
+  environment_kwargs = environment_kwargs or {}
+  return control.Environment(
+      physics, task, time_limit=time_limit, **environment_kwargs)
+
+
+@SUITE.add('benchmarking')
+def balance_sparse(time_limit=_DEFAULT_TIME_LIMIT, random=None,
+                   environment_kwargs=None):
+  """Returns the sparse reward variant of the Cartpole Balance task."""
+  physics = Physics.from_xml_string(*get_model_and_assets())
+  task = Balance(swing_up=False, sparse=True, random=random)
+  environment_kwargs = environment_kwargs or {}
+  return control.Environment(
+      physics, task, time_limit=time_limit, **environment_kwargs)
+
+
+@SUITE.add('benchmarking')
+def swingup(time_limit=_DEFAULT_TIME_LIMIT, random=None,
+            environment_kwargs=None):
+  """Returns the Cartpole Swing-Up task."""
+  physics = Physics.from_xml_string(*get_model_and_assets())
+  task = Balance(swing_up=True, sparse=False, random=random)
+  environment_kwargs = environment_kwargs or {}
+  return control.Environment(
+      physics, task, time_limit=time_limit, **environment_kwargs)
+
+
+@SUITE.add('benchmarking')
+def swingup_sparse(time_limit=_DEFAULT_TIME_LIMIT, random=None,
+                   environment_kwargs=None):
+  """Returns the sparse reward variant of the Cartpole Swing-Up task."""
+  physics = Physics.from_xml_string(*get_model_and_assets())
+  task = Balance(swing_up=True, sparse=True, random=random)
+  environment_kwargs = environment_kwargs or {}
+  return control.Environment(
+      physics, task, time_limit=time_limit, **environment_kwargs)
+
+
+@SUITE.add()
+def two_poles(time_limit=_DEFAULT_TIME_LIMIT, random=None,
+              environment_kwargs=None):
+  """Returns the Cartpole Balance task with two poles."""
+  physics = Physics.from_xml_string(*get_model_and_assets(num_poles=2))
+  task = Balance(swing_up=True, sparse=False, random=random)
+  environment_kwargs = environment_kwargs or {}
+  return control.Environment(
+      physics, task, time_limit=time_limit, **environment_kwargs)
+
+
+@SUITE.add()
+def three_poles(time_limit=_DEFAULT_TIME_LIMIT, random=None, num_poles=3,
+                sparse=False, environment_kwargs=None):
+  """Returns the Cartpole Balance task with three or more poles."""
+  physics = Physics.from_xml_string(*get_model_and_assets(num_poles=num_poles))
+  task = Balance(swing_up=True, sparse=sparse, random=random)
+  environment_kwargs = environment_kwargs or {}
+  return control.Environment(
+      physics, task, time_limit=time_limit, **environment_kwargs)
+
+
+def _make_model(n_poles):
+  """Generates an xml string defining a cart with
`n_poles` bodies.""" + xml_string = common.read_model('cartpole.xml') + if n_poles == 1: + return xml_string + mjcf = etree.fromstring(xml_string) + parent = mjcf.find('./worldbody/body/body') # Find first pole. + # Make chain of poles. + for pole_index in range(2, n_poles+1): + child = etree.Element('body', name='pole_{}'.format(pole_index), + pos='0 0 1', childclass='pole') + etree.SubElement(child, 'joint', name='hinge_{}'.format(pole_index)) + etree.SubElement(child, 'geom', name='pole_{}'.format(pole_index)) + parent.append(child) + parent = child + # Move plane down. + floor = mjcf.find('./worldbody/geom') + floor.set('pos', '0 0 {}'.format(1 - n_poles - .05)) + # Move cameras back. + cameras = mjcf.findall('./worldbody/camera') + cameras[0].set('pos', '0 {} 1'.format(-1 - 2*n_poles)) + cameras[1].set('pos', '0 {} 2'.format(-2*n_poles)) + return etree.tostring(mjcf, pretty_print=True) + + +class Physics(mujoco.Physics): + """Physics simulation with additional features for the Cartpole domain.""" + + def cart_position(self): + """Returns the position of the cart.""" + return self.named.data.qpos['slider'][0] + + def angular_vel(self): + """Returns the angular velocity of the pole.""" + return self.data.qvel[1:] + + def pole_angle_cosine(self): + """Returns the cosine of the pole angle.""" + return self.named.data.xmat[2:, 'zz'] + + def bounded_position(self): + """Returns the state, with pole angle split into sin/cos.""" + return np.hstack((self.cart_position(), + self.named.data.xmat[2:, ['zz', 'xz']].ravel())) + + +class Balance(base.Task): + """A Cartpole `Task` to balance the pole. + + State is initialized either close to the target configuration or at a random + configuration. + """ + _CART_RANGE = (-.25, .25) + _ANGLE_COSINE_RANGE = (.995, 1) + + def __init__(self, swing_up, sparse, random=None): + """Initializes an instance of `Balance`. + + Args: + swing_up: A `bool`, which if `True` sets the cart to the middle of the + slider and the pole pointing towards the ground. Otherwise, sets the + cart to a random position on the slider and the pole to a random + near-vertical position. + sparse: A `bool`, whether to return a sparse or a smooth reward. + random: Optional, either a `numpy.random.RandomState` instance, an + integer seed for creating a new `RandomState`, or None to select a seed + automatically (default). + """ + self._sparse = sparse + self._swing_up = swing_up + super(Balance, self).__init__(random=random) + + def initialize_episode(self, physics): + """Sets the state of the environment at the start of each episode. + + Initializes the cart and pole according to `swing_up`, and in both cases + adds a small random initial velocity to break symmetry. + + Args: + physics: An instance of `Physics`. 
+ """ + nv = physics.model.nv + if self._swing_up: + physics.named.data.qpos['slider'] = .01*self.random.randn() + physics.named.data.qpos['hinge_1'] = np.pi + .01*self.random.randn() + physics.named.data.qpos[2:] = .1*self.random.randn(nv - 2) + else: + physics.named.data.qpos['slider'] = self.random.uniform(-.1, .1) + physics.named.data.qpos[1:] = self.random.uniform(-.034, .034, nv - 1) + physics.named.data.qvel[:] = 0.01 * self.random.randn(physics.model.nv) + super(Balance, self).initialize_episode(physics) + + def get_observation(self, physics): + """Returns an observation of the (bounded) physics state.""" + obs = collections.OrderedDict() + obs['position'] = physics.bounded_position() + obs['velocity'] = physics.velocity() + return obs + + def _get_reward(self, physics, sparse): + if sparse: + cart_in_bounds = rewards.tolerance(physics.cart_position(), + self._CART_RANGE) + angle_in_bounds = rewards.tolerance(physics.pole_angle_cosine(), + self._ANGLE_COSINE_RANGE).prod() + return cart_in_bounds * angle_in_bounds + else: + upright = (physics.pole_angle_cosine() + 1) / 2 + centered = rewards.tolerance(physics.cart_position(), margin=2) + centered = (1 + centered) / 2 + small_control = rewards.tolerance(physics.control(), margin=1, + value_at_margin=0, + sigmoid='quadratic')[0] + small_control = (4 + small_control) / 5 + small_velocity = rewards.tolerance(physics.angular_vel(), margin=5).min() + small_velocity = (1 + small_velocity) / 2 + return upright.mean() * small_control * small_velocity * centered + + def get_reward(self, physics): + """Returns a sparse or a smooth reward, as specified in the constructor.""" + return self._get_reward(physics, sparse=self._sparse) diff --git a/Dreamer/local_dm_control_suite/cartpole.xml b/Dreamer/local_dm_control_suite/cartpole.xml new file mode 100755 index 0000000..e01869d --- /dev/null +++ b/Dreamer/local_dm_control_suite/cartpole.xml @@ -0,0 +1,37 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/Dreamer/local_dm_control_suite/cheetah.py b/Dreamer/local_dm_control_suite/cheetah.py new file mode 100755 index 0000000..7dd2a63 --- /dev/null +++ b/Dreamer/local_dm_control_suite/cheetah.py @@ -0,0 +1,97 @@ +# Copyright 2017 The dm_control Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Cheetah Domain.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections + +from dm_control import mujoco +from dm_control.rl import control +from local_dm_control_suite import base +from local_dm_control_suite import common +from dm_control.utils import containers +from dm_control.utils import rewards + + +# How long the simulation will run, in seconds. +_DEFAULT_TIME_LIMIT = 10 + +# Running speed above which reward is 1. 
+_RUN_SPEED = 10 + +SUITE = containers.TaggedTasks() + + +def get_model_and_assets(): + """Returns a tuple containing the model XML string and a dict of assets.""" + return common.read_model('cheetah.xml'), common.ASSETS + + +@SUITE.add('benchmarking') +def run(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): + """Returns the run task.""" + physics = Physics.from_xml_string(*get_model_and_assets()) + task = Cheetah(random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment(physics, task, time_limit=time_limit, + **environment_kwargs) + + +class Physics(mujoco.Physics): + """Physics simulation with additional features for the Cheetah domain.""" + + def speed(self): + """Returns the horizontal speed of the Cheetah.""" + return self.named.data.sensordata['torso_subtreelinvel'][0] + + +class Cheetah(base.Task): + """A `Task` to train a running Cheetah.""" + + def initialize_episode(self, physics): + """Sets the state of the environment at the start of each episode.""" + # The indexing below assumes that all joints have a single DOF. + assert physics.model.nq == physics.model.njnt + is_limited = physics.model.jnt_limited == 1 + lower, upper = physics.model.jnt_range[is_limited].T + physics.data.qpos[is_limited] = self.random.uniform(lower, upper) + + # Stabilize the model before the actual simulation. + for _ in range(200): + physics.step() + + physics.data.time = 0 + self._timeout_progress = 0 + super(Cheetah, self).initialize_episode(physics) + + def get_observation(self, physics): + """Returns an observation of the state, ignoring horizontal position.""" + obs = collections.OrderedDict() + # Ignores horizontal position to maintain translational invariance. + obs['position'] = physics.data.qpos[1:].copy() + obs['velocity'] = physics.velocity() + return obs + + def get_reward(self, physics): + """Returns a reward to the agent.""" + return rewards.tolerance(physics.speed(), + bounds=(_RUN_SPEED, float('inf')), + margin=_RUN_SPEED, + value_at_margin=0, + sigmoid='linear') diff --git a/Dreamer/local_dm_control_suite/cheetah.xml b/Dreamer/local_dm_control_suite/cheetah.xml new file mode 100755 index 0000000..dbce06c --- /dev/null +++ b/Dreamer/local_dm_control_suite/cheetah.xml @@ -0,0 +1,73 @@ + + + + + + + + + + + + + + + + + + + + + diff --git a/Dreamer/local_dm_control_suite/common/__init__.py b/Dreamer/local_dm_control_suite/common/__init__.py new file mode 100755 index 0000000..62eab26 --- /dev/null +++ b/Dreamer/local_dm_control_suite/common/__init__.py @@ -0,0 +1,39 @@ +# Copyright 2017 The dm_control Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""Functions to manage the common assets for domains.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +from dm_control.utils import io as resources + +_SUITE_DIR = os.path.dirname(os.path.dirname(__file__)) +_FILENAMES = [ + "./common/materials.xml", + "./common/materials_white_floor.xml", + "./common/skybox.xml", + "./common/visual.xml", +] + +ASSETS = {filename: resources.GetResource(os.path.join(_SUITE_DIR, filename)) + for filename in _FILENAMES} + + +def read_model(model_filename): + """Reads a model XML file and returns its contents as a string.""" + return resources.GetResource(os.path.join(_SUITE_DIR, model_filename)) diff --git a/Dreamer/local_dm_control_suite/common/__pycache__/__init__.cpython-37.pyc b/Dreamer/local_dm_control_suite/common/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b31a8012bce217329f62592306e76a60f9ab6b4a GIT binary patch literal 1008 zcmZWoy>8nu5GH9^mSsB*+8{$FH9!Y1cF{Qqnlw%kAjuG?Kp~(YSdq4wN}@|rZepZU z^9Fr^c4|tP@ z6EE)Y7H?l6(b+@aalFDieB}zoU0xrdUU%_&zsM6SRc<1yqKxHiA|g8#QKGU;E%~OUHNlkHJNxTCDd6 zx2XpivqDRjn*Mo~E;gU~>DQ@*?Bi4^UBx_qh%uipKB@EnoKU)8sdFaSafC%+2e9A5 zHk$wvU80X0->^m3*pjo_wP)*>cuAJ{68|7yyb=0|dVbl|!l*(g!j!Fd!k*lnN}^2Z z&6u5*-kFkl8Pd`FgVQ11J2>f)lJsAd!C0muXPMAGl&j!ocQ93%7|aD{xgA)^276k# zkU^>vmQtS4MCDeil$ru0-*<>onM+G)e|B*bZgH7dXx(i4g%jhH`B<;Q>p`tk?W$I4 z*TDEkC2}fZ=y + + + + + + + + + + + + + + + + + + diff --git a/Dreamer/local_dm_control_suite/common/materials_white_floor.xml b/Dreamer/local_dm_control_suite/common/materials_white_floor.xml new file mode 100755 index 0000000..a1e35c2 --- /dev/null +++ b/Dreamer/local_dm_control_suite/common/materials_white_floor.xml @@ -0,0 +1,23 @@ + + + + + + + + + + + + + + + + + + + diff --git a/Dreamer/local_dm_control_suite/common/skybox.xml b/Dreamer/local_dm_control_suite/common/skybox.xml new file mode 100755 index 0000000..b888692 --- /dev/null +++ b/Dreamer/local_dm_control_suite/common/skybox.xml @@ -0,0 +1,6 @@ + + + + + diff --git a/Dreamer/local_dm_control_suite/common/visual.xml b/Dreamer/local_dm_control_suite/common/visual.xml new file mode 100755 index 0000000..ede15ad --- /dev/null +++ b/Dreamer/local_dm_control_suite/common/visual.xml @@ -0,0 +1,7 @@ + + + + + + + diff --git a/Dreamer/local_dm_control_suite/demos/mocap_demo.py b/Dreamer/local_dm_control_suite/demos/mocap_demo.py new file mode 100755 index 0000000..2e2c7ca --- /dev/null +++ b/Dreamer/local_dm_control_suite/demos/mocap_demo.py @@ -0,0 +1,84 @@ +# Copyright 2017 The dm_control Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Demonstration of amc parsing for CMU mocap database. 
+ +To run the demo, supply a path to a `.amc` file: + + python mocap_demo --filename='path/to/mocap.amc' + +CMU motion capture clips are available at mocap.cs.cmu.edu +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import time +# Internal dependencies. + +from absl import app +from absl import flags + +from local_dm_control_suite import humanoid_CMU +from dm_control.suite.utils import parse_amc + +import matplotlib.pyplot as plt +import numpy as np + +FLAGS = flags.FLAGS +flags.DEFINE_string('filename', None, 'amc file to be converted.') +flags.DEFINE_integer('max_num_frames', 90, + 'Maximum number of frames for plotting/playback') + + +def main(unused_argv): + env = humanoid_CMU.stand() + + # Parse and convert specified clip. + converted = parse_amc.convert(FLAGS.filename, + env.physics, env.control_timestep()) + + max_frame = min(FLAGS.max_num_frames, converted.qpos.shape[1] - 1) + + width = 480 + height = 480 + video = np.zeros((max_frame, height, 2 * width, 3), dtype=np.uint8) + + for i in range(max_frame): + p_i = converted.qpos[:, i] + with env.physics.reset_context(): + env.physics.data.qpos[:] = p_i + video[i] = np.hstack([env.physics.render(height, width, camera_id=0), + env.physics.render(height, width, camera_id=1)]) + + tic = time.time() + for i in range(max_frame): + if i == 0: + img = plt.imshow(video[i]) + else: + img.set_data(video[i]) + toc = time.time() + clock_dt = toc - tic + tic = time.time() + # Real-time playback not always possible as clock_dt > .03 + plt.pause(max(0.01, 0.03 - clock_dt)) # Need min display time > 0.0. + plt.draw() + plt.waitforbuttonpress() + + +if __name__ == '__main__': + flags.mark_flag_as_required('filename') + app.run(main) diff --git a/Dreamer/local_dm_control_suite/demos/zeros.amc b/Dreamer/local_dm_control_suite/demos/zeros.amc new file mode 100755 index 0000000..b4590a4 --- /dev/null +++ b/Dreamer/local_dm_control_suite/demos/zeros.amc @@ -0,0 +1,213 @@ +#DUMMY AMC for testing +:FULLY-SPECIFIED +:DEGREES +1 +root 0 0 0 0 0 0 +lowerback 0 0 0 +upperback 0 0 0 +thorax 0 0 0 +lowerneck 0 0 0 +upperneck 0 0 0 +head 0 0 0 +rclavicle 0 0 +rhumerus 0 0 0 +rradius 0 +rwrist 0 +rhand 0 0 +rfingers 0 +rthumb 0 0 +lclavicle 0 0 +lhumerus 0 0 0 +lradius 0 +lwrist 0 +lhand 0 0 +lfingers 0 +lthumb 0 0 +rfemur 0 0 0 +rtibia 0 +rfoot 0 0 +rtoes 0 +lfemur 0 0 0 +ltibia 0 +lfoot 0 0 +ltoes 0 +2 +root 0 0 0 0 0 0 +lowerback 0 0 0 +upperback 0 0 0 +thorax 0 0 0 +lowerneck 0 0 0 +upperneck 0 0 0 +head 0 0 0 +rclavicle 0 0 +rhumerus 0 0 0 +rradius 0 +rwrist 0 +rhand 0 0 +rfingers 0 +rthumb 0 0 +lclavicle 0 0 +lhumerus 0 0 0 +lradius 0 +lwrist 0 +lhand 0 0 +lfingers 0 +lthumb 0 0 +rfemur 0 0 0 +rtibia 0 +rfoot 0 0 +rtoes 0 +lfemur 0 0 0 +ltibia 0 +lfoot 0 0 +ltoes 0 +3 +root 0 0 0 0 0 0 +lowerback 0 0 0 +upperback 0 0 0 +thorax 0 0 0 +lowerneck 0 0 0 +upperneck 0 0 0 +head 0 0 0 +rclavicle 0 0 +rhumerus 0 0 0 +rradius 0 +rwrist 0 +rhand 0 0 +rfingers 0 +rthumb 0 0 +lclavicle 0 0 +lhumerus 0 0 0 +lradius 0 +lwrist 0 +lhand 0 0 +lfingers 0 +lthumb 0 0 +rfemur 0 0 0 +rtibia 0 +rfoot 0 0 +rtoes 0 +lfemur 0 0 0 +ltibia 0 +lfoot 0 0 +ltoes 0 +4 +root 0 0 0 0 0 0 +lowerback 0 0 0 +upperback 0 0 0 +thorax 0 0 0 +lowerneck 0 0 0 +upperneck 0 0 0 +head 0 0 0 +rclavicle 0 0 +rhumerus 0 0 0 +rradius 0 +rwrist 0 +rhand 0 0 +rfingers 0 +rthumb 0 0 +lclavicle 0 0 +lhumerus 0 0 0 +lradius 0 +lwrist 0 +lhand 0 0 +lfingers 0 +lthumb 0 0 +rfemur 0 0 0 +rtibia 0 +rfoot 0 0 +rtoes 0 +lfemur 0 0 0 +ltibia 0 
+lfoot 0 0 +ltoes 0 +5 +root 0 0 0 0 0 0 +lowerback 0 0 0 +upperback 0 0 0 +thorax 0 0 0 +lowerneck 0 0 0 +upperneck 0 0 0 +head 0 0 0 +rclavicle 0 0 +rhumerus 0 0 0 +rradius 0 +rwrist 0 +rhand 0 0 +rfingers 0 +rthumb 0 0 +lclavicle 0 0 +lhumerus 0 0 0 +lradius 0 +lwrist 0 +lhand 0 0 +lfingers 0 +lthumb 0 0 +rfemur 0 0 0 +rtibia 0 +rfoot 0 0 +rtoes 0 +lfemur 0 0 0 +ltibia 0 +lfoot 0 0 +ltoes 0 +6 +root 0 0 0 0 0 0 +lowerback 0 0 0 +upperback 0 0 0 +thorax 0 0 0 +lowerneck 0 0 0 +upperneck 0 0 0 +head 0 0 0 +rclavicle 0 0 +rhumerus 0 0 0 +rradius 0 +rwrist 0 +rhand 0 0 +rfingers 0 +rthumb 0 0 +lclavicle 0 0 +lhumerus 0 0 0 +lradius 0 +lwrist 0 +lhand 0 0 +lfingers 0 +lthumb 0 0 +rfemur 0 0 0 +rtibia 0 +rfoot 0 0 +rtoes 0 +lfemur 0 0 0 +ltibia 0 +lfoot 0 0 +ltoes 0 +7 +root 0 0 0 0 0 0 +lowerback 0 0 0 +upperback 0 0 0 +thorax 0 0 0 +lowerneck 0 0 0 +upperneck 0 0 0 +head 0 0 0 +rclavicle 0 0 +rhumerus 0 0 0 +rradius 0 +rwrist 0 +rhand 0 0 +rfingers 0 +rthumb 0 0 +lclavicle 0 0 +lhumerus 0 0 0 +lradius 0 +lwrist 0 +lhand 0 0 +lfingers 0 +lthumb 0 0 +rfemur 0 0 0 +rtibia 0 +rfoot 0 0 +rtoes 0 +lfemur 0 0 0 +ltibia 0 +lfoot 0 0 +ltoes 0 diff --git a/Dreamer/local_dm_control_suite/explore.py b/Dreamer/local_dm_control_suite/explore.py new file mode 100755 index 0000000..06fb0a8 --- /dev/null +++ b/Dreamer/local_dm_control_suite/explore.py @@ -0,0 +1,84 @@ +# Copyright 2018 The dm_control Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Control suite environments explorer.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from absl import app +from absl import flags +from dm_control import suite +from dm_control.suite.wrappers import action_noise +from six.moves import input + +from dm_control import viewer + + +_ALL_NAMES = ['.'.join(domain_task) for domain_task in suite.ALL_TASKS] + +flags.DEFINE_enum('environment_name', None, _ALL_NAMES, + 'Optional \'domain_name.task_name\' pair specifying the ' + 'environment to load. If unspecified a prompt will appear to ' + 'select one.') +flags.DEFINE_bool('timeout', True, 'Whether episodes should have a time limit.') +flags.DEFINE_bool('visualize_reward', True, + 'Whether to vary the colors of geoms according to the ' + 'current reward value.') +flags.DEFINE_float('action_noise', 0., + 'Standard deviation of Gaussian noise to apply to actions, ' + 'expressed as a fraction of the max-min range for each ' + 'action dimension. Defaults to 0, i.e. no noise.') +FLAGS = flags.FLAGS + + +def prompt_environment_name(prompt, values): + environment_name = None + while not environment_name: + environment_name = input(prompt) + if not environment_name or values.index(environment_name) < 0: + print('"%s" is not a valid environment name.' 
% environment_name) + environment_name = None + return environment_name + + +def main(argv): + del argv + environment_name = FLAGS.environment_name + if environment_name is None: + print('\n '.join(['Available environments:'] + _ALL_NAMES)) + environment_name = prompt_environment_name( + 'Please select an environment name: ', _ALL_NAMES) + + index = _ALL_NAMES.index(environment_name) + domain_name, task_name = suite.ALL_TASKS[index] + + task_kwargs = {} + if not FLAGS.timeout: + task_kwargs['time_limit'] = float('inf') + + def loader(): + env = suite.load( + domain_name=domain_name, task_name=task_name, task_kwargs=task_kwargs) + env.task.visualize_reward = FLAGS.visualize_reward + if FLAGS.action_noise > 0: + env = action_noise.Wrapper(env, scale=FLAGS.action_noise) + return env + + viewer.launch(loader) + + +if __name__ == '__main__': + app.run(main) diff --git a/Dreamer/local_dm_control_suite/finger.py b/Dreamer/local_dm_control_suite/finger.py new file mode 100755 index 0000000..e700db6 --- /dev/null +++ b/Dreamer/local_dm_control_suite/finger.py @@ -0,0 +1,217 @@ +# Copyright 2017 The dm_control Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Finger Domain.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections + +from dm_control import mujoco +from dm_control.rl import control +from local_dm_control_suite import base +from local_dm_control_suite import common +from dm_control.suite.utils import randomizers +from dm_control.utils import containers +import numpy as np +from six.moves import range + +_DEFAULT_TIME_LIMIT = 20 # (seconds) +_CONTROL_TIMESTEP = .02 # (seconds) +# For TURN tasks, the 'tip' geom needs to enter a spherical target of sizes: +_EASY_TARGET_SIZE = 0.07 +_HARD_TARGET_SIZE = 0.03 +# Initial spin velocity for the Stop task. +_INITIAL_SPIN_VELOCITY = 100 +# Spinning slower than this value (radian/second) is considered stopped. +_STOP_VELOCITY = 1e-6 +# Spinning faster than this value (radian/second) is considered spinning. 
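+# (Spin.get_reward below returns 1 only while the hinge velocity is at or below -_SPIN_VELOCITY, i.e. while the spinner rotates quickly in the negative direction.)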
+_SPIN_VELOCITY = 15.0 + + +SUITE = containers.TaggedTasks() + + +def get_model_and_assets(): + """Returns a tuple containing the model XML string and a dict of assets.""" + return common.read_model('finger.xml'), common.ASSETS + + +@SUITE.add('benchmarking') +def spin(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): + """Returns the Spin task.""" + physics = Physics.from_xml_string(*get_model_and_assets()) + task = Spin(random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP, + **environment_kwargs) + + +@SUITE.add('benchmarking') +def turn_easy(time_limit=_DEFAULT_TIME_LIMIT, random=None, + environment_kwargs=None): + """Returns the easy Turn task.""" + physics = Physics.from_xml_string(*get_model_and_assets()) + task = Turn(target_radius=_EASY_TARGET_SIZE, random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP, + **environment_kwargs) + + +@SUITE.add('benchmarking') +def turn_hard(time_limit=_DEFAULT_TIME_LIMIT, random=None, + environment_kwargs=None): + """Returns the hard Turn task.""" + physics = Physics.from_xml_string(*get_model_and_assets()) + task = Turn(target_radius=_HARD_TARGET_SIZE, random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP, + **environment_kwargs) + + +class Physics(mujoco.Physics): + """Physics simulation with additional features for the Finger domain.""" + + def touch(self): + """Returns logarithmically scaled signals from the two touch sensors.""" + return np.log1p(self.named.data.sensordata[['touchtop', 'touchbottom']]) + + def hinge_velocity(self): + """Returns the velocity of the hinge joint.""" + return self.named.data.sensordata['hinge_velocity'] + + def tip_position(self): + """Returns the (x,z) position of the tip relative to the hinge.""" + return (self.named.data.sensordata['tip'][[0, 2]] - + self.named.data.sensordata['spinner'][[0, 2]]) + + def bounded_position(self): + """Returns the positions, with the hinge angle replaced by tip position.""" + return np.hstack((self.named.data.sensordata[['proximal', 'distal']], + self.tip_position())) + + def velocity(self): + """Returns the velocities (extracted from sensordata).""" + return self.named.data.sensordata[['proximal_velocity', + 'distal_velocity', + 'hinge_velocity']] + + def target_position(self): + """Returns the (x,z) position of the target relative to the hinge.""" + return (self.named.data.sensordata['target'][[0, 2]] - + self.named.data.sensordata['spinner'][[0, 2]]) + + def to_target(self): + """Returns the vector from the tip to the target.""" + return self.target_position() - self.tip_position() + + def dist_to_target(self): + """Returns the signed distance to the target surface, negative is inside.""" + return (np.linalg.norm(self.to_target()) - + self.named.model.site_size['target', 0]) + + +class Spin(base.Task): + """A Finger `Task` to spin the stopped body.""" + + def __init__(self, random=None): + """Initializes a new `Spin` instance. + + Args: + random: Optional, either a `numpy.random.RandomState` instance, an + integer seed for creating a new `RandomState`, or None to select a seed + automatically (default). 
+ """ + super(Spin, self).__init__(random=random) + + def initialize_episode(self, physics): + physics.named.model.site_rgba['target', 3] = 0 + physics.named.model.site_rgba['tip', 3] = 0 + physics.named.model.dof_damping['hinge'] = .03 + _set_random_joint_angles(physics, self.random) + super(Spin, self).initialize_episode(physics) + + def get_observation(self, physics): + """Returns state and touch sensors, and target info.""" + obs = collections.OrderedDict() + obs['position'] = physics.bounded_position() + obs['velocity'] = physics.velocity() + obs['touch'] = physics.touch() + return obs + + def get_reward(self, physics): + """Returns a sparse reward.""" + return float(physics.hinge_velocity() <= -_SPIN_VELOCITY) + + +class Turn(base.Task): + """A Finger `Task` to turn the body to a target angle.""" + + def __init__(self, target_radius, random=None): + """Initializes a new `Turn` instance. + + Args: + target_radius: Radius of the target site, which specifies the goal angle. + random: Optional, either a `numpy.random.RandomState` instance, an + integer seed for creating a new `RandomState`, or None to select a seed + automatically (default). + """ + self._target_radius = target_radius + super(Turn, self).__init__(random=random) + + def initialize_episode(self, physics): + target_angle = self.random.uniform(-np.pi, np.pi) + hinge_x, hinge_z = physics.named.data.xanchor['hinge', ['x', 'z']] + radius = physics.named.model.geom_size['cap1'].sum() + target_x = hinge_x + radius * np.sin(target_angle) + target_z = hinge_z + radius * np.cos(target_angle) + physics.named.model.site_pos['target', ['x', 'z']] = target_x, target_z + physics.named.model.site_size['target', 0] = self._target_radius + + _set_random_joint_angles(physics, self.random) + + super(Turn, self).initialize_episode(physics) + + def get_observation(self, physics): + """Returns state, touch sensors, and target info.""" + obs = collections.OrderedDict() + obs['position'] = physics.bounded_position() + obs['velocity'] = physics.velocity() + obs['touch'] = physics.touch() + obs['target_position'] = physics.target_position() + obs['dist_to_target'] = physics.dist_to_target() + return obs + + def get_reward(self, physics): + return float(physics.dist_to_target() <= 0) + + +def _set_random_joint_angles(physics, random, max_attempts=1000): + """Sets the joints to a random collision-free state.""" + + for _ in range(max_attempts): + randomizers.randomize_limited_and_rotational_joints(physics, random) + # Check for collisions. + physics.after_reset() + if physics.data.ncon == 0: + break + else: + raise RuntimeError('Could not find a collision-free state ' + 'after {} attempts'.format(max_attempts)) diff --git a/Dreamer/local_dm_control_suite/finger.xml b/Dreamer/local_dm_control_suite/finger.xml new file mode 100755 index 0000000..3b35986 --- /dev/null +++ b/Dreamer/local_dm_control_suite/finger.xml @@ -0,0 +1,72 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/Dreamer/local_dm_control_suite/fish.py b/Dreamer/local_dm_control_suite/fish.py new file mode 100755 index 0000000..3262def --- /dev/null +++ b/Dreamer/local_dm_control_suite/fish.py @@ -0,0 +1,176 @@ +# Copyright 2017 The dm_control Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Fish Domain.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections + +from dm_control import mujoco +from dm_control.rl import control +from local_dm_control_suite import base +from local_dm_control_suite import common +from dm_control.utils import containers +from dm_control.utils import rewards +import numpy as np + + +_DEFAULT_TIME_LIMIT = 40 +_CONTROL_TIMESTEP = .04 +_JOINTS = ['tail1', + 'tail_twist', + 'tail2', + 'finright_roll', + 'finright_pitch', + 'finleft_roll', + 'finleft_pitch'] +SUITE = containers.TaggedTasks() + + +def get_model_and_assets(): + """Returns a tuple containing the model XML string and a dict of assets.""" + return common.read_model('fish.xml'), common.ASSETS + + +@SUITE.add('benchmarking') +def upright(time_limit=_DEFAULT_TIME_LIMIT, random=None, + environment_kwargs=None): + """Returns the Fish Upright task.""" + physics = Physics.from_xml_string(*get_model_and_assets()) + task = Upright(random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, control_timestep=_CONTROL_TIMESTEP, time_limit=time_limit, + **environment_kwargs) + + +@SUITE.add('benchmarking') +def swim(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): + """Returns the Fish Swim task.""" + physics = Physics.from_xml_string(*get_model_and_assets()) + task = Swim(random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, control_timestep=_CONTROL_TIMESTEP, time_limit=time_limit, + **environment_kwargs) + + +class Physics(mujoco.Physics): + """Physics simulation with additional features for the Fish domain.""" + + def upright(self): + """Returns projection from z-axes of torso to the z-axes of worldbody.""" + return self.named.data.xmat['torso', 'zz'] + + def torso_velocity(self): + """Returns velocities and angular velocities of the torso.""" + return self.data.sensordata + + def joint_velocities(self): + """Returns the joint velocities.""" + return self.named.data.qvel[_JOINTS] + + def joint_angles(self): + """Returns the joint positions.""" + return self.named.data.qpos[_JOINTS] + + def mouth_to_target(self): + """Returns a vector, from mouth to target in local coordinate of mouth.""" + data = self.named.data + mouth_to_target_global = data.geom_xpos['target'] - data.geom_xpos['mouth'] + return mouth_to_target_global.dot(data.geom_xmat['mouth'].reshape(3, 3)) + + +class Upright(base.Task): + """A Fish `Task` for getting the torso upright with smooth reward.""" + + def __init__(self, random=None): + """Initializes an instance of `Upright`. + + Args: + random: Either an existing `numpy.random.RandomState` instance, an + integer seed for creating a new `RandomState`, or None to select a seed + automatically. 
+ """ + super(Upright, self).__init__(random=random) + + def initialize_episode(self, physics): + """Randomizes the tail and fin angles and the orientation of the Fish.""" + quat = self.random.randn(4) + physics.named.data.qpos['root'][3:7] = quat / np.linalg.norm(quat) + for joint in _JOINTS: + physics.named.data.qpos[joint] = self.random.uniform(-.2, .2) + # Hide the target. It's irrelevant for this task. + physics.named.model.geom_rgba['target', 3] = 0 + super(Upright, self).initialize_episode(physics) + + def get_observation(self, physics): + """Returns an observation of joint angles, velocities and uprightness.""" + obs = collections.OrderedDict() + obs['joint_angles'] = physics.joint_angles() + obs['upright'] = physics.upright() + obs['velocity'] = physics.velocity() + return obs + + def get_reward(self, physics): + """Returns a smooth reward.""" + return rewards.tolerance(physics.upright(), bounds=(1, 1), margin=1) + + +class Swim(base.Task): + """A Fish `Task` for swimming with smooth reward.""" + + def __init__(self, random=None): + """Initializes an instance of `Swim`. + + Args: + random: Optional, either a `numpy.random.RandomState` instance, an + integer seed for creating a new `RandomState`, or None to select a seed + automatically (default). + """ + super(Swim, self).__init__(random=random) + + def initialize_episode(self, physics): + """Sets the state of the environment at the start of each episode.""" + + quat = self.random.randn(4) + physics.named.data.qpos['root'][3:7] = quat / np.linalg.norm(quat) + for joint in _JOINTS: + physics.named.data.qpos[joint] = self.random.uniform(-.2, .2) + # Randomize target position. + physics.named.model.geom_pos['target', 'x'] = self.random.uniform(-.4, .4) + physics.named.model.geom_pos['target', 'y'] = self.random.uniform(-.4, .4) + physics.named.model.geom_pos['target', 'z'] = self.random.uniform(.1, .3) + super(Swim, self).initialize_episode(physics) + + def get_observation(self, physics): + """Returns an observation of joints, target direction and velocities.""" + obs = collections.OrderedDict() + obs['joint_angles'] = physics.joint_angles() + obs['upright'] = physics.upright() + obs['target'] = physics.mouth_to_target() + obs['velocity'] = physics.velocity() + return obs + + def get_reward(self, physics): + """Returns a smooth reward.""" + radii = physics.named.model.geom_size[['mouth', 'target'], 0].sum() + in_target = rewards.tolerance(np.linalg.norm(physics.mouth_to_target()), + bounds=(0, radii), margin=2*radii) + is_upright = 0.5 * (physics.upright() + 1) + return (7*in_target + is_upright) / 8 diff --git a/Dreamer/local_dm_control_suite/fish.xml b/Dreamer/local_dm_control_suite/fish.xml new file mode 100755 index 0000000..43de56d --- /dev/null +++ b/Dreamer/local_dm_control_suite/fish.xml @@ -0,0 +1,85 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/Dreamer/local_dm_control_suite/hopper.py b/Dreamer/local_dm_control_suite/hopper.py new file mode 100755 index 0000000..6458e41 --- /dev/null +++ b/Dreamer/local_dm_control_suite/hopper.py @@ -0,0 +1,138 @@ +# Copyright 2017 The dm_control Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Hopper domain.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections + +from dm_control import mujoco +from dm_control.rl import control +from local_dm_control_suite import base +from local_dm_control_suite import common +from dm_control.suite.utils import randomizers +from dm_control.utils import containers +from dm_control.utils import rewards +import numpy as np + + +SUITE = containers.TaggedTasks() + +_CONTROL_TIMESTEP = .02 # (Seconds) + +# Default duration of an episode, in seconds. +_DEFAULT_TIME_LIMIT = 20 + +# Minimal height of torso over foot above which stand reward is 1. +_STAND_HEIGHT = 0.6 + +# Hopping speed above which hop reward is 1. +_HOP_SPEED = 2 + + +def get_model_and_assets(): + """Returns a tuple containing the model XML string and a dict of assets.""" + return common.read_model('hopper.xml'), common.ASSETS + + +@SUITE.add('benchmarking') +def stand(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): + """Returns a Hopper that strives to stand upright, balancing its pose.""" + physics = Physics.from_xml_string(*get_model_and_assets()) + task = Hopper(hopping=False, random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP, + **environment_kwargs) + + +@SUITE.add('benchmarking') +def hop(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): + """Returns a Hopper that strives to hop forward.""" + physics = Physics.from_xml_string(*get_model_and_assets()) + task = Hopper(hopping=True, random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP, + **environment_kwargs) + + +class Physics(mujoco.Physics): + """Physics simulation with additional features for the Hopper domain.""" + + def height(self): + """Returns height of torso with respect to foot.""" + return (self.named.data.xipos['torso', 'z'] - + self.named.data.xipos['foot', 'z']) + + def speed(self): + """Returns horizontal speed of the Hopper.""" + return self.named.data.sensordata['torso_subtreelinvel'][0] + + def touch(self): + """Returns the signals from two foot touch sensors.""" + return np.log1p(self.named.data.sensordata[['touch_toe', 'touch_heel']]) + + +class Hopper(base.Task): + """A Hopper's `Task` to train a standing and a jumping Hopper.""" + + def __init__(self, hopping, random=None): + """Initialize an instance of `Hopper`. + + Args: + hopping: Boolean, if True the task is to hop forwards, otherwise it is to + balance upright. + random: Optional, either a `numpy.random.RandomState` instance, an + integer seed for creating a new `RandomState`, or None to select a seed + automatically (default). 
+ """ + self._hopping = hopping + super(Hopper, self).__init__(random=random) + + def initialize_episode(self, physics): + """Sets the state of the environment at the start of each episode.""" + randomizers.randomize_limited_and_rotational_joints(physics, self.random) + self._timeout_progress = 0 + super(Hopper, self).initialize_episode(physics) + + def get_observation(self, physics): + """Returns an observation of positions, velocities and touch sensors.""" + obs = collections.OrderedDict() + # Ignores horizontal position to maintain translational invariance: + obs['position'] = physics.data.qpos[1:].copy() + obs['velocity'] = physics.velocity() + obs['touch'] = physics.touch() + return obs + + def get_reward(self, physics): + """Returns a reward applicable to the performed task.""" + standing = rewards.tolerance(physics.height(), (_STAND_HEIGHT, 2)) + if self._hopping: + hopping = rewards.tolerance(physics.speed(), + bounds=(_HOP_SPEED, float('inf')), + margin=_HOP_SPEED/2, + value_at_margin=0.5, + sigmoid='linear') + return standing * hopping + else: + small_control = rewards.tolerance(physics.control(), + margin=1, value_at_margin=0, + sigmoid='quadratic').mean() + small_control = (small_control + 4) / 5 + return standing * small_control diff --git a/Dreamer/local_dm_control_suite/hopper.xml b/Dreamer/local_dm_control_suite/hopper.xml new file mode 100755 index 0000000..0c8ec28 --- /dev/null +++ b/Dreamer/local_dm_control_suite/hopper.xml @@ -0,0 +1,66 @@ + + + + + + + + + + + + + + + + + + + + diff --git a/Dreamer/local_dm_control_suite/humanoid.py b/Dreamer/local_dm_control_suite/humanoid.py new file mode 100755 index 0000000..5a161f0 --- /dev/null +++ b/Dreamer/local_dm_control_suite/humanoid.py @@ -0,0 +1,211 @@ +# Copyright 2017 The dm_control Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Humanoid Domain.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections + +from dm_control import mujoco +from dm_control.rl import control +from local_dm_control_suite import base +from local_dm_control_suite import common +from dm_control.suite.utils import randomizers +from dm_control.utils import containers +from dm_control.utils import rewards +import numpy as np + +_DEFAULT_TIME_LIMIT = 25 +_CONTROL_TIMESTEP = .025 + +# Height of head above which stand reward is 1. +_STAND_HEIGHT = 1.4 + +# Horizontal speeds above which move reward is 1. 
+_WALK_SPEED = 1 +_RUN_SPEED = 10 + + +SUITE = containers.TaggedTasks() + + +def get_model_and_assets(): + """Returns a tuple containing the model XML string and a dict of assets.""" + return common.read_model('humanoid.xml'), common.ASSETS + + +@SUITE.add('benchmarking') +def stand(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): + """Returns the Stand task.""" + physics = Physics.from_xml_string(*get_model_and_assets()) + task = Humanoid(move_speed=0, pure_state=False, random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP, + **environment_kwargs) + + +@SUITE.add('benchmarking') +def walk(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): + """Returns the Walk task.""" + physics = Physics.from_xml_string(*get_model_and_assets()) + task = Humanoid(move_speed=_WALK_SPEED, pure_state=False, random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP, + **environment_kwargs) + + +@SUITE.add('benchmarking') +def run(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): + """Returns the Run task.""" + physics = Physics.from_xml_string(*get_model_and_assets()) + task = Humanoid(move_speed=_RUN_SPEED, pure_state=False, random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP, + **environment_kwargs) + + +@SUITE.add() +def run_pure_state(time_limit=_DEFAULT_TIME_LIMIT, random=None, + environment_kwargs=None): + """Returns the Run task.""" + physics = Physics.from_xml_string(*get_model_and_assets()) + task = Humanoid(move_speed=_RUN_SPEED, pure_state=True, random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP, + **environment_kwargs) + + +class Physics(mujoco.Physics): + """Physics simulation with additional features for the Walker domain.""" + + def torso_upright(self): + """Returns projection from z-axes of torso to the z-axes of world.""" + return self.named.data.xmat['torso', 'zz'] + + def head_height(self): + """Returns the height of the torso.""" + return self.named.data.xpos['head', 'z'] + + def center_of_mass_position(self): + """Returns position of the center-of-mass.""" + return self.named.data.subtree_com['torso'].copy() + + def center_of_mass_velocity(self): + """Returns the velocity of the center-of-mass.""" + return self.named.data.sensordata['torso_subtreelinvel'].copy() + + def torso_vertical_orientation(self): + """Returns the z-projection of the torso orientation matrix.""" + return self.named.data.xmat['torso', ['zx', 'zy', 'zz']] + + def joint_angles(self): + """Returns the state without global orientation or position.""" + return self.data.qpos[7:].copy() # Skip the 7 DoFs of the free root joint. 
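+    # (The free root joint accounts for those 7 entries: 3 for position plus 4 for the orientation quaternion.)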
+ + def extremities(self): + """Returns end effector positions in egocentric frame.""" + torso_frame = self.named.data.xmat['torso'].reshape(3, 3) + torso_pos = self.named.data.xpos['torso'] + positions = [] + for side in ('left_', 'right_'): + for limb in ('hand', 'foot'): + torso_to_limb = self.named.data.xpos[side + limb] - torso_pos + positions.append(torso_to_limb.dot(torso_frame)) + return np.hstack(positions) + + +class Humanoid(base.Task): + """A humanoid task.""" + + def __init__(self, move_speed, pure_state, random=None): + """Initializes an instance of `Humanoid`. + + Args: + move_speed: A float. If this value is zero, reward is given simply for + standing up. Otherwise this specifies a target horizontal velocity for + the walking task. + pure_state: A bool. Whether the observations consist of the pure MuJoCo + state or includes some useful features thereof. + random: Optional, either a `numpy.random.RandomState` instance, an + integer seed for creating a new `RandomState`, or None to select a seed + automatically (default). + """ + self._move_speed = move_speed + self._pure_state = pure_state + super(Humanoid, self).__init__(random=random) + + def initialize_episode(self, physics): + """Sets the state of the environment at the start of each episode. + + Args: + physics: An instance of `Physics`. + + """ + # Find a collision-free random initial configuration. + penetrating = True + while penetrating: + randomizers.randomize_limited_and_rotational_joints(physics, self.random) + # Check for collisions. + physics.after_reset() + penetrating = physics.data.ncon > 0 + super(Humanoid, self).initialize_episode(physics) + + def get_observation(self, physics): + """Returns either the pure state or a set of egocentric features.""" + obs = collections.OrderedDict() + if self._pure_state: + obs['position'] = physics.position() + obs['velocity'] = physics.velocity() + else: + obs['joint_angles'] = physics.joint_angles() + obs['head_height'] = physics.head_height() + obs['extremities'] = physics.extremities() + obs['torso_vertical'] = physics.torso_vertical_orientation() + obs['com_velocity'] = physics.center_of_mass_velocity() + obs['velocity'] = physics.velocity() + return obs + + def get_reward(self, physics): + """Returns a reward to the agent.""" + standing = rewards.tolerance(physics.head_height(), + bounds=(_STAND_HEIGHT, float('inf')), + margin=_STAND_HEIGHT/4) + upright = rewards.tolerance(physics.torso_upright(), + bounds=(0.9, float('inf')), sigmoid='linear', + margin=1.9, value_at_margin=0) + stand_reward = standing * upright + small_control = rewards.tolerance(physics.control(), margin=1, + value_at_margin=0, + sigmoid='quadratic').mean() + small_control = (4 + small_control) / 5 + if self._move_speed == 0: + horizontal_velocity = physics.center_of_mass_velocity()[[0, 1]] + dont_move = rewards.tolerance(horizontal_velocity, margin=2).mean() + return small_control * stand_reward * dont_move + else: + com_velocity = np.linalg.norm(physics.center_of_mass_velocity()[[0, 1]]) + move = rewards.tolerance(com_velocity, + bounds=(self._move_speed, float('inf')), + margin=self._move_speed, value_at_margin=0, + sigmoid='linear') + move = (5*move + 1) / 6 + return small_control * stand_reward * move diff --git a/Dreamer/local_dm_control_suite/humanoid.xml b/Dreamer/local_dm_control_suite/humanoid.xml new file mode 100755 index 0000000..32b84c5 --- /dev/null +++ b/Dreamer/local_dm_control_suite/humanoid.xml @@ -0,0 +1,202 @@ + + + + + + + + + diff --git 
a/Dreamer/local_dm_control_suite/humanoid_CMU.py b/Dreamer/local_dm_control_suite/humanoid_CMU.py new file mode 100755 index 0000000..d06fb63 --- /dev/null +++ b/Dreamer/local_dm_control_suite/humanoid_CMU.py @@ -0,0 +1,179 @@ +# Copyright 2017 The dm_control Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Humanoid_CMU Domain.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections + +from dm_control import mujoco +from dm_control.rl import control +from local_dm_control_suite import base +from local_dm_control_suite import common +from dm_control.suite.utils import randomizers +from dm_control.utils import containers +from dm_control.utils import rewards +import numpy as np + +_DEFAULT_TIME_LIMIT = 20 +_CONTROL_TIMESTEP = 0.02 + +# Height of head above which stand reward is 1. +_STAND_HEIGHT = 1.4 + +# Horizontal speeds above which move reward is 1. +_WALK_SPEED = 1 +_RUN_SPEED = 10 + +SUITE = containers.TaggedTasks() + + +def get_model_and_assets(): + """Returns a tuple containing the model XML string and a dict of assets.""" + return common.read_model('humanoid_CMU.xml'), common.ASSETS + + +@SUITE.add() +def stand(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): + """Returns the Stand task.""" + physics = Physics.from_xml_string(*get_model_and_assets()) + task = HumanoidCMU(move_speed=0, random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP, + **environment_kwargs) + + +@SUITE.add() +def run(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): + """Returns the Run task.""" + physics = Physics.from_xml_string(*get_model_and_assets()) + task = HumanoidCMU(move_speed=_RUN_SPEED, random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP, + **environment_kwargs) + + +class Physics(mujoco.Physics): + """Physics simulation with additional features for the humanoid_CMU domain.""" + + def thorax_upright(self): + """Returns projection from y-axes of thorax to the z-axes of world.""" + return self.named.data.xmat['thorax', 'zy'] + + def head_height(self): + """Returns the height of the head.""" + return self.named.data.xpos['head', 'z'] + + def center_of_mass_position(self): + """Returns position of the center-of-mass.""" + return self.named.data.subtree_com['thorax'] + + def center_of_mass_velocity(self): + """Returns the velocity of the center-of-mass.""" + return self.named.data.sensordata['thorax_subtreelinvel'].copy() + + def torso_vertical_orientation(self): + """Returns the z-projection of the thorax orientation matrix.""" + return self.named.data.xmat['thorax', ['zx', 'zy', 'zz']] + + def joint_angles(self): + """Returns the state without global orientation or 
position.""" + return self.data.qpos[7:].copy() # Skip the 7 DoFs of the free root joint. + + def extremities(self): + """Returns end effector positions in egocentric frame.""" + torso_frame = self.named.data.xmat['thorax'].reshape(3, 3) + torso_pos = self.named.data.xpos['thorax'] + positions = [] + for side in ('l', 'r'): + for limb in ('hand', 'foot'): + torso_to_limb = self.named.data.xpos[side + limb] - torso_pos + positions.append(torso_to_limb.dot(torso_frame)) + return np.hstack(positions) + + +class HumanoidCMU(base.Task): + """A task for the CMU Humanoid.""" + + def __init__(self, move_speed, random=None): + """Initializes an instance of `Humanoid_CMU`. + + Args: + move_speed: A float. If this value is zero, reward is given simply for + standing up. Otherwise this specifies a target horizontal velocity for + the walking task. + random: Optional, either a `numpy.random.RandomState` instance, an + integer seed for creating a new `RandomState`, or None to select a seed + automatically (default). + """ + self._move_speed = move_speed + super(HumanoidCMU, self).__init__(random=random) + + def initialize_episode(self, physics): + """Sets a random collision-free configuration at the start of each episode. + + Args: + physics: An instance of `Physics`. + """ + penetrating = True + while penetrating: + randomizers.randomize_limited_and_rotational_joints( + physics, self.random) + # Check for collisions. + physics.after_reset() + penetrating = physics.data.ncon > 0 + super(HumanoidCMU, self).initialize_episode(physics) + + def get_observation(self, physics): + """Returns a set of egocentric features.""" + obs = collections.OrderedDict() + obs['joint_angles'] = physics.joint_angles() + obs['head_height'] = physics.head_height() + obs['extremities'] = physics.extremities() + obs['torso_vertical'] = physics.torso_vertical_orientation() + obs['com_velocity'] = physics.center_of_mass_velocity() + obs['velocity'] = physics.velocity() + return obs + + def get_reward(self, physics): + """Returns a reward to the agent.""" + standing = rewards.tolerance(physics.head_height(), + bounds=(_STAND_HEIGHT, float('inf')), + margin=_STAND_HEIGHT/4) + upright = rewards.tolerance(physics.thorax_upright(), + bounds=(0.9, float('inf')), sigmoid='linear', + margin=1.9, value_at_margin=0) + stand_reward = standing * upright + small_control = rewards.tolerance(physics.control(), margin=1, + value_at_margin=0, + sigmoid='quadratic').mean() + small_control = (4 + small_control) / 5 + if self._move_speed == 0: + horizontal_velocity = physics.center_of_mass_velocity()[[0, 1]] + dont_move = rewards.tolerance(horizontal_velocity, margin=2).mean() + return small_control * stand_reward * dont_move + else: + com_velocity = np.linalg.norm(physics.center_of_mass_velocity()[[0, 1]]) + move = rewards.tolerance(com_velocity, + bounds=(self._move_speed, float('inf')), + margin=self._move_speed, value_at_margin=0, + sigmoid='linear') + move = (5*move + 1) / 6 + return small_control * stand_reward * move diff --git a/Dreamer/local_dm_control_suite/humanoid_CMU.xml b/Dreamer/local_dm_control_suite/humanoid_CMU.xml new file mode 100755 index 0000000..9a41a16 --- /dev/null +++ b/Dreamer/local_dm_control_suite/humanoid_CMU.xml @@ -0,0 +1,289 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/Dreamer/local_dm_control_suite/lqr.py b/Dreamer/local_dm_control_suite/lqr.py new file mode 100755 index 0000000..34197b4 --- /dev/null +++ b/Dreamer/local_dm_control_suite/lqr.py @@ -0,0 +1,272 @@ +# Copyright 2017 The dm_control Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Procedurally generated LQR domain.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections +import os + +from dm_control import mujoco +from dm_control.rl import control +from local_dm_control_suite import base +from local_dm_control_suite import common +from dm_control.utils import containers +from dm_control.utils import xml_tools +from lxml import etree +import numpy as np +from six.moves import range + +from dm_control.utils import io as resources + +_DEFAULT_TIME_LIMIT = float('inf') +_CONTROL_COST_COEF = 0.1 +SUITE = containers.TaggedTasks() + + +def get_model_and_assets(n_bodies, n_actuators, random): + """Returns the model description as an XML string and a dict of assets. + + Args: + n_bodies: An int, number of bodies of the LQR. + n_actuators: An int, number of actuated bodies of the LQR. `n_actuators` + should be less or equal than `n_bodies`. + random: A `numpy.random.RandomState` instance. + + Returns: + A tuple `(model_xml_string, assets)`, where `assets` is a dict consisting of + `{filename: contents_string}` pairs. + """ + return _make_model(n_bodies, n_actuators, random), common.ASSETS + + +@SUITE.add() +def lqr_2_1(time_limit=_DEFAULT_TIME_LIMIT, random=None, + environment_kwargs=None): + """Returns an LQR environment with 2 bodies of which the first is actuated.""" + return _make_lqr(n_bodies=2, + n_actuators=1, + control_cost_coef=_CONTROL_COST_COEF, + time_limit=time_limit, + random=random, + environment_kwargs=environment_kwargs) + + +@SUITE.add() +def lqr_6_2(time_limit=_DEFAULT_TIME_LIMIT, random=None, + environment_kwargs=None): + """Returns an LQR environment with 6 bodies of which first 2 are actuated.""" + return _make_lqr(n_bodies=6, + n_actuators=2, + control_cost_coef=_CONTROL_COST_COEF, + time_limit=time_limit, + random=random, + environment_kwargs=environment_kwargs) + + +def _make_lqr(n_bodies, n_actuators, control_cost_coef, time_limit, random, + environment_kwargs): + """Returns a LQR environment. + + Args: + n_bodies: An int, number of bodies of the LQR. + n_actuators: An int, number of actuated bodies of the LQR. `n_actuators` + should be less or equal than `n_bodies`. + control_cost_coef: A number, the coefficient of the control cost. + time_limit: An int, maximum time for each episode in seconds. 
+ random: Either an existing `numpy.random.RandomState` instance, an + integer seed for creating a new `RandomState`, or None to select a seed + automatically. + environment_kwargs: A `dict` specifying keyword arguments for the + environment, or None. + + Returns: + A LQR environment with `n_bodies` bodies of which first `n_actuators` are + actuated. + """ + + if not isinstance(random, np.random.RandomState): + random = np.random.RandomState(random) + + model_string, assets = get_model_and_assets(n_bodies, n_actuators, + random=random) + physics = Physics.from_xml_string(model_string, assets=assets) + task = LQRLevel(control_cost_coef, random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment(physics, task, time_limit=time_limit, + **environment_kwargs) + + +def _make_body(body_id, stiffness_range, damping_range, random): + """Returns an `etree.Element` defining a body. + + Args: + body_id: Id of the created body. + stiffness_range: A tuple of (stiffness_lower_bound, stiffness_uppder_bound). + The stiffness of the joint is drawn uniformly from this range. + damping_range: A tuple of (damping_lower_bound, damping_upper_bound). The + damping of the joint is drawn uniformly from this range. + random: A `numpy.random.RandomState` instance. + + Returns: + A new instance of `etree.Element`. A body element with two children: joint + and geom. + """ + body_name = 'body_{}'.format(body_id) + joint_name = 'joint_{}'.format(body_id) + geom_name = 'geom_{}'.format(body_id) + + body = etree.Element('body', name=body_name) + body.set('pos', '.25 0 0') + joint = etree.SubElement(body, 'joint', name=joint_name) + body.append(etree.Element('geom', name=geom_name)) + joint.set('stiffness', + str(random.uniform(stiffness_range[0], stiffness_range[1]))) + joint.set('damping', + str(random.uniform(damping_range[0], damping_range[1]))) + return body + + +def _make_model(n_bodies, + n_actuators, + random, + stiffness_range=(15, 25), + damping_range=(0, 0)): + """Returns an MJCF XML string defining a model of springs and dampers. + + Args: + n_bodies: An integer, the number of bodies (DoFs) in the system. + n_actuators: An integer, the number of actuated bodies. + random: A `numpy.random.RandomState` instance. + stiffness_range: A tuple containing minimum and maximum stiffness. Each + joint's stiffness is sampled uniformly from this interval. + damping_range: A tuple containing minimum and maximum damping. Each joint's + damping is sampled uniformly from this interval. + + Returns: + An MJCF string describing the linear system. + + Raises: + ValueError: If the number of bodies or actuators is erronous. + """ + if n_bodies < 1 or n_actuators < 1: + raise ValueError('At least 1 body and 1 actuator required.') + if n_actuators > n_bodies: + raise ValueError('At most 1 actuator per body.') + + file_path = os.path.join(os.path.dirname(__file__), 'lqr.xml') + with resources.GetResourceAsFile(file_path) as xml_file: + mjcf = xml_tools.parse(xml_file) + parent = mjcf.find('./worldbody') + actuator = etree.SubElement(mjcf.getroot(), 'actuator') + tendon = etree.SubElement(mjcf.getroot(), 'tendon') + + for body in range(n_bodies): + # Inserting body. + child = _make_body(body, stiffness_range, damping_range, random) + site_name = 'site_{}'.format(body) + child.append(etree.Element('site', name=site_name)) + + if body == 0: + child.set('pos', '.25 0 .1') + # Add actuators to the first n_actuators bodies. + if body < n_actuators: + # Adding actuator. 
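# Only the first n_actuators bodies are driven: each such body's joint is
# renamed 'joint_<i>' and a <motor> element referencing that joint by name is
# appended to the <actuator> section. The generated MJCF therefore contains
# entries of roughly this form (an illustrative rendering of the lines below):
#
#     <actuator>
#       <motor name="motor_0" joint="joint_0"/>
#     </actuator>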
+ joint_name = 'joint_{}'.format(body) + motor_name = 'motor_{}'.format(body) + child.find('joint').set('name', joint_name) + actuator.append(etree.Element('motor', name=motor_name, joint=joint_name)) + + # Add a tendon between consecutive bodies (for visualisation purposes only). + if body < n_bodies - 1: + child_site_name = 'site_{}'.format(body + 1) + tendon_name = 'tendon_{}'.format(body) + spatial = etree.SubElement(tendon, 'spatial', name=tendon_name) + spatial.append(etree.Element('site', site=site_name)) + spatial.append(etree.Element('site', site=child_site_name)) + parent.append(child) + parent = child + + return etree.tostring(mjcf, pretty_print=True) + + +class Physics(mujoco.Physics): + """Physics simulation with additional features for the LQR domain.""" + + def state_norm(self): + """Returns the norm of the physics state.""" + return np.linalg.norm(self.state()) + + +class LQRLevel(base.Task): + """A Linear Quadratic Regulator `Task`.""" + + _TERMINAL_TOL = 1e-6 + + def __init__(self, control_cost_coef, random=None): + """Initializes an LQR level with cost = sum(states^2) + c*sum(controls^2). + + Args: + control_cost_coef: The coefficient of the control cost. + random: Optional, either a `numpy.random.RandomState` instance, an + integer seed for creating a new `RandomState`, or None to select a seed + automatically (default). + + Raises: + ValueError: If the control cost coefficient is not positive. + """ + if control_cost_coef <= 0: + raise ValueError('control_cost_coef must be positive.') + + self._control_cost_coef = control_cost_coef + super(LQRLevel, self).__init__(random=random) + + @property + def control_cost_coef(self): + return self._control_cost_coef + + def initialize_episode(self, physics): + """Random state sampled from a unit sphere.""" + ndof = physics.model.nq + unit = self.random.randn(ndof) + physics.data.qpos[:] = np.sqrt(2) * unit / np.linalg.norm(unit) + super(LQRLevel, self).initialize_episode(physics) + + def get_observation(self, physics): + """Returns an observation of the state.""" + obs = collections.OrderedDict() + obs['position'] = physics.position() + obs['velocity'] = physics.velocity() + return obs + + def get_reward(self, physics): + """Returns a quadratic state and control reward.""" + position = physics.position() + state_cost = 0.5 * np.dot(position, position) + control_signal = physics.control() + control_l2_norm = 0.5 * np.dot(control_signal, control_signal) + return 1 - (state_cost + control_l2_norm * self._control_cost_coef) + + def get_evaluation(self, physics): + """Returns a sparse evaluation reward that is not used for learning.""" + return float(physics.state_norm() <= 0.01) + + def get_termination(self, physics): + """Terminates when the state norm is smaller than epsilon.""" + if physics.state_norm() < self._TERMINAL_TOL: + return 0.0 diff --git a/Dreamer/local_dm_control_suite/lqr.xml b/Dreamer/local_dm_control_suite/lqr.xml new file mode 100755 index 0000000..d403532 --- /dev/null +++ b/Dreamer/local_dm_control_suite/lqr.xml @@ -0,0 +1,26 @@ + + + + + + + + + + + + + + + diff --git a/Dreamer/local_dm_control_suite/lqr_solver.py b/Dreamer/local_dm_control_suite/lqr_solver.py new file mode 100755 index 0000000..3935c7d --- /dev/null +++ b/Dreamer/local_dm_control_suite/lqr_solver.py @@ -0,0 +1,142 @@ +# Copyright 2017 The dm_control Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +r"""Optimal policy for LQR levels. + +LQR control problem is described in +https://en.wikipedia.org/wiki/Linear-quadratic_regulator#Infinite-horizon.2C_discrete-time_LQR +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from absl import logging +from dm_control.mujoco import wrapper +import numpy as np +from six.moves import range + +try: + import scipy.linalg as sp # pylint: disable=g-import-not-at-top +except ImportError: + sp = None + + +def _solve_dare(a, b, q, r): + """Solves the Discrete-time Algebraic Riccati Equation (DARE) by iteration. + + Algebraic Riccati Equation: + ```none + P_{t-1} = Q + A' * P_{t} * A - + A' * P_{t} * B * (R + B' * P_{t} * B)^{-1} * B' * P_{t} * A + ``` + + Args: + a: A 2 dimensional numpy array, transition matrix A. + b: A 2 dimensional numpy array, control matrix B. + q: A 2 dimensional numpy array, symmetric positive definite cost matrix. + r: A 2 dimensional numpy array, symmetric positive definite cost matrix + + Returns: + A numpy array, a real symmetric matrix P which is the solution to DARE. + + Raises: + RuntimeError: If the computed P matrix is not symmetric and + positive-definite. + """ + p = np.eye(len(a)) + for _ in range(1000000): + a_p = a.T.dot(p) # A' * P_t + a_p_b = np.dot(a_p, b) # A' * P_t * B + # Algebraic Riccati Equation. + p_next = q + np.dot(a_p, a) - a_p_b.dot( + np.linalg.solve(b.T.dot(p.dot(b)) + r, a_p_b.T)) + p_next += p_next.T + p_next *= .5 + if np.abs(p - p_next).max() < 1e-12: + break + p = p_next + else: + logging.warning('DARE solver did not converge') + try: + # Check that the result is symmetric and positive-definite. + np.linalg.cholesky(p_next) + except np.linalg.LinAlgError: + raise RuntimeError('ARE solver failed: P matrix is not symmetric and ' + 'positive-definite.') + return p_next + + +def solve(env): + """Returns the optimal value and policy for LQR problem. + + Args: + env: An instance of `control.EnvironmentV2` with LQR level. + + Returns: + p: A numpy array, the Hessian of the optimal total cost-to-go (value + function at state x) is V(x) = .5 * x' * p * x. + k: A numpy array which gives the optimal linear policy u = k * x. + beta: The maximum eigenvalue of (a + b * k). Under optimal policy, at + timestep n the state tends to 0 like beta^n. + + Raises: + RuntimeError: If the controlled system is unstable. + """ + n = env.physics.model.nq # number of DoFs + m = env.physics.model.nu # number of controls + + # Compute the mass matrix. + mass = np.zeros((n, n)) + wrapper.mjbindings.mjlib.mj_fullM(env.physics.model.ptr, mass, + env.physics.data.qM) + + # Compute input matrices a, b, q and r to the DARE solvers. + # State transition matrix a. 
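# Writing the state as x = [q; dq], the continuous-time dynamics of the
# spring-damper chain are M*ddq = -K*q - D*dq + B_c*u, i.e. ddq = J*x + M^-1*B_c*u
# with J = -M^-1*[K  D]. The block below appears to discretise this with a
# semi-implicit Euler step of size dt, giving x_{t+1} = a*x_t + b*u_t where
#
#     a = I + dt * [[ dt*J + [0 I] ],
#                   [       J      ]]
#     b = dt * [[ dt * M^-1 * B_c ],
#               [      M^-1 * B_c ]]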
+ stiffness = np.diag(env.physics.model.jnt_stiffness.ravel()) + damping = np.diag(env.physics.model.dof_damping.ravel()) + dt = env.physics.model.opt.timestep + + j = np.linalg.solve(-mass, np.hstack((stiffness, damping))) + a = np.eye(2 * n) + dt * np.vstack( + (dt * j + np.hstack((np.zeros((n, n)), np.eye(n))), j)) + + # Control transition matrix b. + b = env.physics.data.actuator_moment.T + bc = np.linalg.solve(mass, b) + b = dt * np.vstack((dt * bc, bc)) + + # State cost Hessian q. + q = np.diag(np.hstack([np.ones(n), np.zeros(n)])) + + # Control cost Hessian r. + r = env.task.control_cost_coef * np.eye(m) + + if sp: + # Use scipy's faster DARE solver if available. + solve_dare = sp.solve_discrete_are + else: + # Otherwise fall back on a slower internal implementation. + solve_dare = _solve_dare + + # Solve the discrete algebraic Riccati equation. + p = solve_dare(a, b, q, r) + k = -np.linalg.solve(b.T.dot(p.dot(b)) + r, b.T.dot(p.dot(a))) + + # Under optimal policy, state tends to 0 like beta^n_timesteps + beta = np.abs(np.linalg.eigvals(a + b.dot(k))).max() + if beta >= 1.0: + raise RuntimeError('Controlled system is unstable.') + return p, k, beta diff --git a/Dreamer/local_dm_control_suite/manipulator.py b/Dreamer/local_dm_control_suite/manipulator.py new file mode 100755 index 0000000..b2ed31f --- /dev/null +++ b/Dreamer/local_dm_control_suite/manipulator.py @@ -0,0 +1,290 @@ +# Copyright 2017 The dm_control Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Planar Manipulator domain.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections + +from dm_control import mujoco +from dm_control.rl import control +from local_dm_control_suite import base +from local_dm_control_suite import common +from dm_control.utils import containers +from dm_control.utils import rewards +from dm_control.utils import xml_tools + +from lxml import etree +import numpy as np + +_CLOSE = .01 # (Meters) Distance below which a thing is considered close. +_CONTROL_TIMESTEP = .01 # (Seconds) +_TIME_LIMIT = 10 # (Seconds) +_P_IN_HAND = .1 # Probabillity of object-in-hand initial state +_P_IN_TARGET = .1 # Probabillity of object-in-target initial state +_ARM_JOINTS = ['arm_root', 'arm_shoulder', 'arm_elbow', 'arm_wrist', + 'finger', 'fingertip', 'thumb', 'thumbtip'] +_ALL_PROPS = frozenset(['ball', 'target_ball', 'cup', + 'peg', 'target_peg', 'slot']) + +SUITE = containers.TaggedTasks() + + +def make_model(use_peg, insert): + """Returns a tuple containing the model XML string and a dict of assets.""" + xml_string = common.read_model('manipulator.xml') + parser = etree.XMLParser(remove_blank_text=True) + mjcf = etree.XML(xml_string, parser) + + # Select the desired prop. 
+ if use_peg: + required_props = ['peg', 'target_peg'] + if insert: + required_props += ['slot'] + else: + required_props = ['ball', 'target_ball'] + if insert: + required_props += ['cup'] + + # Remove unused props + for unused_prop in _ALL_PROPS.difference(required_props): + prop = xml_tools.find_element(mjcf, 'body', unused_prop) + prop.getparent().remove(prop) + + return etree.tostring(mjcf, pretty_print=True), common.ASSETS + + +@SUITE.add('benchmarking', 'hard') +def bring_ball(fully_observable=True, time_limit=_TIME_LIMIT, random=None, + environment_kwargs=None): + """Returns manipulator bring task with the ball prop.""" + use_peg = False + insert = False + physics = Physics.from_xml_string(*make_model(use_peg, insert)) + task = Bring(use_peg=use_peg, insert=insert, + fully_observable=fully_observable, random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, control_timestep=_CONTROL_TIMESTEP, time_limit=time_limit, + **environment_kwargs) + + +@SUITE.add('hard') +def bring_peg(fully_observable=True, time_limit=_TIME_LIMIT, random=None, + environment_kwargs=None): + """Returns manipulator bring task with the peg prop.""" + use_peg = True + insert = False + physics = Physics.from_xml_string(*make_model(use_peg, insert)) + task = Bring(use_peg=use_peg, insert=insert, + fully_observable=fully_observable, random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, control_timestep=_CONTROL_TIMESTEP, time_limit=time_limit, + **environment_kwargs) + + +@SUITE.add('hard') +def insert_ball(fully_observable=True, time_limit=_TIME_LIMIT, random=None, + environment_kwargs=None): + """Returns manipulator insert task with the ball prop.""" + use_peg = False + insert = True + physics = Physics.from_xml_string(*make_model(use_peg, insert)) + task = Bring(use_peg=use_peg, insert=insert, + fully_observable=fully_observable, random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, control_timestep=_CONTROL_TIMESTEP, time_limit=time_limit, + **environment_kwargs) + + +@SUITE.add('hard') +def insert_peg(fully_observable=True, time_limit=_TIME_LIMIT, random=None, + environment_kwargs=None): + """Returns manipulator insert task with the peg prop.""" + use_peg = True + insert = True + physics = Physics.from_xml_string(*make_model(use_peg, insert)) + task = Bring(use_peg=use_peg, insert=insert, + fully_observable=fully_observable, random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, control_timestep=_CONTROL_TIMESTEP, time_limit=time_limit, + **environment_kwargs) + + +class Physics(mujoco.Physics): + """Physics with additional features for the Planar Manipulator domain.""" + + def bounded_joint_pos(self, joint_names): + """Returns joint positions as (sin, cos) values.""" + joint_pos = self.named.data.qpos[joint_names] + return np.vstack([np.sin(joint_pos), np.cos(joint_pos)]).T + + def joint_vel(self, joint_names): + """Returns joint velocities.""" + return self.named.data.qvel[joint_names] + + def body_2d_pose(self, body_names, orientation=True): + """Returns positions and/or orientations of bodies.""" + if not isinstance(body_names, str): + body_names = np.array(body_names).reshape(-1, 1) # Broadcast indices. 
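# dm_control's named indexing accepts arrays of element names for rows and
# axis labels for columns; reshaping the names into a column vector lets the
# (n, 1) name array broadcast against the two column labels below, producing an
# (n, 2) slice of x/z positions (and likewise qw/qy orientations). Illustrative
# call, assuming body names that exist in manipulator.xml:
#
#     physics.body_2d_pose(['hand', 'ball'])   # -> shape (2, 4): x, z, qw, qy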
+ pos = self.named.data.xpos[body_names, ['x', 'z']] + if orientation: + ori = self.named.data.xquat[body_names, ['qw', 'qy']] + return np.hstack([pos, ori]) + else: + return pos + + def touch(self): + return np.log1p(self.data.sensordata) + + def site_distance(self, site1, site2): + site1_to_site2 = np.diff(self.named.data.site_xpos[[site2, site1]], axis=0) + return np.linalg.norm(site1_to_site2) + + +class Bring(base.Task): + """A Bring `Task`: bring the prop to the target.""" + + def __init__(self, use_peg, insert, fully_observable, random=None): + """Initialize an instance of the `Bring` task. + + Args: + use_peg: A `bool`, whether to replace the ball prop with the peg prop. + insert: A `bool`, whether to insert the prop in a receptacle. + fully_observable: A `bool`, whether the observation should contain the + position and velocity of the object being manipulated and the target + location. + random: Optional, either a `numpy.random.RandomState` instance, an + integer seed for creating a new `RandomState`, or None to select a seed + automatically (default). + """ + self._use_peg = use_peg + self._target = 'target_peg' if use_peg else 'target_ball' + self._object = 'peg' if self._use_peg else 'ball' + self._object_joints = ['_'.join([self._object, dim]) for dim in 'xzy'] + self._receptacle = 'slot' if self._use_peg else 'cup' + self._insert = insert + self._fully_observable = fully_observable + super(Bring, self).__init__(random=random) + + def initialize_episode(self, physics): + """Sets the state of the environment at the start of each episode.""" + # Local aliases + choice = self.random.choice + uniform = self.random.uniform + model = physics.named.model + data = physics.named.data + + # Find a collision-free random initial configuration. + penetrating = True + while penetrating: + + # Randomise angles of arm joints. + is_limited = model.jnt_limited[_ARM_JOINTS].astype(np.bool) + joint_range = model.jnt_range[_ARM_JOINTS] + lower_limits = np.where(is_limited, joint_range[:, 0], -np.pi) + upper_limits = np.where(is_limited, joint_range[:, 1], np.pi) + angles = uniform(lower_limits, upper_limits) + data.qpos[_ARM_JOINTS] = angles + + # Symmetrize hand. + data.qpos['finger'] = data.qpos['thumb'] + + # Randomise target location. + target_x = uniform(-.4, .4) + target_z = uniform(.1, .4) + if self._insert: + target_angle = uniform(-np.pi/3, np.pi/3) + model.body_pos[self._receptacle, ['x', 'z']] = target_x, target_z + model.body_quat[self._receptacle, ['qw', 'qy']] = [ + np.cos(target_angle/2), np.sin(target_angle/2)] + else: + target_angle = uniform(-np.pi, np.pi) + + model.body_pos[self._target, ['x', 'z']] = target_x, target_z + model.body_quat[self._target, ['qw', 'qy']] = [ + np.cos(target_angle/2), np.sin(target_angle/2)] + + # Randomise object location. 
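# The prop's start state is drawn from a three-way mixture: with probability
# _P_IN_HAND it starts grasped, with probability _P_IN_TARGET it starts at the
# target, and otherwise its pose is sampled uniformly over the workspace, via
# a call of the form used in the lines below:
#
#     init_type = choice(['in_hand', 'in_target', 'uniform'], p=[.1, .1, .8])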
+ object_init_probs = [_P_IN_HAND, _P_IN_TARGET, 1-_P_IN_HAND-_P_IN_TARGET] + init_type = choice(['in_hand', 'in_target', 'uniform'], + p=object_init_probs) + if init_type == 'in_target': + object_x = target_x + object_z = target_z + object_angle = target_angle + elif init_type == 'in_hand': + physics.after_reset() + object_x = data.site_xpos['grasp', 'x'] + object_z = data.site_xpos['grasp', 'z'] + grasp_direction = data.site_xmat['grasp', ['xx', 'zx']] + object_angle = np.pi-np.arctan2(grasp_direction[1], grasp_direction[0]) + else: + object_x = uniform(-.5, .5) + object_z = uniform(0, .7) + object_angle = uniform(0, 2*np.pi) + data.qvel[self._object + '_x'] = uniform(-5, 5) + + data.qpos[self._object_joints] = object_x, object_z, object_angle + + # Check for collisions. + physics.after_reset() + penetrating = physics.data.ncon > 0 + + super(Bring, self).initialize_episode(physics) + + def get_observation(self, physics): + """Returns either features or only sensors (to be used with pixels).""" + obs = collections.OrderedDict() + obs['arm_pos'] = physics.bounded_joint_pos(_ARM_JOINTS) + obs['arm_vel'] = physics.joint_vel(_ARM_JOINTS) + obs['touch'] = physics.touch() + if self._fully_observable: + obs['hand_pos'] = physics.body_2d_pose('hand') + obs['object_pos'] = physics.body_2d_pose(self._object) + obs['object_vel'] = physics.joint_vel(self._object_joints) + obs['target_pos'] = physics.body_2d_pose(self._target) + return obs + + def _is_close(self, distance): + return rewards.tolerance(distance, (0, _CLOSE), _CLOSE*2) + + def _peg_reward(self, physics): + """Returns a reward for bringing the peg prop to the target.""" + grasp = self._is_close(physics.site_distance('peg_grasp', 'grasp')) + pinch = self._is_close(physics.site_distance('peg_pinch', 'pinch')) + grasping = (grasp + pinch) / 2 + bring = self._is_close(physics.site_distance('peg', 'target_peg')) + bring_tip = self._is_close(physics.site_distance('target_peg_tip', + 'peg_tip')) + bringing = (bring + bring_tip) / 2 + return max(bringing, grasping/3) + + def _ball_reward(self, physics): + """Returns a reward for bringing the ball prop to the target.""" + return self._is_close(physics.site_distance('ball', 'target_ball')) + + def get_reward(self, physics): + """Returns a reward to the agent.""" + if self._use_peg: + return self._peg_reward(physics) + else: + return self._ball_reward(physics) diff --git a/Dreamer/local_dm_control_suite/manipulator.xml b/Dreamer/local_dm_control_suite/manipulator.xml new file mode 100755 index 0000000..d6d1767 --- /dev/null +++ b/Dreamer/local_dm_control_suite/manipulator.xml @@ -0,0 +1,211 @@ + + + + + + + + + + + + + + > + + diff --git a/Dreamer/local_dm_control_suite/pendulum.py b/Dreamer/local_dm_control_suite/pendulum.py new file mode 100755 index 0000000..38f442b --- /dev/null +++ b/Dreamer/local_dm_control_suite/pendulum.py @@ -0,0 +1,114 @@ +# Copyright 2017 The dm_control Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""Pendulum domain.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections + +from dm_control import mujoco +from dm_control.rl import control +from local_dm_control_suite import base +from local_dm_control_suite import common +from dm_control.utils import containers +from dm_control.utils import rewards +import numpy as np + + +_DEFAULT_TIME_LIMIT = 20 +_ANGLE_BOUND = 8 +_COSINE_BOUND = np.cos(np.deg2rad(_ANGLE_BOUND)) +SUITE = containers.TaggedTasks() + + +def get_model_and_assets(): + """Returns a tuple containing the model XML string and a dict of assets.""" + return common.read_model('pendulum.xml'), common.ASSETS + + +@SUITE.add('benchmarking') +def swingup(time_limit=_DEFAULT_TIME_LIMIT, random=None, + environment_kwargs=None): + """Returns pendulum swingup task .""" + physics = Physics.from_xml_string(*get_model_and_assets()) + task = SwingUp(random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, time_limit=time_limit, **environment_kwargs) + + +class Physics(mujoco.Physics): + """Physics simulation with additional features for the Pendulum domain.""" + + def pole_vertical(self): + """Returns vertical (z) component of pole frame.""" + return self.named.data.xmat['pole', 'zz'] + + def angular_velocity(self): + """Returns the angular velocity of the pole.""" + return self.named.data.qvel['hinge'].copy() + + def pole_orientation(self): + """Returns both horizontal and vertical components of pole frame.""" + return self.named.data.xmat['pole', ['zz', 'xz']] + + +class SwingUp(base.Task): + """A Pendulum `Task` to swing up and balance the pole.""" + + def __init__(self, random=None): + """Initialize an instance of `Pendulum`. + + Args: + random: Optional, either a `numpy.random.RandomState` instance, an + integer seed for creating a new `RandomState`, or None to select a seed + automatically (default). + """ + super(SwingUp, self).__init__(random=random) + + def initialize_episode(self, physics): + """Sets the state of the environment at the start of each episode. + + Pole is set to a random angle between [-pi, pi). + + Args: + physics: An instance of `Physics`. + + """ + physics.named.data.qpos['hinge'] = self.random.uniform(-np.pi, np.pi) + super(SwingUp, self).initialize_episode(physics) + + def get_observation(self, physics): + """Returns an observation. + + Observations are states concatenating pole orientation and angular velocity + and pixels from fixed camera. + + Args: + physics: An instance of `physics`, Pendulum physics. + + Returns: + A `dict` of observation. + """ + obs = collections.OrderedDict() + obs['orientation'] = physics.pole_orientation() + obs['velocity'] = physics.angular_velocity() + return obs + + def get_reward(self, physics): + return rewards.tolerance(physics.pole_vertical(), (_COSINE_BOUND, 1)) diff --git a/Dreamer/local_dm_control_suite/pendulum.xml b/Dreamer/local_dm_control_suite/pendulum.xml new file mode 100755 index 0000000..14377ae --- /dev/null +++ b/Dreamer/local_dm_control_suite/pendulum.xml @@ -0,0 +1,26 @@ + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/Dreamer/local_dm_control_suite/point_mass.py b/Dreamer/local_dm_control_suite/point_mass.py new file mode 100755 index 0000000..b45ba17 --- /dev/null +++ b/Dreamer/local_dm_control_suite/point_mass.py @@ -0,0 +1,130 @@ +# Copyright 2017 The dm_control Authors. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Point-mass domain.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections + +from dm_control import mujoco +from dm_control.rl import control +from local_dm_control_suite import base +from local_dm_control_suite import common +from dm_control.suite.utils import randomizers +from dm_control.utils import containers +from dm_control.utils import rewards +import numpy as np + +_DEFAULT_TIME_LIMIT = 20 +SUITE = containers.TaggedTasks() + + +def get_model_and_assets(): + """Returns a tuple containing the model XML string and a dict of assets.""" + return common.read_model('point_mass.xml'), common.ASSETS + + +@SUITE.add('benchmarking', 'easy') +def easy(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): + """Returns the easy point_mass task.""" + physics = Physics.from_xml_string(*get_model_and_assets()) + task = PointMass(randomize_gains=False, random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, time_limit=time_limit, **environment_kwargs) + + +@SUITE.add() +def hard(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): + """Returns the hard point_mass task.""" + physics = Physics.from_xml_string(*get_model_and_assets()) + task = PointMass(randomize_gains=True, random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, time_limit=time_limit, **environment_kwargs) + + +class Physics(mujoco.Physics): + """physics for the point_mass domain.""" + + def mass_to_target(self): + """Returns the vector from mass to target in global coordinate.""" + return (self.named.data.geom_xpos['target'] - + self.named.data.geom_xpos['pointmass']) + + def mass_to_target_dist(self): + """Returns the distance from mass to the target.""" + return np.linalg.norm(self.mass_to_target()) + + +class PointMass(base.Task): + """A point_mass `Task` to reach target with smooth reward.""" + + def __init__(self, randomize_gains, random=None): + """Initialize an instance of `PointMass`. + + Args: + randomize_gains: A `bool`, whether to randomize the actuator gains. + random: Optional, either a `numpy.random.RandomState` instance, an + integer seed for creating a new `RandomState`, or None to select a seed + automatically (default). + """ + self._randomize_gains = randomize_gains + super(PointMass, self).__init__(random=random) + + def initialize_episode(self, physics): + """Sets the state of the environment at the start of each episode. + + If _randomize_gains is True, the relationship between the controls and + the joints is randomized, so that each control actuates a random linear + combination of joints. + + Args: + physics: An instance of `mujoco.Physics`. 
+ """ + randomizers.randomize_limited_and_rotational_joints(physics, self.random) + if self._randomize_gains: + dir1 = self.random.randn(2) + dir1 /= np.linalg.norm(dir1) + # Find another actuation direction that is not 'too parallel' to dir1. + parallel = True + while parallel: + dir2 = self.random.randn(2) + dir2 /= np.linalg.norm(dir2) + parallel = abs(np.dot(dir1, dir2)) > 0.9 + physics.model.wrap_prm[[0, 1]] = dir1 + physics.model.wrap_prm[[2, 3]] = dir2 + super(PointMass, self).initialize_episode(physics) + + def get_observation(self, physics): + """Returns an observation of the state.""" + obs = collections.OrderedDict() + obs['position'] = physics.position() + obs['velocity'] = physics.velocity() + return obs + + def get_reward(self, physics): + """Returns a reward to the agent.""" + target_size = physics.named.model.geom_size['target', 0] + near_target = rewards.tolerance(physics.mass_to_target_dist(), + bounds=(0, target_size), margin=target_size) + control_reward = rewards.tolerance(physics.control(), margin=1, + value_at_margin=0, + sigmoid='quadratic').mean() + small_control = (control_reward + 4) / 5 + return near_target * small_control diff --git a/Dreamer/local_dm_control_suite/point_mass.xml b/Dreamer/local_dm_control_suite/point_mass.xml new file mode 100755 index 0000000..c447cf6 --- /dev/null +++ b/Dreamer/local_dm_control_suite/point_mass.xml @@ -0,0 +1,49 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/Dreamer/local_dm_control_suite/quadruped.py b/Dreamer/local_dm_control_suite/quadruped.py new file mode 100755 index 0000000..9e326d7 --- /dev/null +++ b/Dreamer/local_dm_control_suite/quadruped.py @@ -0,0 +1,480 @@ +# Copyright 2019 The dm_control Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Quadruped Domain.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections + +from dm_control import mujoco +from dm_control.mujoco.wrapper import mjbindings +from dm_control.rl import control +from local_dm_control_suite import base +from local_dm_control_suite import common +from dm_control.utils import containers +from dm_control.utils import rewards +from dm_control.utils import xml_tools + +from lxml import etree +import numpy as np +from scipy import ndimage + +enums = mjbindings.enums +mjlib = mjbindings.mjlib + + +_DEFAULT_TIME_LIMIT = 20 +_CONTROL_TIMESTEP = .02 + +# Horizontal speeds above which the move reward is 1. +_RUN_SPEED = 5 +_WALK_SPEED = 0.5 + +# Constants related to terrain generation. +_HEIGHTFIELD_ID = 0 +_TERRAIN_SMOOTHNESS = 0.15 # 0.0: maximally bumpy; 1.0: completely smooth. +_TERRAIN_BUMP_SCALE = 2 # Spatial scale of terrain bumps (in meters). + +# Named model elements. 
+_TOES = ['toe_front_left', 'toe_back_left', 'toe_back_right', 'toe_front_right'] +_WALLS = ['wall_px', 'wall_py', 'wall_nx', 'wall_ny'] + +SUITE = containers.TaggedTasks() + + +def make_model(floor_size=None, terrain=False, rangefinders=False, + walls_and_ball=False): + """Returns the model XML string.""" + xml_string = common.read_model('quadruped.xml') + parser = etree.XMLParser(remove_blank_text=True) + mjcf = etree.XML(xml_string, parser) + + # Set floor size. + if floor_size is not None: + floor_geom = mjcf.find('.//geom[@name={!r}]'.format('floor')) + floor_geom.attrib['size'] = '{} {} .5'.format(floor_size, floor_size) + + # Remove walls, ball and target. + if not walls_and_ball: + for wall in _WALLS: + wall_geom = xml_tools.find_element(mjcf, 'geom', wall) + wall_geom.getparent().remove(wall_geom) + + # Remove ball. + ball_body = xml_tools.find_element(mjcf, 'body', 'ball') + ball_body.getparent().remove(ball_body) + + # Remove target. + target_site = xml_tools.find_element(mjcf, 'site', 'target') + target_site.getparent().remove(target_site) + + # Remove terrain. + if not terrain: + terrain_geom = xml_tools.find_element(mjcf, 'geom', 'terrain') + terrain_geom.getparent().remove(terrain_geom) + + # Remove rangefinders if they're not used, as range computations can be + # expensive, especially in a scene with heightfields. + if not rangefinders: + rangefinder_sensors = mjcf.findall('.//rangefinder') + for rf in rangefinder_sensors: + rf.getparent().remove(rf) + + return etree.tostring(mjcf, pretty_print=True) + + +@SUITE.add() +def walk(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): + """Returns the Walk task.""" + xml_string = make_model(floor_size=_DEFAULT_TIME_LIMIT * _WALK_SPEED) + physics = Physics.from_xml_string(xml_string, common.ASSETS) + task = Move(desired_speed=_WALK_SPEED, random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment(physics, task, time_limit=time_limit, + control_timestep=_CONTROL_TIMESTEP, + **environment_kwargs) + + +@SUITE.add() +def run(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): + """Returns the Run task.""" + xml_string = make_model(floor_size=_DEFAULT_TIME_LIMIT * _RUN_SPEED) + physics = Physics.from_xml_string(xml_string, common.ASSETS) + task = Move(desired_speed=_RUN_SPEED, random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment(physics, task, time_limit=time_limit, + control_timestep=_CONTROL_TIMESTEP, + **environment_kwargs) + + +@SUITE.add() +def escape(time_limit=_DEFAULT_TIME_LIMIT, random=None, + environment_kwargs=None): + """Returns the Escape task.""" + xml_string = make_model(floor_size=40, terrain=True, rangefinders=True) + physics = Physics.from_xml_string(xml_string, common.ASSETS) + task = Escape(random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment(physics, task, time_limit=time_limit, + control_timestep=_CONTROL_TIMESTEP, + **environment_kwargs) + + +@SUITE.add() +def fetch(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): + """Returns the Fetch task.""" + xml_string = make_model(walls_and_ball=True) + physics = Physics.from_xml_string(xml_string, common.ASSETS) + task = Fetch(random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment(physics, task, time_limit=time_limit, + control_timestep=_CONTROL_TIMESTEP, + **environment_kwargs) + + +class Physics(mujoco.Physics): + """Physics simulation with additional features for 
the Quadruped domain.""" + + def _reload_from_data(self, data): + super(Physics, self)._reload_from_data(data) + # Clear cached sensor names when the physics is reloaded. + self._sensor_types_to_names = {} + self._hinge_names = [] + + def _get_sensor_names(self, *sensor_types): + try: + sensor_names = self._sensor_types_to_names[sensor_types] + except KeyError: + [sensor_ids] = np.where(np.in1d(self.model.sensor_type, sensor_types)) + sensor_names = [self.model.id2name(s_id, 'sensor') for s_id in sensor_ids] + self._sensor_types_to_names[sensor_types] = sensor_names + return sensor_names + + def torso_upright(self): + """Returns the dot-product of the torso z-axis and the global z-axis.""" + return np.asarray(self.named.data.xmat['torso', 'zz']) + + def torso_velocity(self): + """Returns the velocity of the torso, in the local frame.""" + return self.named.data.sensordata['velocimeter'].copy() + + def egocentric_state(self): + """Returns the state without global orientation or position.""" + if not self._hinge_names: + [hinge_ids] = np.nonzero(self.model.jnt_type == + enums.mjtJoint.mjJNT_HINGE) + self._hinge_names = [self.model.id2name(j_id, 'joint') + for j_id in hinge_ids] + return np.hstack((self.named.data.qpos[self._hinge_names], + self.named.data.qvel[self._hinge_names], + self.data.act)) + + def toe_positions(self): + """Returns toe positions in egocentric frame.""" + torso_frame = self.named.data.xmat['torso'].reshape(3, 3) + torso_pos = self.named.data.xpos['torso'] + torso_to_toe = self.named.data.xpos[_TOES] - torso_pos + return torso_to_toe.dot(torso_frame) + + def force_torque(self): + """Returns scaled force/torque sensor readings at the toes.""" + force_torque_sensors = self._get_sensor_names(enums.mjtSensor.mjSENS_FORCE, + enums.mjtSensor.mjSENS_TORQUE) + return np.arcsinh(self.named.data.sensordata[force_torque_sensors]) + + def imu(self): + """Returns IMU-like sensor readings.""" + imu_sensors = self._get_sensor_names(enums.mjtSensor.mjSENS_GYRO, + enums.mjtSensor.mjSENS_ACCELEROMETER) + return self.named.data.sensordata[imu_sensors] + + def rangefinder(self): + """Returns scaled rangefinder sensor readings.""" + rf_sensors = self._get_sensor_names(enums.mjtSensor.mjSENS_RANGEFINDER) + rf_readings = self.named.data.sensordata[rf_sensors] + no_intersection = -1.0 + return np.where(rf_readings == no_intersection, 1.0, np.tanh(rf_readings)) + + def origin_distance(self): + """Returns the distance from the origin to the workspace.""" + return np.asarray(np.linalg.norm(self.named.data.site_xpos['workspace'])) + + def origin(self): + """Returns origin position in the torso frame.""" + torso_frame = self.named.data.xmat['torso'].reshape(3, 3) + torso_pos = self.named.data.xpos['torso'] + return -torso_pos.dot(torso_frame) + + def ball_state(self): + """Returns ball position and velocity relative to the torso frame.""" + data = self.named.data + torso_frame = data.xmat['torso'].reshape(3, 3) + ball_rel_pos = data.xpos['ball'] - data.xpos['torso'] + ball_rel_vel = data.qvel['ball_root'][:3] - data.qvel['root'][:3] + ball_rot_vel = data.qvel['ball_root'][3:] + ball_state = np.vstack((ball_rel_pos, ball_rel_vel, ball_rot_vel)) + return ball_state.dot(torso_frame).ravel() + + def target_position(self): + """Returns target position in torso frame.""" + torso_frame = self.named.data.xmat['torso'].reshape(3, 3) + torso_pos = self.named.data.xpos['torso'] + torso_to_target = self.named.data.site_xpos['target'] - torso_pos + return torso_to_target.dot(torso_frame) + + def 
ball_to_target_distance(self): + """Returns horizontal distance from the ball to the target.""" + ball_to_target = (self.named.data.site_xpos['target'] - + self.named.data.xpos['ball']) + return np.linalg.norm(ball_to_target[:2]) + + def self_to_ball_distance(self): + """Returns horizontal distance from the quadruped workspace to the ball.""" + self_to_ball = (self.named.data.site_xpos['workspace'] + -self.named.data.xpos['ball']) + return np.linalg.norm(self_to_ball[:2]) + + +def _find_non_contacting_height(physics, orientation, x_pos=0.0, y_pos=0.0): + """Find a height with no contacts given a body orientation. + + Args: + physics: An instance of `Physics`. + orientation: A quaternion. + x_pos: A float. Position along global x-axis. + y_pos: A float. Position along global y-axis. + Raises: + RuntimeError: If a non-contacting configuration has not been found after + 10,000 attempts. + """ + z_pos = 0.0 # Start embedded in the floor. + num_contacts = 1 + num_attempts = 0 + # Move up in 1cm increments until no contacts. + while num_contacts > 0: + try: + with physics.reset_context(): + physics.named.data.qpos['root'][:3] = x_pos, y_pos, z_pos + physics.named.data.qpos['root'][3:] = orientation + except control.PhysicsError: + # We may encounter a PhysicsError here due to filling the contact + # buffer, in which case we simply increment the height and continue. + pass + num_contacts = physics.data.ncon + z_pos += 0.01 + num_attempts += 1 + if num_attempts > 10000: + raise RuntimeError('Failed to find a non-contacting configuration.') + + +def _common_observations(physics): + """Returns the observations common to all tasks.""" + obs = collections.OrderedDict() + obs['egocentric_state'] = physics.egocentric_state() + obs['torso_velocity'] = physics.torso_velocity() + obs['torso_upright'] = physics.torso_upright() + obs['imu'] = physics.imu() + obs['force_torque'] = physics.force_torque() + return obs + + +def _upright_reward(physics, deviation_angle=0): + """Returns a reward proportional to how upright the torso is. + + Args: + physics: an instance of `Physics`. + deviation_angle: A float, in degrees. The reward is 0 when the torso is + exactly upside-down and 1 when the torso's z-axis is less than + `deviation_angle` away from the global z-axis. + """ + deviation = np.cos(np.deg2rad(deviation_angle)) + return rewards.tolerance( + physics.torso_upright(), + bounds=(deviation, float('inf')), + sigmoid='linear', + margin=1 + deviation, + value_at_margin=0) + + +class Move(base.Task): + """A quadruped task solved by moving forward at a designated speed.""" + + def __init__(self, desired_speed, random=None): + """Initializes an instance of `Move`. + + Args: + desired_speed: A float. If this value is zero, reward is given simply + for standing upright. Otherwise this specifies the horizontal velocity + at which the velocity-dependent reward component is maximized. + random: Optional, either a `numpy.random.RandomState` instance, an + integer seed for creating a new `RandomState`, or None to select a seed + automatically (default). + """ + self._desired_speed = desired_speed + super(Move, self).__init__(random=random) + + def initialize_episode(self, physics): + """Sets the state of the environment at the start of each episode. + + Args: + physics: An instance of `Physics`. + + """ + # Initial configuration. 
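# Normalising a vector of four i.i.d. standard normal samples yields a unit
# quaternion distributed uniformly over orientations, which is how the initial
# body orientation is randomised below. A self-contained sketch of the same
# idea (rng seed illustrative):
#
#     import numpy as np
#     rng = np.random.RandomState(0)
#     q = rng.randn(4)
#     q /= np.linalg.norm(q)    # uniformly random unit quaternion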
+ orientation = self.random.randn(4) + orientation /= np.linalg.norm(orientation) + _find_non_contacting_height(physics, orientation) + super(Move, self).initialize_episode(physics) + + def get_observation(self, physics): + """Returns an observation to the agent.""" + return _common_observations(physics) + + def get_reward(self, physics): + """Returns a reward to the agent.""" + + # Move reward term. + move_reward = rewards.tolerance( + physics.torso_velocity()[0], + bounds=(self._desired_speed, float('inf')), + margin=self._desired_speed, + value_at_margin=0.5, + sigmoid='linear') + + return _upright_reward(physics) * move_reward + + +class Escape(base.Task): + """A quadruped task solved by escaping a bowl-shaped terrain.""" + + def initialize_episode(self, physics): + """Sets the state of the environment at the start of each episode. + + Args: + physics: An instance of `Physics`. + + """ + # Get heightfield resolution, assert that it is square. + res = physics.model.hfield_nrow[_HEIGHTFIELD_ID] + assert res == physics.model.hfield_ncol[_HEIGHTFIELD_ID] + # Sinusoidal bowl shape. + row_grid, col_grid = np.ogrid[-1:1:res*1j, -1:1:res*1j] + radius = np.clip(np.sqrt(col_grid**2 + row_grid**2), .04, 1) + bowl_shape = .5 - np.cos(2*np.pi*radius)/2 + # Random smooth bumps. + terrain_size = 2 * physics.model.hfield_size[_HEIGHTFIELD_ID, 0] + bump_res = int(terrain_size / _TERRAIN_BUMP_SCALE) + bumps = self.random.uniform(_TERRAIN_SMOOTHNESS, 1, (bump_res, bump_res)) + smooth_bumps = ndimage.zoom(bumps, res / float(bump_res)) + # Terrain is elementwise product. + terrain = bowl_shape * smooth_bumps + start_idx = physics.model.hfield_adr[_HEIGHTFIELD_ID] + physics.model.hfield_data[start_idx:start_idx+res**2] = terrain.ravel() + super(Escape, self).initialize_episode(physics) + + # If we have a rendering context, we need to re-upload the modified + # heightfield data. + if physics.contexts: + with physics.contexts.gl.make_current() as ctx: + ctx.call(mjlib.mjr_uploadHField, + physics.model.ptr, + physics.contexts.mujoco.ptr, + _HEIGHTFIELD_ID) + + # Initial configuration. + orientation = self.random.randn(4) + orientation /= np.linalg.norm(orientation) + _find_non_contacting_height(physics, orientation) + + def get_observation(self, physics): + """Returns an observation to the agent.""" + obs = _common_observations(physics) + obs['origin'] = physics.origin() + obs['rangefinder'] = physics.rangefinder() + return obs + + def get_reward(self, physics): + """Returns a reward to the agent.""" + + # Escape reward term. + terrain_size = physics.model.hfield_size[_HEIGHTFIELD_ID, 0] + escape_reward = rewards.tolerance( + physics.origin_distance(), + bounds=(terrain_size, float('inf')), + margin=terrain_size, + value_at_margin=0, + sigmoid='linear') + + return _upright_reward(physics, deviation_angle=20) * escape_reward + + +class Fetch(base.Task): + """A quadruped task solved by bringing a ball to the origin.""" + + def initialize_episode(self, physics): + """Sets the state of the environment at the start of each episode. + + Args: + physics: An instance of `Physics`. + + """ + # Initial configuration, random azimuth and horizontal position. + azimuth = self.random.uniform(0, 2*np.pi) + orientation = np.array((np.cos(azimuth/2), 0, 0, np.sin(azimuth/2))) + spawn_radius = 0.9 * physics.named.model.geom_size['floor', 0] + x_pos, y_pos = self.random.uniform(-spawn_radius, spawn_radius, size=(2,)) + _find_non_contacting_height(physics, orientation, x_pos, y_pos) + + # Initial ball state. 
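
The bowl-plus-bumps terrain assembled in `Escape.initialize_episode` above can be previewed on its own. A small sketch with assumed values standing in for the heightfield resolution and for the `_TERRAIN_SMOOTHNESS` / `_TERRAIN_BUMP_SCALE` constants defined near the top of quadruped.py:

import numpy as np
from scipy import ndimage

res = 64                # assumed heightfield resolution (nrow == ncol)
terrain_size = 8.0      # assumed 2 * hfield half-extent, in metres
bump_scale = 2.0        # stand-in for _TERRAIN_BUMP_SCALE
smoothness = 0.15       # stand-in for _TERRAIN_SMOOTHNESS

# Sinusoidal bowl profile over the unit square.
row_grid, col_grid = np.ogrid[-1:1:res * 1j, -1:1:res * 1j]
radius = np.clip(np.sqrt(col_grid**2 + row_grid**2), .04, 1)
bowl_shape = .5 - np.cos(2 * np.pi * radius) / 2

# Random smooth bumps, upsampled to the heightfield resolution.
bump_res = int(terrain_size / bump_scale)
bumps = np.random.uniform(smoothness, 1, (bump_res, bump_res))
smooth_bumps = ndimage.zoom(bumps, res / float(bump_res))

terrain = bowl_shape * smooth_bumps   # elementwise product written into hfield_data
print(terrain.shape)                  # (64, 64)
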
+ physics.named.data.qpos['ball_root'][:2] = self.random.uniform( + -spawn_radius, spawn_radius, size=(2,)) + physics.named.data.qpos['ball_root'][2] = 2 + physics.named.data.qvel['ball_root'][:2] = 5*self.random.randn(2) + super(Fetch, self).initialize_episode(physics) + + def get_observation(self, physics): + """Returns an observation to the agent.""" + obs = _common_observations(physics) + obs['ball_state'] = physics.ball_state() + obs['target_position'] = physics.target_position() + return obs + + def get_reward(self, physics): + """Returns a reward to the agent.""" + + # Reward for moving close to the ball. + arena_radius = physics.named.model.geom_size['floor', 0] * np.sqrt(2) + workspace_radius = physics.named.model.site_size['workspace', 0] + ball_radius = physics.named.model.geom_size['ball', 0] + reach_reward = rewards.tolerance( + physics.self_to_ball_distance(), + bounds=(0, workspace_radius+ball_radius), + sigmoid='linear', + margin=arena_radius, value_at_margin=0) + + # Reward for bringing the ball to the target. + target_radius = physics.named.model.site_size['target', 0] + fetch_reward = rewards.tolerance( + physics.ball_to_target_distance(), + bounds=(0, target_radius), + sigmoid='linear', + margin=arena_radius, value_at_margin=0) + + reach_then_fetch = reach_reward * (0.5 + 0.5*fetch_reward) + + return _upright_reward(physics) * reach_then_fetch diff --git a/Dreamer/local_dm_control_suite/quadruped.xml b/Dreamer/local_dm_control_suite/quadruped.xml new file mode 100755 index 0000000..958d2c0 --- /dev/null +++ b/Dreamer/local_dm_control_suite/quadruped.xml @@ -0,0 +1,329 @@ + + + + + + + + + + + + + + + + + diff --git a/Dreamer/local_dm_control_suite/reacher.py b/Dreamer/local_dm_control_suite/reacher.py new file mode 100755 index 0000000..feea8b4 --- /dev/null +++ b/Dreamer/local_dm_control_suite/reacher.py @@ -0,0 +1,116 @@ +# Copyright 2017 The dm_control Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""Reacher domain.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections + +from dm_control import mujoco +from dm_control.rl import control +from local_dm_control_suite import base +from local_dm_control_suite import common +from dm_control.suite.utils import randomizers +from dm_control.utils import containers +from dm_control.utils import rewards +import numpy as np + +SUITE = containers.TaggedTasks() +_DEFAULT_TIME_LIMIT = 20 +_BIG_TARGET = .05 +_SMALL_TARGET = .015 + + +def get_model_and_assets(): + """Returns a tuple containing the model XML string and a dict of assets.""" + return common.read_model('reacher.xml'), common.ASSETS + + +@SUITE.add('benchmarking', 'easy') +def easy(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): + """Returns reacher with sparse reward with 5e-2 tol and randomized target.""" + physics = Physics.from_xml_string(*get_model_and_assets()) + task = Reacher(target_size=_BIG_TARGET, random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, time_limit=time_limit, **environment_kwargs) + + +@SUITE.add('benchmarking') +def hard(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): + """Returns reacher with sparse reward with 1e-2 tol and randomized target.""" + physics = Physics.from_xml_string(*get_model_and_assets()) + task = Reacher(target_size=_SMALL_TARGET, random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, time_limit=time_limit, **environment_kwargs) + + +class Physics(mujoco.Physics): + """Physics simulation with additional features for the Reacher domain.""" + + def finger_to_target(self): + """Returns the vector from target to finger in global coordinates.""" + return (self.named.data.geom_xpos['target', :2] - + self.named.data.geom_xpos['finger', :2]) + + def finger_to_target_dist(self): + """Returns the signed distance between the finger and target surface.""" + return np.linalg.norm(self.finger_to_target()) + + +class Reacher(base.Task): + """A reacher `Task` to reach the target.""" + + def __init__(self, target_size, random=None): + """Initialize an instance of `Reacher`. + + Args: + target_size: A `float`, tolerance to determine whether finger reached the + target. + random: Optional, either a `numpy.random.RandomState` instance, an + integer seed for creating a new `RandomState`, or None to select a seed + automatically (default). 
+ """ + self._target_size = target_size + super(Reacher, self).__init__(random=random) + + def initialize_episode(self, physics): + """Sets the state of the environment at the start of each episode.""" + physics.named.model.geom_size['target', 0] = self._target_size + randomizers.randomize_limited_and_rotational_joints(physics, self.random) + + # Randomize target position + angle = self.random.uniform(0, 2 * np.pi) + radius = self.random.uniform(.05, .20) + physics.named.model.geom_pos['target', 'x'] = radius * np.sin(angle) + physics.named.model.geom_pos['target', 'y'] = radius * np.cos(angle) + + super(Reacher, self).initialize_episode(physics) + + def get_observation(self, physics): + """Returns an observation of the state and the target position.""" + obs = collections.OrderedDict() + obs['position'] = physics.position() + obs['to_target'] = physics.finger_to_target() + obs['velocity'] = physics.velocity() + return obs + + def get_reward(self, physics): + radii = physics.named.model.geom_size[['target', 'finger'], 0].sum() + return rewards.tolerance(physics.finger_to_target_dist(), (0, radii)) diff --git a/Dreamer/local_dm_control_suite/reacher.xml b/Dreamer/local_dm_control_suite/reacher.xml new file mode 100755 index 0000000..343f799 --- /dev/null +++ b/Dreamer/local_dm_control_suite/reacher.xml @@ -0,0 +1,47 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/Dreamer/local_dm_control_suite/stacker.py b/Dreamer/local_dm_control_suite/stacker.py new file mode 100755 index 0000000..6d4d49c --- /dev/null +++ b/Dreamer/local_dm_control_suite/stacker.py @@ -0,0 +1,208 @@ +# Copyright 2017 The dm_control Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Planar Stacker domain.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections + +from dm_control import mujoco +from dm_control.rl import control +from local_dm_control_suite import base +from local_dm_control_suite import common +from dm_control.utils import containers +from dm_control.utils import rewards +from dm_control.utils import xml_tools + +from lxml import etree +import numpy as np + + +_CLOSE = .01 # (Meters) Distance below which a thing is considered close. 
+_CONTROL_TIMESTEP = .01 # (Seconds) +_TIME_LIMIT = 10 # (Seconds) +_ARM_JOINTS = ['arm_root', 'arm_shoulder', 'arm_elbow', 'arm_wrist', + 'finger', 'fingertip', 'thumb', 'thumbtip'] + +SUITE = containers.TaggedTasks() + + +def make_model(n_boxes): + """Returns a tuple containing the model XML string and a dict of assets.""" + xml_string = common.read_model('stacker.xml') + parser = etree.XMLParser(remove_blank_text=True) + mjcf = etree.XML(xml_string, parser) + + # Remove unused boxes + for b in range(n_boxes, 4): + box = xml_tools.find_element(mjcf, 'body', 'box' + str(b)) + box.getparent().remove(box) + + return etree.tostring(mjcf, pretty_print=True), common.ASSETS + + +@SUITE.add('hard') +def stack_2(fully_observable=True, time_limit=_TIME_LIMIT, random=None, + environment_kwargs=None): + """Returns stacker task with 2 boxes.""" + n_boxes = 2 + physics = Physics.from_xml_string(*make_model(n_boxes=n_boxes)) + task = Stack(n_boxes=n_boxes, + fully_observable=fully_observable, + random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, control_timestep=_CONTROL_TIMESTEP, time_limit=time_limit, + **environment_kwargs) + + +@SUITE.add('hard') +def stack_4(fully_observable=True, time_limit=_TIME_LIMIT, random=None, + environment_kwargs=None): + """Returns stacker task with 4 boxes.""" + n_boxes = 4 + physics = Physics.from_xml_string(*make_model(n_boxes=n_boxes)) + task = Stack(n_boxes=n_boxes, + fully_observable=fully_observable, + random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, control_timestep=_CONTROL_TIMESTEP, time_limit=time_limit, + **environment_kwargs) + + +class Physics(mujoco.Physics): + """Physics with additional features for the Planar Manipulator domain.""" + + def bounded_joint_pos(self, joint_names): + """Returns joint positions as (sin, cos) values.""" + joint_pos = self.named.data.qpos[joint_names] + return np.vstack([np.sin(joint_pos), np.cos(joint_pos)]).T + + def joint_vel(self, joint_names): + """Returns joint velocities.""" + return self.named.data.qvel[joint_names] + + def body_2d_pose(self, body_names, orientation=True): + """Returns positions and/or orientations of bodies.""" + if not isinstance(body_names, str): + body_names = np.array(body_names).reshape(-1, 1) # Broadcast indices. + pos = self.named.data.xpos[body_names, ['x', 'z']] + if orientation: + ori = self.named.data.xquat[body_names, ['qw', 'qy']] + return np.hstack([pos, ori]) + else: + return pos + + def touch(self): + return np.log1p(self.data.sensordata) + + def site_distance(self, site1, site2): + site1_to_site2 = np.diff(self.named.data.site_xpos[[site2, site1]], axis=0) + return np.linalg.norm(site1_to_site2) + + +class Stack(base.Task): + """A Stack `Task`: stack the boxes.""" + + def __init__(self, n_boxes, fully_observable, random=None): + """Initialize an instance of the `Stack` task. + + Args: + n_boxes: An `int`, number of boxes to stack. + fully_observable: A `bool`, whether the observation should contain the + positions and velocities of the boxes and the location of the target. + random: Optional, either a `numpy.random.RandomState` instance, an + integer seed for creating a new `RandomState`, or None to select a seed + automatically (default). 
+ """ + self._n_boxes = n_boxes + self._box_names = ['box' + str(b) for b in range(n_boxes)] + self._box_joint_names = [] + for name in self._box_names: + for dim in 'xyz': + self._box_joint_names.append('_'.join([name, dim])) + self._fully_observable = fully_observable + super(Stack, self).__init__(random=random) + + def initialize_episode(self, physics): + """Sets the state of the environment at the start of each episode.""" + # Local aliases + randint = self.random.randint + uniform = self.random.uniform + model = physics.named.model + data = physics.named.data + + # Find a collision-free random initial configuration. + penetrating = True + while penetrating: + + # Randomise angles of arm joints. + is_limited = model.jnt_limited[_ARM_JOINTS].astype(np.bool) + joint_range = model.jnt_range[_ARM_JOINTS] + lower_limits = np.where(is_limited, joint_range[:, 0], -np.pi) + upper_limits = np.where(is_limited, joint_range[:, 1], np.pi) + angles = uniform(lower_limits, upper_limits) + data.qpos[_ARM_JOINTS] = angles + + # Symmetrize hand. + data.qpos['finger'] = data.qpos['thumb'] + + # Randomise target location. + target_height = 2*randint(self._n_boxes) + 1 + box_size = model.geom_size['target', 0] + model.body_pos['target', 'z'] = box_size * target_height + model.body_pos['target', 'x'] = uniform(-.37, .37) + + # Randomise box locations. + for name in self._box_names: + data.qpos[name + '_x'] = uniform(.1, .3) + data.qpos[name + '_z'] = uniform(0, .7) + data.qpos[name + '_y'] = uniform(0, 2*np.pi) + + # Check for collisions. + physics.after_reset() + penetrating = physics.data.ncon > 0 + + super(Stack, self).initialize_episode(physics) + + def get_observation(self, physics): + """Returns either features or only sensors (to be used with pixels).""" + obs = collections.OrderedDict() + obs['arm_pos'] = physics.bounded_joint_pos(_ARM_JOINTS) + obs['arm_vel'] = physics.joint_vel(_ARM_JOINTS) + obs['touch'] = physics.touch() + if self._fully_observable: + obs['hand_pos'] = physics.body_2d_pose('hand') + obs['box_pos'] = physics.body_2d_pose(self._box_names) + obs['box_vel'] = physics.joint_vel(self._box_joint_names) + obs['target_pos'] = physics.body_2d_pose('target', orientation=False) + return obs + + def get_reward(self, physics): + """Returns a reward to the agent.""" + box_size = physics.named.model.geom_size['target', 0] + min_box_to_target_distance = min(physics.site_distance(name, 'target') + for name in self._box_names) + box_is_close = rewards.tolerance(min_box_to_target_distance, + margin=2*box_size) + hand_to_target_distance = physics.site_distance('grasp', 'target') + hand_is_far = rewards.tolerance(hand_to_target_distance, + bounds=(.1, float('inf')), + margin=_CLOSE) + return box_is_close * hand_is_far diff --git a/Dreamer/local_dm_control_suite/stacker.xml b/Dreamer/local_dm_control_suite/stacker.xml new file mode 100755 index 0000000..7af4877 --- /dev/null +++ b/Dreamer/local_dm_control_suite/stacker.xml @@ -0,0 +1,193 @@ + + + + + + + + + + + + + + > + + diff --git a/Dreamer/local_dm_control_suite/swimmer.py b/Dreamer/local_dm_control_suite/swimmer.py new file mode 100755 index 0000000..96fd8ea --- /dev/null +++ b/Dreamer/local_dm_control_suite/swimmer.py @@ -0,0 +1,215 @@ +# Copyright 2017 The dm_control Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Procedurally generated Swimmer domain.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections + +from dm_control import mujoco +from dm_control.rl import control +from local_dm_control_suite import base +from local_dm_control_suite import common +from dm_control.suite.utils import randomizers +from dm_control.utils import containers +from dm_control.utils import rewards +from lxml import etree +import numpy as np +from six.moves import range + +_DEFAULT_TIME_LIMIT = 30 +_CONTROL_TIMESTEP = .03 # (Seconds) + +SUITE = containers.TaggedTasks() + + +def get_model_and_assets(n_joints): + """Returns a tuple containing the model XML string and a dict of assets. + + Args: + n_joints: An integer specifying the number of joints in the swimmer. + + Returns: + A tuple `(model_xml_string, assets)`, where `assets` is a dict consisting of + `{filename: contents_string}` pairs. + """ + return _make_model(n_joints), common.ASSETS + + +@SUITE.add('benchmarking') +def swimmer6(time_limit=_DEFAULT_TIME_LIMIT, random=None, + environment_kwargs=None): + """Returns a 6-link swimmer.""" + return _make_swimmer(6, time_limit, random=random, + environment_kwargs=environment_kwargs) + + +@SUITE.add('benchmarking') +def swimmer15(time_limit=_DEFAULT_TIME_LIMIT, random=None, + environment_kwargs=None): + """Returns a 15-link swimmer.""" + return _make_swimmer(15, time_limit, random=random, + environment_kwargs=environment_kwargs) + + +def swimmer(n_links=3, time_limit=_DEFAULT_TIME_LIMIT, + random=None, environment_kwargs=None): + """Returns a swimmer with n links.""" + return _make_swimmer(n_links, time_limit, random=random, + environment_kwargs=environment_kwargs) + + +def _make_swimmer(n_joints, time_limit=_DEFAULT_TIME_LIMIT, random=None, + environment_kwargs=None): + """Returns a swimmer control environment.""" + model_string, assets = get_model_and_assets(n_joints) + physics = Physics.from_xml_string(model_string, assets=assets) + task = Swimmer(random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP, + **environment_kwargs) + + +def _make_model(n_bodies): + """Generates an xml string defining a swimmer with `n_bodies` bodies.""" + if n_bodies < 3: + raise ValueError('At least 3 bodies required. 
Received {}'.format(n_bodies)) + mjcf = etree.fromstring(common.read_model('swimmer.xml')) + head_body = mjcf.find('./worldbody/body') + actuator = etree.SubElement(mjcf, 'actuator') + sensor = etree.SubElement(mjcf, 'sensor') + + parent = head_body + for body_index in range(n_bodies - 1): + site_name = 'site_{}'.format(body_index) + child = _make_body(body_index=body_index) + child.append(etree.Element('site', name=site_name)) + joint_name = 'joint_{}'.format(body_index) + joint_limit = 360.0/n_bodies + joint_range = '{} {}'.format(-joint_limit, joint_limit) + child.append(etree.Element('joint', {'name': joint_name, + 'range': joint_range})) + motor_name = 'motor_{}'.format(body_index) + actuator.append(etree.Element('motor', name=motor_name, joint=joint_name)) + velocimeter_name = 'velocimeter_{}'.format(body_index) + sensor.append(etree.Element('velocimeter', name=velocimeter_name, + site=site_name)) + gyro_name = 'gyro_{}'.format(body_index) + sensor.append(etree.Element('gyro', name=gyro_name, site=site_name)) + parent.append(child) + parent = child + + # Move tracking cameras further away from the swimmer according to its length. + cameras = mjcf.findall('./worldbody/body/camera') + scale = n_bodies / 6.0 + for cam in cameras: + if cam.get('mode') == 'trackcom': + old_pos = cam.get('pos').split(' ') + new_pos = ' '.join([str(float(dim) * scale) for dim in old_pos]) + cam.set('pos', new_pos) + + return etree.tostring(mjcf, pretty_print=True) + + +def _make_body(body_index): + """Generates an xml string defining a single physical body.""" + body_name = 'segment_{}'.format(body_index) + visual_name = 'visual_{}'.format(body_index) + inertial_name = 'inertial_{}'.format(body_index) + body = etree.Element('body', name=body_name) + body.set('pos', '0 .1 0') + etree.SubElement(body, 'geom', {'class': 'visual', 'name': visual_name}) + etree.SubElement(body, 'geom', {'class': 'inertial', 'name': inertial_name}) + return body + + +class Physics(mujoco.Physics): + """Physics simulation with additional features for the swimmer domain.""" + + def nose_to_target(self): + """Returns a vector from nose to target in local coordinate of the head.""" + nose_to_target = (self.named.data.geom_xpos['target'] - + self.named.data.geom_xpos['nose']) + head_orientation = self.named.data.xmat['head'].reshape(3, 3) + return nose_to_target.dot(head_orientation)[:2] + + def nose_to_target_dist(self): + """Returns the distance from the nose to the target.""" + return np.linalg.norm(self.nose_to_target()) + + def body_velocities(self): + """Returns local body velocities: x,y linear, z rotational.""" + xvel_local = self.data.sensordata[12:].reshape((-1, 6)) + vx_vy_wz = [0, 1, 5] # Indices for linear x,y vels and rotational z vel. + return xvel_local[:, vx_vy_wz].ravel() + + def joints(self): + """Returns all internal joint angles (excluding root joints).""" + return self.data.qpos[3:].copy() + + +class Swimmer(base.Task): + """A swimmer `Task` to reach the target or just swim.""" + + def __init__(self, random=None): + """Initializes an instance of `Swimmer`. + + Args: + random: Optional, either a `numpy.random.RandomState` instance, an + integer seed for creating a new `RandomState`, or None to select a seed + automatically (default). + """ + super(Swimmer, self).__init__(random=random) + + def initialize_episode(self, physics): + """Sets the state of the environment at the start of each episode. 
+ + Initializes the swimmer orientation to [-pi, pi) and the relative joint + angle of each joint uniformly within its range. + + Args: + physics: An instance of `Physics`. + """ + # Random joint angles: + randomizers.randomize_limited_and_rotational_joints(physics, self.random) + # Random target position. + close_target = self.random.rand() < .2 # Probability of a close target. + target_box = .3 if close_target else 2 + xpos, ypos = self.random.uniform(-target_box, target_box, size=2) + physics.named.model.geom_pos['target', 'x'] = xpos + physics.named.model.geom_pos['target', 'y'] = ypos + physics.named.model.light_pos['target_light', 'x'] = xpos + physics.named.model.light_pos['target_light', 'y'] = ypos + + super(Swimmer, self).initialize_episode(physics) + + def get_observation(self, physics): + """Returns an observation of joint angles, body velocities and target.""" + obs = collections.OrderedDict() + obs['joints'] = physics.joints() + obs['to_target'] = physics.nose_to_target() + obs['body_velocities'] = physics.body_velocities() + return obs + + def get_reward(self, physics): + """Returns a smooth reward.""" + target_size = physics.named.model.geom_size['target', 0] + return rewards.tolerance(physics.nose_to_target_dist(), + bounds=(0, target_size), + margin=5*target_size, + sigmoid='long_tail') diff --git a/Dreamer/local_dm_control_suite/swimmer.xml b/Dreamer/local_dm_control_suite/swimmer.xml new file mode 100755 index 0000000..29c7bc8 --- /dev/null +++ b/Dreamer/local_dm_control_suite/swimmer.xml @@ -0,0 +1,57 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/Dreamer/local_dm_control_suite/tests/domains_test.py b/Dreamer/local_dm_control_suite/tests/domains_test.py new file mode 100755 index 0000000..4c148cf --- /dev/null +++ b/Dreamer/local_dm_control_suite/tests/domains_test.py @@ -0,0 +1,292 @@ +# Copyright 2017 The dm_control Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Tests for dm_control.suite domains.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# Internal dependencies. +from absl.testing import absltest +from absl.testing import parameterized +from dm_control import suite +from dm_control.rl import control +import mock +import numpy as np +import six +from six.moves import range +from six.moves import zip + + +def uniform_random_policy(action_spec, random=None): + lower_bounds = action_spec.minimum + upper_bounds = action_spec.maximum + # Draw values between -1 and 1 for unbounded actions. + lower_bounds = np.where(np.isinf(lower_bounds), -1.0, lower_bounds) + upper_bounds = np.where(np.isinf(upper_bounds), 1.0, upper_bounds) + random_state = np.random.RandomState(random) + def policy(time_step): + del time_step # Unused. 
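
The frame projection in the swimmer's `Physics.nose_to_target` above relies on MuJoCo's `xmat` holding the body-to-world rotation, so right-multiplying a world-frame vector by it expresses that vector in body coordinates. A self-contained check of the convention (illustrative only):

import numpy as np

def world_to_local_xy(vec_world, xmat_flat):
    # xmat is the flattened body-to-world rotation R; v.dot(R) equals R.T @ v,
    # i.e. the vector expressed in the body frame.
    rot = np.asarray(xmat_flat).reshape(3, 3)
    return np.asarray(vec_world).dot(rot)[:2]

theta = np.pi / 2  # body yawed 90 degrees relative to the world
xmat = np.array([[np.cos(theta), -np.sin(theta), 0.],
                 [np.sin(theta),  np.cos(theta), 0.],
                 [0., 0., 1.]]).ravel()
print(world_to_local_xy([1., 0., 0.], xmat))  # ~[0., -1.]
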
+ return random_state.uniform(lower_bounds, upper_bounds) + return policy + + +def step_environment(env, policy, num_episodes=5, max_steps_per_episode=10): + for _ in range(num_episodes): + step_count = 0 + time_step = env.reset() + yield time_step + while not time_step.last(): + action = policy(time_step) + time_step = env.step(action) + step_count += 1 + yield time_step + if step_count >= max_steps_per_episode: + break + + +def make_trajectory(domain, task, seed, **trajectory_kwargs): + env = suite.load(domain, task, task_kwargs={'random': seed}) + policy = uniform_random_policy(env.action_spec(), random=seed) + return step_environment(env, policy, **trajectory_kwargs) + + +class DomainTest(parameterized.TestCase): + """Tests run on all the tasks registered.""" + + def test_constants(self): + num_tasks = sum(len(tasks) for tasks in + six.itervalues(suite.TASKS_BY_DOMAIN)) + + self.assertLen(suite.ALL_TASKS, num_tasks) + + def _validate_observation(self, observation_dict, observation_spec): + obs = observation_dict.copy() + for name, spec in six.iteritems(observation_spec): + arr = obs.pop(name) + self.assertEqual(arr.shape, spec.shape) + self.assertEqual(arr.dtype, spec.dtype) + self.assertTrue( + np.all(np.isfinite(arr)), + msg='{!r} has non-finite value(s): {!r}'.format(name, arr)) + self.assertEmpty( + obs, + msg='Observation contains arrays(s) that are not in the spec: {!r}' + .format(obs)) + + def _validate_reward_range(self, time_step): + if time_step.first(): + self.assertIsNone(time_step.reward) + else: + self.assertIsInstance(time_step.reward, float) + self.assertBetween(time_step.reward, 0, 1) + + def _validate_discount(self, time_step): + if time_step.first(): + self.assertIsNone(time_step.discount) + else: + self.assertIsInstance(time_step.discount, float) + self.assertBetween(time_step.discount, 0, 1) + + def _validate_control_range(self, lower_bounds, upper_bounds): + for b in lower_bounds: + self.assertEqual(b, -1.0) + for b in upper_bounds: + self.assertEqual(b, 1.0) + + @parameterized.parameters(*suite.ALL_TASKS) + def test_components_have_names(self, domain, task): + env = suite.load(domain, task) + model = env.physics.model + + object_types_and_size_fields = [ + ('body', 'nbody'), + ('joint', 'njnt'), + ('geom', 'ngeom'), + ('site', 'nsite'), + ('camera', 'ncam'), + ('light', 'nlight'), + ('mesh', 'nmesh'), + ('hfield', 'nhfield'), + ('texture', 'ntex'), + ('material', 'nmat'), + ('equality', 'neq'), + ('tendon', 'ntendon'), + ('actuator', 'nu'), + ('sensor', 'nsensor'), + ('numeric', 'nnumeric'), + ('text', 'ntext'), + ('tuple', 'ntuple'), + ] + for object_type, size_field in object_types_and_size_fields: + for idx in range(getattr(model, size_field)): + object_name = model.id2name(idx, object_type) + self.assertNotEqual(object_name, '', + msg='Model {!r} contains unnamed {!r} with ID {}.' + .format(model.name, object_type, idx)) + + @parameterized.parameters(*suite.ALL_TASKS) + def test_model_has_at_least_2_cameras(self, domain, task): + env = suite.load(domain, task) + model = env.physics.model + self.assertGreaterEqual(model.ncam, 2, + 'Model {!r} should have at least 2 cameras, has {}.' 
+ .format(model.name, model.ncam)) + + @parameterized.parameters(*suite.ALL_TASKS) + def test_task_conforms_to_spec(self, domain, task): + """Tests that the environment timesteps conform to specifications.""" + is_benchmark = (domain, task) in suite.BENCHMARKING + env = suite.load(domain, task) + observation_spec = env.observation_spec() + action_spec = env.action_spec() + + # Check action bounds. + if is_benchmark: + self._validate_control_range(action_spec.minimum, action_spec.maximum) + + # Step through the environment, applying random actions sampled within the + # valid range and check the observations, rewards, and discounts. + policy = uniform_random_policy(action_spec) + for time_step in step_environment(env, policy): + self._validate_observation(time_step.observation, observation_spec) + self._validate_discount(time_step) + if is_benchmark: + self._validate_reward_range(time_step) + + @parameterized.parameters(*suite.ALL_TASKS) + def test_environment_is_deterministic(self, domain, task): + """Tests that identical seeds and actions produce identical trajectories.""" + seed = 0 + # Iterate over two trajectories generated using identical sequences of + # random actions, and with identical task random states. Check that the + # observations, rewards, discounts and step types are identical. + trajectory1 = make_trajectory(domain=domain, task=task, seed=seed) + trajectory2 = make_trajectory(domain=domain, task=task, seed=seed) + for time_step1, time_step2 in zip(trajectory1, trajectory2): + self.assertEqual(time_step1.step_type, time_step2.step_type) + self.assertEqual(time_step1.reward, time_step2.reward) + self.assertEqual(time_step1.discount, time_step2.discount) + for key in six.iterkeys(time_step1.observation): + np.testing.assert_array_equal( + time_step1.observation[key], time_step2.observation[key], + err_msg='Observation {!r} is not equal.'.format(key)) + + def assertCorrectColors(self, physics, reward): + colors = physics.named.model.mat_rgba + for material_name in ('self', 'effector', 'target'): + highlight = colors[material_name + '_highlight'] + default = colors[material_name + '_default'] + blend_coef = reward ** 4 + expected = blend_coef * highlight + (1.0 - blend_coef) * default + actual = colors[material_name] + err_msg = ('Material {!r} has unexpected color.\nExpected: {!r}\n' + 'Actual: {!r}'.format(material_name, expected, actual)) + np.testing.assert_array_almost_equal(expected, actual, err_msg=err_msg) + + @parameterized.parameters(*suite.ALL_TASKS) + def test_visualize_reward(self, domain, task): + env = suite.load(domain, task) + env.task.visualize_reward = True + action = np.zeros(env.action_spec().shape) + + with mock.patch.object(env.task, 'get_reward') as mock_get_reward: + mock_get_reward.return_value = -3.0 # Rewards < 0 should be clipped. + env.reset() + mock_get_reward.assert_called_with(env.physics) + self.assertCorrectColors(env.physics, reward=0.0) + + mock_get_reward.reset_mock() + mock_get_reward.return_value = 0.5 + env.step(action) + mock_get_reward.assert_called_with(env.physics) + self.assertCorrectColors(env.physics, reward=mock_get_reward.return_value) + + mock_get_reward.reset_mock() + mock_get_reward.return_value = 2.0 # Rewards > 1 should be clipped. 
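
`assertCorrectColors` above encodes the expected reward-visualisation blend: rewards are clipped to [0, 1] and the highlight colour is mixed in with weight reward**4. A small sketch of that expectation (the internal behaviour of `visualize_reward` is only inferred from the test):

import numpy as np

def blend_reward_color(reward, highlight, default):
    reward = float(np.clip(reward, 0.0, 1.0))
    blend_coef = reward ** 4
    return blend_coef * np.asarray(highlight) + (1.0 - blend_coef) * np.asarray(default)

default = np.array([0.5, 0.5, 0.5, 1.0])
highlight = np.array([1.0, 0.0, 0.0, 1.0])
print(blend_reward_color(-3.0, highlight, default))  # clipped to 0 -> default colour
print(blend_reward_color(0.5, highlight, default))   # 1/16 of the way to highlight
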
+ env.step(action) + mock_get_reward.assert_called_with(env.physics) + self.assertCorrectColors(env.physics, reward=1.0) + + mock_get_reward.reset_mock() + mock_get_reward.return_value = 0.25 + env.reset() + mock_get_reward.assert_called_with(env.physics) + self.assertCorrectColors(env.physics, reward=mock_get_reward.return_value) + + @parameterized.parameters(*suite.ALL_TASKS) + def test_task_supports_environment_kwargs(self, domain, task): + env = suite.load(domain, task, + environment_kwargs=dict(flat_observation=True)) + # Check that the kwargs are actually passed through to the environment. + self.assertSetEqual(set(env.observation_spec()), + {control.FLAT_OBSERVATION_KEY}) + + @parameterized.parameters(*suite.ALL_TASKS) + def test_observation_arrays_dont_share_memory(self, domain, task): + env = suite.load(domain, task) + first_timestep = env.reset() + action = np.zeros(env.action_spec().shape) + second_timestep = env.step(action) + for name, first_array in six.iteritems(first_timestep.observation): + second_array = second_timestep.observation[name] + self.assertFalse( + np.may_share_memory(first_array, second_array), + msg='Consecutive observations of {!r} may share memory.'.format(name)) + + @parameterized.parameters(*suite.ALL_TASKS) + def test_observations_dont_contain_constant_elements(self, domain, task): + env = suite.load(domain, task) + trajectory = make_trajectory(domain=domain, task=task, seed=0, + num_episodes=2, max_steps_per_episode=1000) + observations = {name: [] for name in env.observation_spec()} + for time_step in trajectory: + for name, array in six.iteritems(time_step.observation): + observations[name].append(array) + + failures = [] + + for name, array_list in six.iteritems(observations): + # Sampling random uniform actions generally isn't sufficient to trigger + # these touch sensors. + if (domain in ('manipulator', 'stacker') and name == 'touch' or + domain == 'quadruped' and name == 'force_torque'): + continue + stacked_arrays = np.array(array_list) + is_constant = np.all(stacked_arrays == stacked_arrays[0], axis=0) + has_constant_elements = ( + is_constant if np.isscalar(is_constant) else np.any(is_constant)) + if has_constant_elements: + failures.append((name, is_constant)) + + self.assertEmpty( + failures, + msg='The following observation(s) contain constant elements:\n{}' + .format('\n'.join(':\t'.join([name, str(is_constant)]) + for (name, is_constant) in failures))) + + @parameterized.parameters(*suite.ALL_TASKS) + def test_initial_state_is_randomized(self, domain, task): + env = suite.load(domain, task, task_kwargs={'random': 42}) + obs1 = env.reset().observation + obs2 = env.reset().observation + self.assertFalse( + all(np.all(obs1[k] == obs2[k]) for k in obs1), + 'Two consecutive initial states have identical observations.\n' + 'First: {}\nSecond: {}'.format(obs1, obs2)) + +if __name__ == '__main__': + absltest.main() diff --git a/Dreamer/local_dm_control_suite/tests/loader_test.py b/Dreamer/local_dm_control_suite/tests/loader_test.py new file mode 100755 index 0000000..cbce4f5 --- /dev/null +++ b/Dreamer/local_dm_control_suite/tests/loader_test.py @@ -0,0 +1,52 @@ +# Copyright 2017 The dm_control Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Tests for the dm_control.suite loader.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# Internal dependencies. + +from absl.testing import absltest + +from dm_control import suite +from dm_control.rl import control + + +class LoaderTest(absltest.TestCase): + + def test_load_without_kwargs(self): + env = suite.load('cartpole', 'swingup') + self.assertIsInstance(env, control.Environment) + + def test_load_with_kwargs(self): + env = suite.load('cartpole', 'swingup', + task_kwargs={'time_limit': 40, 'random': 99}) + self.assertIsInstance(env, control.Environment) + + +class LoaderConstantsTest(absltest.TestCase): + + def testSuiteConstants(self): + self.assertNotEmpty(suite.BENCHMARKING) + self.assertNotEmpty(suite.EASY) + self.assertNotEmpty(suite.HARD) + self.assertNotEmpty(suite.EXTRA) + + +if __name__ == '__main__': + absltest.main() diff --git a/Dreamer/local_dm_control_suite/tests/lqr_test.py b/Dreamer/local_dm_control_suite/tests/lqr_test.py new file mode 100755 index 0000000..d6edcf0 --- /dev/null +++ b/Dreamer/local_dm_control_suite/tests/lqr_test.py @@ -0,0 +1,88 @@ +# Copyright 2017 The dm_control Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Tests specific to the LQR domain.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math +import unittest + +# Internal dependencies. +from absl import logging + +from absl.testing import absltest +from absl.testing import parameterized + +from local_dm_control_suite import lqr +from local_dm_control_suite import lqr_solver + +import numpy as np +from six.moves import range + + +class LqrTest(parameterized.TestCase): + + @parameterized.named_parameters( + ('lqr_2_1', lqr.lqr_2_1), + ('lqr_6_2', lqr.lqr_6_2)) + def test_lqr_optimal_policy(self, make_env): + env = make_env() + p, k, beta = lqr_solver.solve(env) + self.assertPolicyisOptimal(env, p, k, beta) + + @parameterized.named_parameters( + ('lqr_2_1', lqr.lqr_2_1), + ('lqr_6_2', lqr.lqr_6_2)) + @unittest.skipUnless( + condition=lqr_solver.sp, + reason='scipy is not available, so non-scipy DARE solver is the default.') + def test_lqr_optimal_policy_no_scipy(self, make_env): + env = make_env() + old_sp = lqr_solver.sp + try: + lqr_solver.sp = None # Force the solver to use the non-scipy code path. 
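
The step count used by `assertPolicyisOptimal` (defined just below) treats the per-step cost as shrinking geometrically with ratio `beta`, so about log10(tolerance) / log10(beta) steps are enough for the remaining tail to drop below the tolerance. A standalone sketch of that bookkeeping:

import math

def steps_for_convergence(beta, tolerance=1e-3):
    return int(math.ceil(math.log10(tolerance) / math.log10(beta)))

print(steps_for_convergence(beta=0.99))  # 688 steps
print(steps_for_convergence(beta=0.9))   # 66 steps
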
+ p, k, beta = lqr_solver.solve(env) + finally: + lqr_solver.sp = old_sp + self.assertPolicyisOptimal(env, p, k, beta) + + def assertPolicyisOptimal(self, env, p, k, beta): + tolerance = 1e-3 + n_steps = int(math.ceil(math.log10(tolerance) / math.log10(beta))) + logging.info('%d timesteps for %g convergence.', n_steps, tolerance) + total_loss = 0.0 + + timestep = env.reset() + initial_state = np.hstack((timestep.observation['position'], + timestep.observation['velocity'])) + logging.info('Measuring total cost over %d steps.', n_steps) + for _ in range(n_steps): + x = np.hstack((timestep.observation['position'], + timestep.observation['velocity'])) + # u = k*x is the optimal policy + u = k.dot(x) + total_loss += 1 - (timestep.reward or 0.0) + timestep = env.step(u) + + logging.info('Analytical expected total cost is .5*x^T*p*x.') + expected_loss = .5 * initial_state.T.dot(p).dot(initial_state) + logging.info('Comparing measured and predicted costs.') + np.testing.assert_allclose(expected_loss, total_loss, rtol=tolerance) + +if __name__ == '__main__': + absltest.main() diff --git a/Dreamer/local_dm_control_suite/utils/__init__.py b/Dreamer/local_dm_control_suite/utils/__init__.py new file mode 100755 index 0000000..2ea19cf --- /dev/null +++ b/Dreamer/local_dm_control_suite/utils/__init__.py @@ -0,0 +1,16 @@ +# Copyright 2017 The dm_control Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Utility functions used in the control suite.""" diff --git a/Dreamer/local_dm_control_suite/utils/parse_amc.py b/Dreamer/local_dm_control_suite/utils/parse_amc.py new file mode 100755 index 0000000..3cea2ab --- /dev/null +++ b/Dreamer/local_dm_control_suite/utils/parse_amc.py @@ -0,0 +1,251 @@ +# Copyright 2017 The dm_control Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""Parse and convert amc motion capture data.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections + +from dm_control.mujoco.wrapper import mjbindings +import numpy as np +from scipy import interpolate +from six.moves import range + +mjlib = mjbindings.mjlib + +MOCAP_DT = 1.0/120.0 +CONVERSION_LENGTH = 0.056444 + +_CMU_MOCAP_JOINT_ORDER = ( + 'root0', 'root1', 'root2', 'root3', 'root4', 'root5', 'lowerbackrx', + 'lowerbackry', 'lowerbackrz', 'upperbackrx', 'upperbackry', 'upperbackrz', + 'thoraxrx', 'thoraxry', 'thoraxrz', 'lowerneckrx', 'lowerneckry', + 'lowerneckrz', 'upperneckrx', 'upperneckry', 'upperneckrz', 'headrx', + 'headry', 'headrz', 'rclaviclery', 'rclaviclerz', 'rhumerusrx', + 'rhumerusry', 'rhumerusrz', 'rradiusrx', 'rwristry', 'rhandrx', 'rhandrz', + 'rfingersrx', 'rthumbrx', 'rthumbrz', 'lclaviclery', 'lclaviclerz', + 'lhumerusrx', 'lhumerusry', 'lhumerusrz', 'lradiusrx', 'lwristry', + 'lhandrx', 'lhandrz', 'lfingersrx', 'lthumbrx', 'lthumbrz', 'rfemurrx', + 'rfemurry', 'rfemurrz', 'rtibiarx', 'rfootrx', 'rfootrz', 'rtoesrx', + 'lfemurrx', 'lfemurry', 'lfemurrz', 'ltibiarx', 'lfootrx', 'lfootrz', + 'ltoesrx' +) + +Converted = collections.namedtuple('Converted', + ['qpos', 'qvel', 'time']) + + +def convert(file_name, physics, timestep): + """Converts the parsed .amc values into qpos and qvel values and resamples. + + Args: + file_name: The .amc file to be parsed and converted. + physics: The corresponding physics instance. + timestep: Desired output interval between resampled frames. + + Returns: + A namedtuple with fields: + `qpos`, a numpy array containing converted positional variables. + `qvel`, a numpy array containing converted velocity variables. + `time`, a numpy array containing the corresponding times. + """ + frame_values = parse(file_name) + joint2index = {} + for name in physics.named.data.qpos.axes.row.names: + joint2index[name] = physics.named.data.qpos.axes.row.convert_key_item(name) + index2joint = {} + for joint, index in joint2index.items(): + if isinstance(index, slice): + indices = range(index.start, index.stop) + else: + indices = [index] + for ii in indices: + index2joint[ii] = joint + + # Convert frame_values to qpos + amcvals2qpos_transformer = Amcvals2qpos(index2joint, _CMU_MOCAP_JOINT_ORDER) + qpos_values = [] + for frame_value in frame_values: + qpos_values.append(amcvals2qpos_transformer(frame_value)) + qpos_values = np.stack(qpos_values) # Time by nq + + # Interpolate/resample. + # Note: interpolate quaternions rather than euler angles (slerp). 
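
A minimal sketch of the per-dimension spline resampling performed a few lines below; as the note above says, quaternion components would ideally be interpolated with slerp rather than element-wise splines:

import numpy as np
from scipy import interpolate

mocap_dt, sim_dt = 1.0 / 120.0, 1.0 / 40.0
time_vals = np.arange(0, 1.0, mocap_dt)
signal = np.sin(2 * np.pi * time_vals)        # stand-in for one qpos dimension
time_vals_new = np.arange(0, time_vals[-1], sim_dt)

tck = interpolate.splrep(time_vals, signal)   # fit a cubic spline at mocap times
resampled = interpolate.splev(time_vals_new, tck)
print(resampled.shape)                        # one value per simulation timestep
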
+ # see https://en.wikipedia.org/wiki/Slerp + qpos_values_resampled = [] + time_vals = np.arange(0, len(frame_values)*MOCAP_DT - 1e-8, MOCAP_DT) + time_vals_new = np.arange(0, len(frame_values)*MOCAP_DT, timestep) + while time_vals_new[-1] > time_vals[-1]: + time_vals_new = time_vals_new[:-1] + + for i in range(qpos_values.shape[1]): + f = interpolate.splrep(time_vals, qpos_values[:, i]) + qpos_values_resampled.append(interpolate.splev(time_vals_new, f)) + + qpos_values_resampled = np.stack(qpos_values_resampled) # nq by ntime + + qvel_list = [] + for t in range(qpos_values_resampled.shape[1]-1): + p_tp1 = qpos_values_resampled[:, t + 1] + p_t = qpos_values_resampled[:, t] + qvel = [(p_tp1[:3]-p_t[:3])/ timestep, + mj_quat2vel(mj_quatdiff(p_t[3:7], p_tp1[3:7]), timestep), + (p_tp1[7:]-p_t[7:])/ timestep] + qvel_list.append(np.concatenate(qvel)) + + qvel_values_resampled = np.vstack(qvel_list).T + + return Converted(qpos_values_resampled, qvel_values_resampled, time_vals_new) + + +def parse(file_name): + """Parses the amc file format.""" + values = [] + fid = open(file_name, 'r') + line = fid.readline().strip() + frame_ind = 1 + first_frame = True + while True: + # Parse first frame. + if first_frame and line[0] == str(frame_ind): + first_frame = False + frame_ind += 1 + frame_vals = [] + while True: + line = fid.readline().strip() + if not line or line == str(frame_ind): + values.append(np.array(frame_vals, dtype=np.float)) + break + tokens = line.split() + frame_vals.extend(tokens[1:]) + # Parse other frames. + elif line == str(frame_ind): + frame_ind += 1 + frame_vals = [] + while True: + line = fid.readline().strip() + if not line or line == str(frame_ind): + values.append(np.array(frame_vals, dtype=np.float)) + break + tokens = line.split() + frame_vals.extend(tokens[1:]) + else: + line = fid.readline().strip() + if not line: + break + return values + + +class Amcvals2qpos(object): + """Callable that converts .amc values for a frame and to MuJoCo qpos format. + """ + + def __init__(self, index2joint, joint_order): + """Initializes a new Amcvals2qpos instance. + + Args: + index2joint: List of joint angles in .amc file. + joint_order: List of joint names in MuJoco MJCF. + """ + # Root is x,y,z, then quat. + # need to get indices of qpos that order for amc default order + self.qpos_root_xyz_ind = [0, 1, 2] + self.root_xyz_ransform = np.array( + [[1, 0, 0], [0, 0, -1], [0, 1, 0]]) * CONVERSION_LENGTH + self.qpos_root_quat_ind = [3, 4, 5, 6] + amc2qpos_transform = np.zeros((len(index2joint), len(joint_order))) + for i in range(len(index2joint)): + for j in range(len(joint_order)): + if index2joint[i] == joint_order[j]: + if 'rx' in index2joint[i]: + amc2qpos_transform[i][j] = 1 + elif 'ry' in index2joint[i]: + amc2qpos_transform[i][j] = 1 + elif 'rz' in index2joint[i]: + amc2qpos_transform[i][j] = 1 + self.amc2qpos_transform = amc2qpos_transform + + def __call__(self, amc_val): + """Converts a `.amc` frame to MuJoCo qpos format.""" + amc_val_rad = np.deg2rad(amc_val) + qpos = np.dot(self.amc2qpos_transform, amc_val_rad) + + # Root. + qpos[:3] = np.dot(self.root_xyz_ransform, amc_val[:3]) + qpos_quat = euler2quat(amc_val[3], amc_val[4], amc_val[5]) + qpos_quat = mj_quatprod(euler2quat(90, 0, 0), qpos_quat) + + for i, ind in enumerate(self.qpos_root_quat_ind): + qpos[ind] = qpos_quat[i] + + return qpos + + +def euler2quat(ax, ay, az): + """Converts euler angles to a quaternion. + + Note: rotation order is zyx + + Args: + ax: Roll angle (deg) + ay: Pitch angle (deg). + az: Yaw angle (deg). 
+ + Returns: + A numpy array representing the rotation as a quaternion. + """ + r1 = az + r2 = ay + r3 = ax + + c1 = np.cos(np.deg2rad(r1 / 2)) + s1 = np.sin(np.deg2rad(r1 / 2)) + c2 = np.cos(np.deg2rad(r2 / 2)) + s2 = np.sin(np.deg2rad(r2 / 2)) + c3 = np.cos(np.deg2rad(r3 / 2)) + s3 = np.sin(np.deg2rad(r3 / 2)) + + q0 = c1 * c2 * c3 + s1 * s2 * s3 + q1 = c1 * c2 * s3 - s1 * s2 * c3 + q2 = c1 * s2 * c3 + s1 * c2 * s3 + q3 = s1 * c2 * c3 - c1 * s2 * s3 + + return np.array([q0, q1, q2, q3]) + + +def mj_quatprod(q, r): + quaternion = np.zeros(4) + mjlib.mju_mulQuat(quaternion, np.ascontiguousarray(q), + np.ascontiguousarray(r)) + return quaternion + + +def mj_quat2vel(q, dt): + vel = np.zeros(3) + mjlib.mju_quat2Vel(vel, np.ascontiguousarray(q), dt) + return vel + + +def mj_quatneg(q): + quaternion = np.zeros(4) + mjlib.mju_negQuat(quaternion, np.ascontiguousarray(q)) + return quaternion + + +def mj_quatdiff(source, target): + return mj_quatprod(mj_quatneg(source), np.ascontiguousarray(target)) diff --git a/Dreamer/local_dm_control_suite/utils/parse_amc_test.py b/Dreamer/local_dm_control_suite/utils/parse_amc_test.py new file mode 100755 index 0000000..c8a9052 --- /dev/null +++ b/Dreamer/local_dm_control_suite/utils/parse_amc_test.py @@ -0,0 +1,68 @@ +# Copyright 2017 The dm_control Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Tests for parse_amc utility.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os + +# Internal dependencies. + +from absl.testing import absltest +from local_dm_control_suite import humanoid_CMU +from dm_control.suite.utils import parse_amc + +from dm_control.utils import io as resources + +_TEST_AMC_PATH = resources.GetResourceFilename( + os.path.join(os.path.dirname(__file__), '../demos/zeros.amc')) + + +class ParseAMCTest(absltest.TestCase): + + def test_sizes_of_parsed_data(self): + + # Instantiate the humanoid environment. + env = humanoid_CMU.stand() + + # Parse and convert specified clip. 
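
The shapes asserted in `test_sizes_of_parsed_data` just below follow from `convert` computing velocities as forward differences of consecutive resampled positions (the free-joint quaternion block goes through `mju_quat2Vel`, which is also why 63 position rows map to 62 velocity rows). A toy illustration of the time dimension only:

import numpy as np

timestep = 1.0 / 40.0
nq, n_frames = 63, 5
qpos = np.cumsum(np.random.randn(nq, n_frames), axis=1)  # toy positions, nq x time
qvel = (qpos[:, 1:] - qpos[:, :-1]) / timestep           # nq x (time - 1)
print(qpos.shape, qvel.shape)                            # (63, 5) (63, 4)
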
+ converted = parse_amc.convert( + _TEST_AMC_PATH, env.physics, env.control_timestep()) + + self.assertEqual(converted.qpos.shape[0], 63) + self.assertEqual(converted.qvel.shape[0], 62) + self.assertEqual(converted.time.shape[0], converted.qpos.shape[1]) + self.assertEqual(converted.qpos.shape[1], + converted.qvel.shape[1] + 1) + + # Parse and convert specified clip -- WITH SMALLER TIMESTEP + converted2 = parse_amc.convert( + _TEST_AMC_PATH, env.physics, 0.5 * env.control_timestep()) + + self.assertEqual(converted2.qpos.shape[0], 63) + self.assertEqual(converted2.qvel.shape[0], 62) + self.assertEqual(converted2.time.shape[0], converted2.qpos.shape[1]) + self.assertEqual(converted.qpos.shape[1], + converted.qvel.shape[1] + 1) + + # Compare sizes of parsed objects for different timesteps + self.assertEqual(converted.qpos.shape[1] * 2, converted2.qpos.shape[1]) + + +if __name__ == '__main__': + absltest.main() diff --git a/Dreamer/local_dm_control_suite/utils/randomizers.py b/Dreamer/local_dm_control_suite/utils/randomizers.py new file mode 100755 index 0000000..30ec182 --- /dev/null +++ b/Dreamer/local_dm_control_suite/utils/randomizers.py @@ -0,0 +1,91 @@ +# Copyright 2017 The dm_control Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Randomization functions.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from dm_control.mujoco.wrapper import mjbindings +import numpy as np +from six.moves import range + + +def random_limited_quaternion(random, limit): + """Generates a random quaternion limited to the specified rotations.""" + axis = random.randn(3) + axis /= np.linalg.norm(axis) + angle = random.rand() * limit + + quaternion = np.zeros(4) + mjbindings.mjlib.mju_axisAngle2Quat(quaternion, axis, angle) + + return quaternion + + +def randomize_limited_and_rotational_joints(physics, random=None): + """Randomizes the positions of joints defined in the physics body. + + The following randomization rules apply: + - Bounded joints (hinges or sliders) are sampled uniformly in the bounds. + - Unbounded hinges are samples uniformly in [-pi, pi] + - Quaternions for unlimited free joints and ball joints are sampled + uniformly on the unit 3-sphere. + - Quaternions for limited ball joints are sampled uniformly on a sector + of the unit 3-sphere. + - The linear degrees of freedom of free joints are not randomized. + + Args: + physics: Instance of 'Physics' class that holds a loaded model. + random: Optional instance of 'np.random.RandomState'. Defaults to the global + NumPy random state. 
+ """ + random = random or np.random + + hinge = mjbindings.enums.mjtJoint.mjJNT_HINGE + slide = mjbindings.enums.mjtJoint.mjJNT_SLIDE + ball = mjbindings.enums.mjtJoint.mjJNT_BALL + free = mjbindings.enums.mjtJoint.mjJNT_FREE + + qpos = physics.named.data.qpos + + for joint_id in range(physics.model.njnt): + joint_name = physics.model.id2name(joint_id, 'joint') + joint_type = physics.model.jnt_type[joint_id] + is_limited = physics.model.jnt_limited[joint_id] + range_min, range_max = physics.model.jnt_range[joint_id] + + if is_limited: + if joint_type == hinge or joint_type == slide: + qpos[joint_name] = random.uniform(range_min, range_max) + + elif joint_type == ball: + qpos[joint_name] = random_limited_quaternion(random, range_max) + + else: + if joint_type == hinge: + qpos[joint_name] = random.uniform(-np.pi, np.pi) + + elif joint_type == ball: + quat = random.randn(4) + quat /= np.linalg.norm(quat) + qpos[joint_name] = quat + + elif joint_type == free: + quat = random.rand(4) + quat /= np.linalg.norm(quat) + qpos[joint_name][3:] = quat + diff --git a/Dreamer/local_dm_control_suite/utils/randomizers_test.py b/Dreamer/local_dm_control_suite/utils/randomizers_test.py new file mode 100755 index 0000000..8b9b72d --- /dev/null +++ b/Dreamer/local_dm_control_suite/utils/randomizers_test.py @@ -0,0 +1,164 @@ +# Copyright 2017 The dm_control Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Tests for randomizers.py.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# Internal dependencies. +from absl.testing import absltest +from absl.testing import parameterized +from dm_control import mujoco +from dm_control.mujoco.wrapper import mjbindings +from dm_control.suite.utils import randomizers +import numpy as np +from six.moves import range + +mjlib = mjbindings.mjlib + + +class RandomizeUnlimitedJointsTest(parameterized.TestCase): + + def setUp(self): + self.rand = np.random.RandomState(100) + + def test_single_joint_of_each_type(self): + physics = mujoco.Physics.from_xml_string(""" + + + + + + + + + + + + + + + + + + + + + + + + + """) + + randomizers.randomize_limited_and_rotational_joints(physics, self.rand) + self.assertNotEqual(0., physics.named.data.qpos['hinge']) + self.assertNotEqual(0., physics.named.data.qpos['limited_hinge']) + self.assertNotEqual(0., physics.named.data.qpos['limited_slide']) + + self.assertNotEqual(0., np.sum(physics.named.data.qpos['ball'])) + self.assertNotEqual(0., np.sum(physics.named.data.qpos['limited_ball'])) + + self.assertNotEqual(0., np.sum(physics.named.data.qpos['free'][3:])) + + # Unlimited slide and the positional part of the free joint remains + # uninitialized. 
+ self.assertEqual(0., physics.named.data.qpos['slide']) + self.assertEqual(0., np.sum(physics.named.data.qpos['free'][:3])) + + def test_multiple_joints_of_same_type(self): + physics = mujoco.Physics.from_xml_string(""" + + + + + + + + + """) + + randomizers.randomize_limited_and_rotational_joints(physics, self.rand) + self.assertNotEqual(0., physics.named.data.qpos['hinge_1']) + self.assertNotEqual(0., physics.named.data.qpos['hinge_2']) + self.assertNotEqual(0., physics.named.data.qpos['hinge_3']) + + self.assertNotEqual(physics.named.data.qpos['hinge_1'], + physics.named.data.qpos['hinge_2']) + + self.assertNotEqual(physics.named.data.qpos['hinge_2'], + physics.named.data.qpos['hinge_3']) + + self.assertNotEqual(physics.named.data.qpos['hinge_1'], + physics.named.data.qpos['hinge_3']) + + def test_unlimited_hinge_randomization_range(self): + physics = mujoco.Physics.from_xml_string(""" + + + + + + + """) + + for _ in range(10): + randomizers.randomize_limited_and_rotational_joints(physics, self.rand) + self.assertBetween(physics.named.data.qpos['hinge'], -np.pi, np.pi) + + def test_limited_1d_joint_limits_are_respected(self): + physics = mujoco.Physics.from_xml_string(""" + + + + + + + + + + + """) + + for _ in range(10): + randomizers.randomize_limited_and_rotational_joints(physics, self.rand) + self.assertBetween(physics.named.data.qpos['hinge'], + np.deg2rad(0), np.deg2rad(10)) + self.assertBetween(physics.named.data.qpos['slide'], 30, 50) + + def test_limited_ball_joint_are_respected(self): + physics = mujoco.Physics.from_xml_string(""" + + + + + + + """) + + body_axis = np.array([1., 0., 0.]) + joint_axis = np.zeros(3) + for _ in range(10): + randomizers.randomize_limited_and_rotational_joints(physics, self.rand) + + quat = physics.named.data.qpos['ball'] + mjlib.mju_rotVecQuat(joint_axis, body_axis, quat) + angle_cos = np.dot(body_axis, joint_axis) + self.assertGreater(angle_cos, 0.5) # cos(60) = 0.5 + + +if __name__ == '__main__': + absltest.main() diff --git a/Dreamer/local_dm_control_suite/walker.py b/Dreamer/local_dm_control_suite/walker.py new file mode 100755 index 0000000..b7bfd58 --- /dev/null +++ b/Dreamer/local_dm_control_suite/walker.py @@ -0,0 +1,158 @@ +# Copyright 2017 The dm_control Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Planar Walker Domain.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections + +from dm_control import mujoco +from dm_control.rl import control +from local_dm_control_suite import base +from local_dm_control_suite import common +from dm_control.suite.utils import randomizers +from dm_control.utils import containers +from dm_control.utils import rewards + + +_DEFAULT_TIME_LIMIT = 25 +_CONTROL_TIMESTEP = .025 + +# Minimal height of torso over foot above which stand reward is 1. +_STAND_HEIGHT = 1.2 + +# Horizontal speeds (meters/second) above which move reward is 1. 
+_WALK_SPEED = 1 +_RUN_SPEED = 8 + + +SUITE = containers.TaggedTasks() + + +def get_model_and_assets(): + """Returns a tuple containing the model XML string and a dict of assets.""" + return common.read_model('walker.xml'), common.ASSETS + + +@SUITE.add('benchmarking') +def stand(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): + """Returns the Stand task.""" + physics = Physics.from_xml_string(*get_model_and_assets()) + task = PlanarWalker(move_speed=0, random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP, + **environment_kwargs) + + +@SUITE.add('benchmarking') +def walk(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): + """Returns the Walk task.""" + physics = Physics.from_xml_string(*get_model_and_assets()) + task = PlanarWalker(move_speed=_WALK_SPEED, random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP, + **environment_kwargs) + + +@SUITE.add('benchmarking') +def run(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): + """Returns the Run task.""" + physics = Physics.from_xml_string(*get_model_and_assets()) + task = PlanarWalker(move_speed=_RUN_SPEED, random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP, + **environment_kwargs) + + +class Physics(mujoco.Physics): + """Physics simulation with additional features for the Walker domain.""" + + def torso_upright(self): + """Returns projection from z-axes of torso to the z-axes of world.""" + return self.named.data.xmat['torso', 'zz'] + + def torso_height(self): + """Returns the height of the torso.""" + return self.named.data.xpos['torso', 'z'] + + def horizontal_velocity(self): + """Returns the horizontal velocity of the center-of-mass.""" + return self.named.data.sensordata['torso_subtreelinvel'][0] + + def orientations(self): + """Returns planar orientations of all bodies.""" + return self.named.data.xmat[1:, ['xx', 'xz']].ravel() + + +class PlanarWalker(base.Task): + """A planar walker task.""" + + def __init__(self, move_speed, random=None): + """Initializes an instance of `PlanarWalker`. + + Args: + move_speed: A float. If this value is zero, reward is given simply for + standing up. Otherwise this specifies a target horizontal velocity for + the walking task. + random: Optional, either a `numpy.random.RandomState` instance, an + integer seed for creating a new `RandomState`, or None to select a seed + automatically (default). + """ + self._move_speed = move_speed + super(PlanarWalker, self).__init__(random=random) + + def initialize_episode(self, physics): + """Sets the state of the environment at the start of each episode. + + In 'standing' mode, use initial orientation and small velocities. + In 'random' mode, randomize joint angles and let fall to the floor. + + Args: + physics: An instance of `Physics`. 
+ + """ + randomizers.randomize_limited_and_rotational_joints(physics, self.random) + super(PlanarWalker, self).initialize_episode(physics) + + def get_observation(self, physics): + """Returns an observation of body orientations, height and velocites.""" + obs = collections.OrderedDict() + obs['orientations'] = physics.orientations() + obs['height'] = physics.torso_height() + obs['velocity'] = physics.velocity() + return obs + + def get_reward(self, physics): + """Returns a reward to the agent.""" + standing = rewards.tolerance(physics.torso_height(), + bounds=(_STAND_HEIGHT, float('inf')), + margin=_STAND_HEIGHT/2) + upright = (1 + physics.torso_upright()) / 2 + stand_reward = (3*standing + upright) / 4 + if self._move_speed == 0: + return stand_reward + else: + move_reward = rewards.tolerance(physics.horizontal_velocity(), + bounds=(self._move_speed, float('inf')), + margin=self._move_speed/2, + value_at_margin=0.5, + sigmoid='linear') + return stand_reward * (5*move_reward + 1) / 6 diff --git a/Dreamer/local_dm_control_suite/walker.xml b/Dreamer/local_dm_control_suite/walker.xml new file mode 100755 index 0000000..9509893 --- /dev/null +++ b/Dreamer/local_dm_control_suite/walker.xml @@ -0,0 +1,70 @@ + + + + + + diff --git a/Dreamer/local_dm_control_suite/wrappers/__init__.py b/Dreamer/local_dm_control_suite/wrappers/__init__.py new file mode 100755 index 0000000..f7e4a68 --- /dev/null +++ b/Dreamer/local_dm_control_suite/wrappers/__init__.py @@ -0,0 +1,16 @@ +# Copyright 2018 The dm_control Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Environment wrappers used to extend or modify environment behaviour.""" diff --git a/Dreamer/local_dm_control_suite/wrappers/action_noise.py b/Dreamer/local_dm_control_suite/wrappers/action_noise.py new file mode 100755 index 0000000..dab9970 --- /dev/null +++ b/Dreamer/local_dm_control_suite/wrappers/action_noise.py @@ -0,0 +1,74 @@ +# Copyright 2018 The dm_control Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""Wrapper control suite environments that adds Gaussian noise to actions.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import dm_env +import numpy as np + + +_BOUNDS_MUST_BE_FINITE = ( + 'All bounds in `env.action_spec()` must be finite, got: {action_spec}') + + +class Wrapper(dm_env.Environment): + """Wraps a control environment and adds Gaussian noise to actions.""" + + def __init__(self, env, scale=0.01): + """Initializes a new action noise Wrapper. + + Args: + env: The control suite environment to wrap. + scale: The standard deviation of the noise, expressed as a fraction + of the max-min range for each action dimension. + + Raises: + ValueError: If any of the action dimensions of the wrapped environment are + unbounded. + """ + action_spec = env.action_spec() + if not (np.all(np.isfinite(action_spec.minimum)) and + np.all(np.isfinite(action_spec.maximum))): + raise ValueError(_BOUNDS_MUST_BE_FINITE.format(action_spec=action_spec)) + self._minimum = action_spec.minimum + self._maximum = action_spec.maximum + self._noise_std = scale * (action_spec.maximum - action_spec.minimum) + self._env = env + + def step(self, action): + noisy_action = action + self._env.task.random.normal(scale=self._noise_std) + # Clip the noisy actions in place so that they fall within the bounds + # specified by the `action_spec`. Note that MuJoCo implicitly clips out-of- + # bounds control inputs, but we also clip here in case the actions do not + # correspond directly to MuJoCo actuators, or if there are other wrapper + # layers that expect the actions to be within bounds. + np.clip(noisy_action, self._minimum, self._maximum, out=noisy_action) + return self._env.step(noisy_action) + + def reset(self): + return self._env.reset() + + def observation_spec(self): + return self._env.observation_spec() + + def action_spec(self): + return self._env.action_spec() + + def __getattr__(self, name): + return getattr(self._env, name) diff --git a/Dreamer/local_dm_control_suite/wrappers/action_noise_test.py b/Dreamer/local_dm_control_suite/wrappers/action_noise_test.py new file mode 100755 index 0000000..dcc5330 --- /dev/null +++ b/Dreamer/local_dm_control_suite/wrappers/action_noise_test.py @@ -0,0 +1,136 @@ +# Copyright 2018 The dm_control Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Tests for the action noise wrapper.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# Internal dependencies. 
+from absl.testing import absltest +from absl.testing import parameterized +from dm_control.rl import control +from dm_control.suite.wrappers import action_noise +from dm_env import specs +import mock +import numpy as np + + +class ActionNoiseTest(parameterized.TestCase): + + def make_action_spec(self, lower=(-1.,), upper=(1.,)): + lower, upper = np.broadcast_arrays(lower, upper) + return specs.BoundedArray( + shape=lower.shape, dtype=float, minimum=lower, maximum=upper) + + def make_mock_env(self, action_spec=None): + action_spec = action_spec or self.make_action_spec() + env = mock.Mock(spec=control.Environment) + env.action_spec.return_value = action_spec + return env + + def assertStepCalledOnceWithCorrectAction(self, env, expected_action): + # NB: `assert_called_once_with()` doesn't support numpy arrays. + env.step.assert_called_once() + actual_action = env.step.call_args_list[0][0][0] + np.testing.assert_array_equal(expected_action, actual_action) + + @parameterized.parameters([ + dict(lower=np.r_[-1., 0.], upper=np.r_[1., 2.], scale=0.05), + dict(lower=np.r_[-1., 0.], upper=np.r_[1., 2.], scale=0.), + dict(lower=np.r_[-1., 0.], upper=np.r_[-1., 0.], scale=0.05), + ]) + def test_step(self, lower, upper, scale): + seed = 0 + std = scale * (upper - lower) + expected_noise = np.random.RandomState(seed).normal(scale=std) + action = np.random.RandomState(seed).uniform(lower, upper) + expected_noisy_action = np.clip(action + expected_noise, lower, upper) + task = mock.Mock(spec=control.Task) + task.random = np.random.RandomState(seed) + action_spec = self.make_action_spec(lower=lower, upper=upper) + env = self.make_mock_env(action_spec=action_spec) + env.task = task + wrapped_env = action_noise.Wrapper(env, scale=scale) + time_step = wrapped_env.step(action) + self.assertStepCalledOnceWithCorrectAction(env, expected_noisy_action) + self.assertIs(time_step, env.step(expected_noisy_action)) + + @parameterized.named_parameters([ + dict(testcase_name='within_bounds', action=np.r_[-1.], noise=np.r_[0.1]), + dict(testcase_name='below_lower', action=np.r_[-1.], noise=np.r_[-0.1]), + dict(testcase_name='above_upper', action=np.r_[1.], noise=np.r_[0.1]), + ]) + def test_action_clipping(self, action, noise): + lower = -1. + upper = 1. 
+ expected_noisy_action = np.clip(action + noise, lower, upper) + task = mock.Mock(spec=control.Task) + task.random = mock.Mock(spec=np.random.RandomState) + task.random.normal.return_value = noise + action_spec = self.make_action_spec(lower=lower, upper=upper) + env = self.make_mock_env(action_spec=action_spec) + env.task = task + wrapped_env = action_noise.Wrapper(env) + time_step = wrapped_env.step(action) + self.assertStepCalledOnceWithCorrectAction(env, expected_noisy_action) + self.assertIs(time_step, env.step(expected_noisy_action)) + + @parameterized.parameters([ + dict(lower=np.r_[-1., 0.], upper=np.r_[1., np.inf]), + dict(lower=np.r_[np.nan, 0.], upper=np.r_[1., 2.]), + ]) + def test_error_if_action_bounds_non_finite(self, lower, upper): + action_spec = self.make_action_spec(lower=lower, upper=upper) + env = self.make_mock_env(action_spec=action_spec) + with self.assertRaisesWithLiteralMatch( + ValueError, + action_noise._BOUNDS_MUST_BE_FINITE.format(action_spec=action_spec)): + _ = action_noise.Wrapper(env) + + def test_reset(self): + env = self.make_mock_env() + wrapped_env = action_noise.Wrapper(env) + time_step = wrapped_env.reset() + env.reset.assert_called_once_with() + self.assertIs(time_step, env.reset()) + + def test_observation_spec(self): + env = self.make_mock_env() + wrapped_env = action_noise.Wrapper(env) + observation_spec = wrapped_env.observation_spec() + env.observation_spec.assert_called_once_with() + self.assertIs(observation_spec, env.observation_spec()) + + def test_action_spec(self): + env = self.make_mock_env() + wrapped_env = action_noise.Wrapper(env) + # `env.action_spec()` is called in `Wrapper.__init__()` + env.action_spec.reset_mock() + action_spec = wrapped_env.action_spec() + env.action_spec.assert_called_once_with() + self.assertIs(action_spec, env.action_spec()) + + @parameterized.parameters(['task', 'physics', 'control_timestep']) + def test_getattr(self, attribute_name): + env = self.make_mock_env() + wrapped_env = action_noise.Wrapper(env) + attr = getattr(wrapped_env, attribute_name) + self.assertIs(attr, getattr(env, attribute_name)) + + +if __name__ == '__main__': + absltest.main() diff --git a/Dreamer/local_dm_control_suite/wrappers/pixels.py b/Dreamer/local_dm_control_suite/wrappers/pixels.py new file mode 100755 index 0000000..0f55fff --- /dev/null +++ b/Dreamer/local_dm_control_suite/wrappers/pixels.py @@ -0,0 +1,120 @@ +# Copyright 2017 The dm_control Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""Wrapper that adds pixel observations to a control environment.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections + +import dm_env +from dm_env import specs + +STATE_KEY = 'state' + + +class Wrapper(dm_env.Environment): + """Wraps a control environment and adds a rendered pixel observation.""" + + def __init__(self, env, pixels_only=True, render_kwargs=None, + observation_key='pixels'): + """Initializes a new pixel Wrapper. + + Args: + env: The environment to wrap. + pixels_only: If True (default), the original set of 'state' observations + returned by the wrapped environment will be discarded, and the + `OrderedDict` of observations will only contain pixels. If False, the + `OrderedDict` will contain the original observations as well as the + pixel observations. + render_kwargs: Optional `dict` containing keyword arguments passed to the + `mujoco.Physics.render` method. + observation_key: Optional custom string specifying the pixel observation's + key in the `OrderedDict` of observations. Defaults to 'pixels'. + + Raises: + ValueError: If `env`'s observation spec is not compatible with the + wrapper. Supported formats are a single array, or a dict of arrays. + ValueError: If `env`'s observation already contains the specified + `observation_key`. + """ + if render_kwargs is None: + render_kwargs = {} + + wrapped_observation_spec = env.observation_spec() + + if isinstance(wrapped_observation_spec, specs.Array): + self._observation_is_dict = False + invalid_keys = set([STATE_KEY]) + elif isinstance(wrapped_observation_spec, collections.MutableMapping): + self._observation_is_dict = True + invalid_keys = set(wrapped_observation_spec.keys()) + else: + raise ValueError('Unsupported observation spec structure.') + + if not pixels_only and observation_key in invalid_keys: + raise ValueError('Duplicate or reserved observation key {!r}.' + .format(observation_key)) + + if pixels_only: + self._observation_spec = collections.OrderedDict() + elif self._observation_is_dict: + self._observation_spec = wrapped_observation_spec.copy() + else: + self._observation_spec = collections.OrderedDict() + self._observation_spec[STATE_KEY] = wrapped_observation_spec + + # Extend observation spec. 
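+    # The pixel spec is inferred from a single render call below, so its
+    # shape and dtype always match what `_add_pixel_observation` returns at
+    # runtime for the same `render_kwargs`.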
+ pixels = env.physics.render(**render_kwargs) + pixels_spec = specs.Array( + shape=pixels.shape, dtype=pixels.dtype, name=observation_key) + self._observation_spec[observation_key] = pixels_spec + + self._env = env + self._pixels_only = pixels_only + self._render_kwargs = render_kwargs + self._observation_key = observation_key + + def reset(self): + time_step = self._env.reset() + return self._add_pixel_observation(time_step) + + def step(self, action): + time_step = self._env.step(action) + return self._add_pixel_observation(time_step) + + def observation_spec(self): + return self._observation_spec + + def action_spec(self): + return self._env.action_spec() + + def _add_pixel_observation(self, time_step): + if self._pixels_only: + observation = collections.OrderedDict() + elif self._observation_is_dict: + observation = type(time_step.observation)(time_step.observation) + else: + observation = collections.OrderedDict() + observation[STATE_KEY] = time_step.observation + + pixels = self._env.physics.render(**self._render_kwargs) + observation[self._observation_key] = pixels + return time_step._replace(observation=observation) + + def __getattr__(self, name): + return getattr(self._env, name) diff --git a/Dreamer/local_dm_control_suite/wrappers/pixels_test.py b/Dreamer/local_dm_control_suite/wrappers/pixels_test.py new file mode 100755 index 0000000..26b7fc1 --- /dev/null +++ b/Dreamer/local_dm_control_suite/wrappers/pixels_test.py @@ -0,0 +1,133 @@ +# Copyright 2017 The dm_control Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Tests for the pixel wrapper.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections + +# Internal dependencies. +from absl.testing import absltest +from absl.testing import parameterized +from local_dm_control_suite import cartpole +from dm_control.suite.wrappers import pixels +import dm_env +from dm_env import specs + +import numpy as np + + +class FakePhysics(object): + + def render(self, *args, **kwargs): + del args + del kwargs + return np.zeros((4, 5, 3), dtype=np.uint8) + + +class FakeArrayObservationEnvironment(dm_env.Environment): + + def __init__(self): + self.physics = FakePhysics() + + def reset(self): + return dm_env.restart(np.zeros((2,))) + + def step(self, action): + del action + return dm_env.transition(0.0, np.zeros((2,))) + + def action_spec(self): + pass + + def observation_spec(self): + return specs.Array(shape=(2,), dtype=np.float) + + +class PixelsTest(parameterized.TestCase): + + @parameterized.parameters(True, False) + def test_dict_observation(self, pixels_only): + pixel_key = 'rgb' + + env = cartpole.swingup() + + # Make sure we are testing the right environment for the test. + observation_spec = env.observation_spec() + self.assertIsInstance(observation_spec, collections.OrderedDict) + + width = 320 + height = 240 + + # The wrapper should only add one observation. 
+ wrapped = pixels.Wrapper(env, + observation_key=pixel_key, + pixels_only=pixels_only, + render_kwargs={'width': width, 'height': height}) + + wrapped_observation_spec = wrapped.observation_spec() + self.assertIsInstance(wrapped_observation_spec, collections.OrderedDict) + + if pixels_only: + self.assertLen(wrapped_observation_spec, 1) + self.assertEqual([pixel_key], list(wrapped_observation_spec.keys())) + else: + expected_length = len(observation_spec) + 1 + self.assertLen(wrapped_observation_spec, expected_length) + expected_keys = list(observation_spec.keys()) + [pixel_key] + self.assertEqual(expected_keys, list(wrapped_observation_spec.keys())) + + # Check that the added spec item is consistent with the added observation. + time_step = wrapped.reset() + rgb_observation = time_step.observation[pixel_key] + wrapped_observation_spec[pixel_key].validate(rgb_observation) + + self.assertEqual(rgb_observation.shape, (height, width, 3)) + self.assertEqual(rgb_observation.dtype, np.uint8) + + @parameterized.parameters(True, False) + def test_single_array_observation(self, pixels_only): + pixel_key = 'depth' + + env = FakeArrayObservationEnvironment() + observation_spec = env.observation_spec() + self.assertIsInstance(observation_spec, specs.Array) + + wrapped = pixels.Wrapper(env, observation_key=pixel_key, + pixels_only=pixels_only) + wrapped_observation_spec = wrapped.observation_spec() + self.assertIsInstance(wrapped_observation_spec, collections.OrderedDict) + + if pixels_only: + self.assertLen(wrapped_observation_spec, 1) + self.assertEqual([pixel_key], list(wrapped_observation_spec.keys())) + else: + self.assertLen(wrapped_observation_spec, 2) + self.assertEqual([pixels.STATE_KEY, pixel_key], + list(wrapped_observation_spec.keys())) + + time_step = wrapped.reset() + + depth_observation = time_step.observation[pixel_key] + wrapped_observation_spec[pixel_key].validate(depth_observation) + + self.assertEqual(depth_observation.shape, (4, 5, 3)) + self.assertEqual(depth_observation.dtype, np.uint8) + +if __name__ == '__main__': + absltest.main() diff --git a/Dreamer/models.py b/Dreamer/models.py new file mode 100644 index 0000000..3e7d592 --- /dev/null +++ b/Dreamer/models.py @@ -0,0 +1,297 @@ +import numpy as np +import tensorflow as tf +from tensorflow.keras import layers as tfkl +from tensorflow_probability import distributions as tfd +from tensorflow.keras.mixed_precision import experimental as prec + +import tools + + +class RSSM(tools.Module): + + def __init__(self, stoch=30, deter=200, hidden=200, act=tf.nn.elu): + super().__init__() + self._activation = act + self._stoch_size = stoch + self._deter_size = deter + self._hidden_size = hidden + self._cell = tfkl.GRUCell(self._deter_size) + + def initial(self, batch_size): + dtype = prec.global_policy().compute_dtype + return dict( + mean=tf.zeros([batch_size, self._stoch_size], dtype), + std=tf.zeros([batch_size, self._stoch_size], dtype), + stoch=tf.zeros([batch_size, self._stoch_size], dtype), + deter=self._cell.get_initial_state(None, batch_size, dtype)) + + + @tf.function + def observe(self, embed, action, state=None): + if state is None: + state = self.initial(tf.shape(action)[0]) + embed = tf.transpose(embed, [1, 0, 2]) + action = tf.transpose(action, [1, 0, 2]) + post, prior = tools.static_scan( + lambda prev, inputs: self.obs_step( + prev[0], *inputs), + (action, embed), (state, state)) + post = {k: tf.transpose(v, [1, 0, 2]) for k, v in post.items()} + prior = {k: tf.transpose(v, [1, 0, 2]) for k, v in prior.items()} + return 
post, prior + + @tf.function + def imagine(self, action, state=None): + if state is None: + state = self.initial(tf.shape(action)[0]) + assert isinstance(state, dict), state + action = tf.transpose(action, [1, 0, 2]) + prior = tools.static_scan(self.img_step, action, state) + prior = {k: tf.transpose(v, [1, 0, 2]) for k, v in prior.items()} + return prior + + def get_feat(self, state): + return tf.concat([state['stoch'], state['deter']], -1) + + def get_dist(self, state): + return tfd.MultivariateNormalDiag(state['mean'], state['std']) + + @tf.function + def obs_step(self, prev_state, prev_action, embed): + prior = self.img_step(prev_state, prev_action) + x = tf.concat([prior['deter'], embed], -1) + x = self.get('obs1', tfkl.Dense, self._hidden_size, + self._activation)(x) + x = self.get('obs2', tfkl.Dense, 2 * self._stoch_size, None)(x) + mean, std = tf.split(x, 2, -1) + std = tf.nn.softplus(std) + 0.1 + stoch = self.get_dist({'mean': mean, 'std': std}).sample() + post = {'mean': mean, 'std': std, + 'stoch': stoch, 'deter': prior['deter']} + return post, prior + + @tf.function + def img_step(self, prev_state, prev_action): + x = tf.concat([prev_state['stoch'], prev_action], -1) + x = self.get('img1', tfkl.Dense, self._hidden_size, + self._activation)(x) + x, deter = self._cell(x, [prev_state['deter']]) + deter = deter[0] # Keras wraps the state in a list. + x = self.get('img2', tfkl.Dense, self._hidden_size, + self._activation)(x) + x = self.get('img3', tfkl.Dense, 2 * self._stoch_size, None)(x) + mean, std = tf.split(x, 2, -1) + std = tf.nn.softplus(std) + 0.1 + stoch = self.get_dist({'mean': mean, 'std': std}).sample() + prior = {'mean': mean, 'std': std, 'stoch': stoch, 'deter': deter} + return prior + + +class ConvEncoder(tools.Module): + + def __init__(self, depth=32, act=tf.nn.relu, image_size=64): + self._act = act + self._depth = depth + self._image_size = image_size + + if image_size == 64: + self._outdim = 32 * self._depth + self._kernel_sizes = [4, 4, 4, 4] + elif image_size == 32: + self._outdim = 8 * self._depth + self._kernel_sizes = [3, 3, 3, 3] + elif image_size == 84: + self._outdim = 72 * self._depth + self._kernel_sizes = [4, 4, 4, 4] + else: + raise NotImplementedError + + def __call__(self, obs): + kwargs = dict(strides=2, activation=self._act) + x = tf.reshape(obs['image'], (-1,) + tuple(obs['image'].shape[-3:])) + x = self.get('h1', tfkl.Conv2D, 1 * self._depth, + self._kernel_sizes[0], **kwargs)(x) + x = self.get('h2', tfkl.Conv2D, 2 * self._depth, + self._kernel_sizes[1], **kwargs)(x) + x = self.get('h3', tfkl.Conv2D, 4 * self._depth, + self._kernel_sizes[2], **kwargs)(x) + x = self.get('h4', tfkl.Conv2D, 8 * self._depth, + self._kernel_sizes[3], **kwargs)(x) + shape = tf.concat([tf.shape(obs['image'])[:-3], [self._outdim]], 0) + return tf.reshape(x, shape) + + +class ConvDecoder(tools.Module): + + def __init__(self, depth=32, act=tf.nn.relu, shape=(64, 64, 3)): + self._act = act + self._depth = depth + self._shape = shape + + if shape[0] == 64: + self._outdim = 32 * self._depth + self._kernel_sizes = [5, 5, 6, 6] + elif shape[0] == 32: + self._outdim = 8 * self._depth + self._kernel_sizes = [3, 3, 3, 4] + elif shape[0] == 84: + self._outdim = 72 * self._depth + self._kernel_sizes = [7, 6, 6, 6] + else: + raise NotImplementedError + + def __call__(self, features): + kwargs = dict(strides=2, activation=self._act) + x = self.get('h1', tfkl.Dense, self._outdim, None)(features) + x = tf.reshape(x, [-1, 1, 1, self._outdim]) + x = self.get('h2', tfkl.Conv2DTranspose, + 
4 * self._depth, self._kernel_sizes[0], **kwargs)(x) + x = self.get('h3', tfkl.Conv2DTranspose, + 2 * self._depth, self._kernel_sizes[1], **kwargs)(x) + x = self.get('h4', tfkl.Conv2DTranspose, + 1 * self._depth, self._kernel_sizes[2], **kwargs)(x) + x = self.get('h5', tfkl.Conv2DTranspose, + self._shape[-1], self._kernel_sizes[3], strides=2)(x) + mean = tf.reshape(x, tf.concat( + [tf.shape(features)[:-1], self._shape], 0)) + return tfd.Independent(tfd.Normal(mean, 1), len(self._shape)) + + +class ConvDecoderMask(tools.Module): + + def __init__(self, depth=32, act=tf.nn.relu, shape=(64, 64, 3)): + self._act = act + self._depth = depth + self._shape = shape + + if shape[0] == 64: + self._outdim = 32 * self._depth + self._kernel_sizes = [5, 5, 6, 6] + elif shape[0] == 32: + self._outdim = 8 * self._depth + self._kernel_sizes = [3, 3, 3, 4] + elif shape[0] == 84: + self._outdim = 72 * self._depth + self._kernel_sizes = [7, 6, 6, 6] + else: + raise NotImplementedError + + def __call__(self, features): + kwargs = dict(strides=2, activation=self._act) + x = self.get('h1', tfkl.Dense, self._outdim, None)(features) + x = tf.reshape(x, [-1, 1, 1, self._outdim]) + x = self.get('h2', tfkl.Conv2DTranspose, + 4 * self._depth, self._kernel_sizes[0], **kwargs)(x) + x = self.get('h3', tfkl.Conv2DTranspose, + 2 * self._depth, self._kernel_sizes[1], **kwargs)(x) + x = self.get('h4', tfkl.Conv2DTranspose, + 1 * self._depth, self._kernel_sizes[2], **kwargs)(x) + x = self.get('h5', tfkl.Conv2DTranspose, + 3 + self._shape[-1], self._kernel_sizes[3], strides=2)(x) + mean, mask = tf.split(x, [3, 3], -1) + mean = tf.reshape(mean, tf.concat( + [tf.shape(features)[:-1], self._shape], 0)) + mask = tf.reshape(mask, tf.concat( + [tf.shape(features)[:-1], self._shape], 0)) + return tfd.Independent(tfd.Normal(mean, 1), len(self._shape)), mask + + +class ConvDecoderMaskEnsemble(tools.Module): + """ + ensemble two convdecoder with outputs + """ + + def __init__(self, decoder1, decoder2, precision): + self._decoder1 = decoder1 + self._decoder2 = decoder2 + self._precision = 'float' + str(precision) + self._shape = decoder1._shape + + def __call__(self, feat1, feat2): + kwargs = dict(strides=1, activation=tf.nn.sigmoid) + pred1, mask1 = self._decoder1(feat1) + pred2, mask2 = self._decoder2(feat2) + mean1 = pred1.submodules[0].loc + mean2 = pred2.submodules[0].loc + mask_feat = tf.concat([mask1, mask2], -1) + mask = self.get('mask1', tfkl.Conv2D, 1, 1, **kwargs)(mask_feat) + mask_use1 = mask + mask_use2 = 1-mask + mean = mean1 * tf.cast(mask_use1, self._precision) + \ + mean2 * tf.cast(mask_use2, self._precision) + return tfd.Independent(tfd.Normal(mean, 1), len(self._shape)), pred1, pred2, tf.cast(mask_use1, self._precision) + + +class InverseDecoder(tools.Module): + + def __init__(self, shape, layers, units, act=tf.nn.elu): + self._shape = shape + self._layers = layers + self._units = units + self._act = act + + def __call__(self, features): + x = tf.concat([features[:, :-1], features[:, 1:]], -1) + for index in range(self._layers): + x = self.get(f'h{index}', tfkl.Dense, self._units, self._act)(x) + x = self.get(f'hout', tfkl.Dense, np.prod(self._shape))(x) + return tfd.Independent(tfd.Normal(x, 1), 1) + + +class DenseDecoder(tools.Module): + + def __init__(self, shape, layers, units, dist='normal', act=tf.nn.elu): + self._shape = shape + self._layers = layers + self._units = units + self._dist = dist + self._act = act + + def __call__(self, features): + x = features + for index in range(self._layers): + x = 
self.get(f'h{index}', tfkl.Dense, self._units, self._act)(x) + x = self.get(f'hout', tfkl.Dense, np.prod(self._shape))(x) + x = tf.reshape(x, tf.concat([tf.shape(features)[:-1], self._shape], 0)) + if self._dist == 'normal': + return tfd.Independent(tfd.Normal(x, 1), len(self._shape)) + if self._dist == 'binary': + return tfd.Independent(tfd.Bernoulli(x), len(self._shape)) + raise NotImplementedError(self._dist) + + +class ActionDecoder(tools.Module): + + def __init__( + self, size, layers, units, dist='tanh_normal', act=tf.nn.elu, + min_std=1e-4, init_std=5, mean_scale=5): + self._size = size + self._layers = layers + self._units = units + self._dist = dist + self._act = act + self._min_std = min_std + self._init_std = init_std + self._mean_scale = mean_scale + + def __call__(self, features): + raw_init_std = np.log(np.exp(self._init_std) - 1) + x = features + for index in range(self._layers): + x = self.get(f'h{index}', tfkl.Dense, self._units, self._act)(x) + if self._dist == 'tanh_normal': + # https://www.desmos.com/calculator/rcmcf5jwe7 + x = self.get(f'hout', tfkl.Dense, 2 * self._size)(x) + mean, std = tf.split(x, 2, -1) + mean = self._mean_scale * tf.tanh(mean / self._mean_scale) + std = tf.nn.softplus(std + raw_init_std) + self._min_std + dist = tfd.Normal(mean, std) + dist = tfd.TransformedDistribution(dist, tools.TanhBijector()) + dist = tfd.Independent(dist, 1) + dist = tools.SampleDist(dist) + elif self._dist == 'onehot': + x = self.get(f'hout', tfkl.Dense, self._size)(x) + dist = tools.OneHotDist(x) + else: + raise NotImplementedError(dist) + return dist diff --git a/Dreamer/run.py b/Dreamer/run.py new file mode 100644 index 0000000..c8ddc12 --- /dev/null +++ b/Dreamer/run.py @@ -0,0 +1,121 @@ +import wrappers +import tools +import tensorflow as tf +import argparse +import functools +import yaml +import os +import pathlib +import sys + +from tensorflow.keras.mixed_precision import experimental as prec +from dreamers import Dreamer, SeparationDreamer, InverseDreamer +from env_tools import count_steps, make_env + +METHOD2DREAMER = { + 'dreamer': Dreamer, + 'tia': SeparationDreamer, + 'inverse': InverseDreamer +} + +os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' +os.environ['MUJOCO_GL'] = 'egl' +tf.get_logger().setLevel('ERROR') +sys.path.append(str(pathlib.Path(__file__).parent)) + + +def main(method, config): + + if method == 'separation': + config.logdir = os.path.join( + config.logdir, config.task, + 'separation' + '_' + str(config.disen_neg_rew_scale) + + '_' + str(config.disen_rec_scale), + str(config.seed)) + else: + config.logdir = os.path.join( + config.logdir, config.task, + method, + str(config.seed)) + + logdir = pathlib.Path(config.logdir) + logdir.mkdir(parents=True, exist_ok=True) + snapshot_dir = os.path.join(config.logdir, 'snapshots') + snapshot_dir = pathlib.Path(snapshot_dir) + snapshot_dir.mkdir(parents=True, exist_ok=True) + with open(os.path.join(config.logdir, 'config.yaml'), 'w') as f: + yaml.dump(vars(config), f, sort_keys=False) + + if config.gpu_growth: + for gpu in tf.config.experimental.list_physical_devices('GPU'): + tf.config.experimental.set_memory_growth(gpu, True) + assert config.precision in (16, 32), config.precision + if config.precision == 16: + prec.set_policy(prec.Policy('mixed_float16')) + config.steps = int(config.steps) + config.logdir = logdir + print('Logdir', config.logdir) + + # Create environments. 
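+    # Both sets of environments go through `wrappers.Async`, which can run
+    # each env in a separate worker process depending on `config.parallel`.
+    # Training envs are created with store=True so their episodes are saved
+    # under `datadir` for the replay dataset; evaluation envs use store=False.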
+ datadir = config.logdir / 'episodes' + writer = tf.summary.create_file_writer( + str(config.logdir), max_queue=1000, flush_millis=20000) + writer.set_as_default() + train_envs = [wrappers.Async(lambda: make_env( + config, writer, 'train', datadir, config.video_dir_train, store=True), config.parallel) + for _ in range(config.envs)] + test_envs = [wrappers.Async(lambda: make_env( + config, writer, 'test', datadir, config.video_dir_test, store=False), config.parallel) + for _ in range(config.envs)] + actspace = train_envs[0].action_space + + # Prefill dataset with random episodes. + step = count_steps(datadir, config) + prefill = max(0, config.prefill - step) + print(f'Prefill dataset with {prefill} steps.') + def random_agent(o, d, _): return ([actspace.sample() for _ in d], None) + tools.simulate(random_agent, train_envs, prefill / config.action_repeat) + writer.flush() + + # Train and regularly evaluate the agent. + step = count_steps(datadir, config) + print(f'Simulating agent for {config.steps-step} steps.') + DreamerModel = METHOD2DREAMER[method] + agent = DreamerModel(config, datadir, actspace, writer) + if (config.logdir / 'variables.pkl').exists(): + print('Load checkpoint.') + agent.load(config.logdir / 'variables.pkl') + state = None + should_snapshot = tools.Every(config.snapshot_every) + while step < config.steps: + print('Start evaluation.') + tools.simulate( + functools.partial(agent, training=False), test_envs, episodes=1) + writer.flush() + print('Start collection.') + steps = config.eval_every // config.action_repeat + state = tools.simulate(agent, train_envs, steps, state=state) + step = count_steps(datadir, config) + agent.save(config.logdir / 'variables.pkl') + if should_snapshot(step): + agent.save(snapshot_dir / ('variables_' + str(step) + '.pkl')) + for env in train_envs + test_envs: + env.close() + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument( + '--method', type=str, choices=['dreamer', 'inverse', 'tia'], required=True) + parser.add_argument('--configs', nargs='+', required=True) + args, remaining = parser.parse_known_args() + config_path = 'train_configs/' + args.method + '.yaml' + configs = yaml.safe_load( + (pathlib.Path(__file__).parent / config_path).read_text()) + config_ = {} + for name in args.configs: + config_.update(configs[name]) + parser = argparse.ArgumentParser() + for key, value in config_.items(): + parser.add_argument( + f'--{key}', type=tools.args_type(value), default=value) + main(args.method, parser.parse_args(remaining)) diff --git a/Dreamer/tools.py b/Dreamer/tools.py new file mode 100644 index 0000000..f6909de --- /dev/null +++ b/Dreamer/tools.py @@ -0,0 +1,474 @@ +import datetime +import io +import pathlib +import pickle +import re +import uuid + +import gym +import numpy as np +import tensorflow as tf +import tensorflow.compat.v1 as tf1 # pylint: disable=E +import tensorflow_probability as tfp +from tensorflow.keras.mixed_precision import experimental as prec +from tensorflow_probability import distributions as tfd + + +class AttrDict(dict): + + __setattr__ = dict.__setitem__ + __getattr__ = dict.__getitem__ + + def from_dict(self, src_dict): + for key in src_dict: + setattr(self, key, src_dict[key]) + + +class Module(tf.Module): + + def save(self, filename): + values = tf.nest.map_structure(lambda x: x.numpy(), self.variables) + with pathlib.Path(filename).open('wb') as f: + pickle.dump(values, f) + + def load(self, filename): + with pathlib.Path(filename).open('rb') as f: + values = pickle.load(f) 
+ tf.nest.map_structure(lambda x, y: x.assign(y), self.variables, values) + + def get(self, name, ctor, *args, **kwargs): + # Create or get layer by name to avoid mentioning it in the constructor. + if not hasattr(self, '_modules'): + self._modules = {} + if name not in self._modules: + self._modules[name] = ctor(*args, **kwargs) + return self._modules[name] + + +def nest_summary(structure): + if isinstance(structure, dict): + return {k: nest_summary(v) for k, v in structure.items()} + if isinstance(structure, list): + return [nest_summary(v) for v in structure] + if hasattr(structure, 'shape'): + return str(structure.shape).replace(', ', 'x').strip('(), ') + return '?' + + +def graph_summary(writer, fn, step, *args): + def inner(*args): + tf.summary.experimental.set_step(step) + with writer.as_default(): + fn(*args) + return tf.numpy_function(inner, args, []) + + +def video_summary(name, video, step=None, fps=20): + if isinstance(name, type(np.zeros(1))): + name = str(name) + else: + name = name if isinstance(name, str) else name.decode('utf-8') + if np.issubdtype(video.dtype, np.floating): + video = np.clip(255 * video, 0, 255).astype(np.uint8) + B, T, H, W, C = video.shape + try: + frames = video.transpose((1, 2, 0, 3, 4)).reshape((T, H, B * W, C)) + summary = tf1.Summary() + image = tf1.Summary.Image(height=B * H, width=T * W, colorspace=C) + image.encoded_image_string = encode_gif(frames, fps) + summary.value.add(tag=name + '/gif', image=image) + tf.summary.experimental.write_raw_pb(summary.SerializeToString(), step) + except (IOError, OSError) as e: + print('GIF summaries require ffmpeg in $PATH.', e) + frames = video.transpose((0, 2, 1, 3, 4)).reshape((1, B * H, T * W, C)) + tf.summary.image(name + '/grid', frames, step) + + +def encode_gif(frames, fps): + from subprocess import Popen, PIPE + print(frames[0].shape) + if frames[0].shape[-1] > 3: + frames = np.transpose(frames, [0, 2, 3, 1]) + h, w, c = frames[0].shape + print(frames[0].shape) + + if c!=64: + pxfmt = {1: 'gray', 3: 'rgb24'}[c] + cmd = ' '.join([ + f'ffmpeg -y -f rawvideo -vcodec rawvideo', + f'-r {fps:.02f} -s {w}x{h} -pix_fmt {pxfmt} -i - -filter_complex', + f'[0:v]split[x][z];[z]palettegen[y];[x]fifo[x];[x][y]paletteuse', + f'-r {fps:.02f} -f gif -']) + proc = Popen(cmd.split(' '), stdin=PIPE, stdout=PIPE, stderr=PIPE) + for image in frames: + proc.stdin.write(image.tostring()) + out, err = proc.communicate() + if proc.returncode: + raise IOError('\n'.join([' '.join(cmd), err.decode('utf8')])) + del proc + return out + + +def simulate(agent, envs, steps=0, episodes=0, state=None): + # Initialize or unpack simulation state. + if state is None: + step, episode = 0, 0 + done = np.ones(len(envs), np.bool) + length = np.zeros(len(envs), np.int32) + obs = [None] * len(envs) + agent_state = None + else: + step, episode, done, length, obs, agent_state = state + while (steps and step < steps) or (episodes and episode < episodes): + # Reset envs if necessary. + if done.any(): + indices = [index for index, d in enumerate(done) if d] + promises = [envs[i].reset(blocking=False) for i in indices] + for index, promise in zip(indices, promises): + obs[index] = promise() + # Step agents. + # if use augmentation, need to modify dreamer.policy or here. + obs = {k: np.stack([o[k] for o in obs]) for k in obs[0]} + obs['image'] = tf.transpose(obs['image'], [0, 3, 2, 1]) + action, agent_state = agent(obs, done, agent_state) + action = np.array(action) + assert len(action) == len(envs) + # Step envs. 
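+    # Step every env asynchronously, then resolve all promises together.
+    # The episode/step counters only advance when an episode finishes, and
+    # per-env lengths are reset through the (1 - done) mask below.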
+ promises = [e.step(a, blocking=False) for e, a in zip(envs, action)] + obs, _, done = zip(*[p()[:3] for p in promises]) + obs = list(obs) + done = np.stack(done) + episode += int(done.sum()) + length += 1 + step += (done * length).sum() + length *= (1 - done) + # Return new state to allow resuming the simulation. + return (step - steps, episode - episodes, done, length, obs, agent_state) + + +def count_episodes(directory): + filenames = directory.glob('*.npz') + lengths = [int(n.stem.rsplit('-', 1)[-1]) - 1 for n in filenames] + episodes, steps = len(lengths), sum(lengths) + return episodes, steps + + +def save_episodes(directory, episodes): + directory = pathlib.Path(directory).expanduser() + directory.mkdir(parents=True, exist_ok=True) + timestamp = datetime.datetime.now().strftime('%Y%m%dT%H%M%S') + for episode in episodes: + identifier = str(uuid.uuid4().hex) + length = len(episode['reward']) + filename = directory / f'{timestamp}-{identifier}-{length}.npz' + with io.BytesIO() as f1: + np.savez_compressed(f1, **episode) + f1.seek(0) + with filename.open('wb') as f2: + f2.write(f1.read()) + + +def load_episodes(directory, rescan, length=None, balance=False, seed=0): + directory = pathlib.Path(directory).expanduser() + random = np.random.RandomState(seed) + cache = {} + while True: + for filename in directory.glob('*.npz'): + if filename not in cache: + try: + with filename.open('rb') as f: + episode = np.load(f) + episode = {k: episode[k] for k in episode.keys()} + except Exception as e: + print(f'Could not load episode: {e}') + continue + cache[filename] = episode + keys = list(cache.keys()) + for index in random.choice(len(keys), rescan): + episode = cache[keys[index]] + if length: + total = len(next(iter(episode.values()))) + available = total - length + if available < 1: + print(f'Skipped short episode of length {available}({total}/{length}).') + continue + if balance: + index = min(random.randint(0, total), available) + else: + index = int(random.randint(0, available)) + episode = {k: v[index: index + length] + for k, v in episode.items()} + yield episode + +class DummyEnv: + + def __init__(self): + self._random = np.random.RandomState(seed=0) + self._step = None + + @property + def observation_space(self): + low = np.zeros([64, 64, 3], dtype=np.uint8) + high = 255 * np.ones([64, 64, 3], dtype=np.uint8) + spaces = {'image': gym.spaces.Box(low, high)} + return gym.spaces.Dict(spaces) + + @property + def action_space(self): + low = -np.ones([5], dtype=np.float32) + high = np.ones([5], dtype=np.float32) + return gym.spaces.Box(low, high) + + def reset(self): + self._step = 0 + obs = self.observation_space.sample() + return obs + + def step(self, action): + obs = self.observation_space.sample() + reward = self._random.uniform(0, 1) + self._step += 1 + done = self._step >= 1000 + info = {} + return obs, reward, done, info + + +class SampleDist: + + def __init__(self, dist, samples=100): + self._dist = dist + self._samples = samples + + @property + def name(self): + return 'SampleDist' + + def __getattr__(self, name): + return getattr(self._dist, name) + + def mean(self): + samples = self._dist.sample(self._samples) + return tf.reduce_mean(samples, 0) + + def mode(self): + sample = self._dist.sample(self._samples) + logprob = self._dist.log_prob(sample) + return tf.gather(sample, tf.argmax(logprob))[0] # pylint: disable=E + + def entropy(self): + sample = self._dist.sample(self._samples) + logprob = self.log_prob(sample) + return -tf.reduce_mean(logprob, 0) + + +class OneHotDist: + + 
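+  # Categorical distribution whose samples are one-hot vectors with a
+  # straight-through gradient: `sample` adds `probs - stop_gradient(probs)`
+  # so gradients can flow back into the class probabilities.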
def __init__(self, logits=None, probs=None): + self._dist = tfd.Categorical(logits=logits, probs=probs) + self._num_classes = self.mean().shape[-1] + self._dtype = prec.global_policy().compute_dtype + + @property + def name(self): + return 'OneHotDist' + + def __getattr__(self, name): + return getattr(self._dist, name) + + def prob(self, events): + indices = tf.argmax(events, axis=-1) + return self._dist.prob(indices) + + def log_prob(self, events): + indices = tf.argmax(events, axis=-1) + return self._dist.log_prob(indices) + + def mean(self): + return self._dist.probs_parameter() + + def mode(self): + return self._one_hot(self._dist.mode()) + + def sample(self, amount=None): + amount = [amount] if amount else [] + indices = self._dist.sample(*amount) + sample = self._one_hot(indices) + probs = self._dist.probs_parameter() + sample += tf.cast(probs - tf.stop_gradient(probs), self._dtype) + return sample + + def _one_hot(self, indices): + return tf.one_hot(indices, self._num_classes, dtype=self._dtype) # pylint: disable=E + + + +class TanhBijector(tfp.bijectors.Bijector): + + def __init__(self, validate_args=False, name='tanh'): + super().__init__( + forward_min_event_ndims=0, + validate_args=validate_args, + name=name) + + def _forward(self, x): + return tf.nn.tanh(x) + + def _inverse(self, y): + dtype = y.dtype + y = tf.cast(y, tf.float32) + y = tf.where( + tf.less_equal(tf.abs(y), 1.), + tf.clip_by_value(y, -0.99999997, 0.99999997), y) + y = tf.atanh(y) + y = tf.cast(y, dtype) + return y + + def _forward_log_det_jacobian(self, x): + log2 = tf.math.log(tf.constant(2.0, dtype=x.dtype)) + return 2.0 * (log2 - x - tf.nn.softplus(-2.0 * x)) + + +def lambda_return( + reward, value, pcont, bootstrap, lambda_, axis): + # Setting lambda=1 gives a discounted Monte Carlo return. + # Setting lambda=0 gives a fixed 1-step return. 
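+  # In general the return follows the recursion
+  #   R_t = r_t + pcont_t * ((1 - lambda) * value_{t+1} + lambda * R_{t+1}),
+  # which the reversed static_scan below evaluates starting from `bootstrap`.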
+ assert reward.shape.ndims == value.shape.ndims, (reward.shape, value.shape) + if isinstance(pcont, (int, float)): + pcont = pcont * tf.ones_like(reward) + dims = list(range(reward.shape.ndims)) + dims = [axis] + dims[1:axis] + [0] + dims[axis + 1:] + if axis != 0: + reward = tf.transpose(reward, dims) + value = tf.transpose(value, dims) + pcont = tf.transpose(pcont, dims) + if bootstrap is None: + bootstrap = tf.zeros_like(value[-1]) + next_values = tf.concat([value[1:], bootstrap[None]], 0) + inputs = reward + pcont * next_values * (1 - lambda_) + returns = static_scan( + lambda agg, cur: cur[0] + cur[1] * lambda_ * agg, + (inputs, pcont), bootstrap, reverse=True) + if axis != 0: + returns = tf.transpose(returns, dims) + return returns + + +class Adam(tf.Module): + + def __init__(self, name, modules, lr, clip=None, wd=None, wdpattern=r'.*'): + self._name = name + self._modules = modules + self._clip = clip + self._wd = wd + self._wdpattern = wdpattern + self._opt = tf.optimizers.Adam(lr) + self._opt = prec.LossScaleOptimizer(self._opt, 'dynamic') + self._variables = None + + @property + def variables(self): + return self._opt.variables() + + def __call__(self, tape, loss): + if self._variables is None: + variables = [module.variables for module in self._modules] + self._variables = tf.nest.flatten(variables) + count = sum(np.prod(x.shape) for x in self._variables) + print(f'Found {count} {self._name} parameters.') + assert len(loss.shape) == 0, loss.shape + with tape: + loss = self._opt.get_scaled_loss(loss) + grads = tape.gradient(loss, self._variables) + grads = self._opt.get_unscaled_gradients(grads) + norm = tf.linalg.global_norm(grads) + if self._clip: + grads, _ = tf.clip_by_global_norm(grads, self._clip, norm) + if self._wd: + context = tf.distribute.get_replica_context() + context.merge_call(self._apply_weight_decay) + self._opt.apply_gradients(zip(grads, self._variables)) + return norm + + def _apply_weight_decay(self, strategy): + print('Applied weight decay to variables:') + for var in self._variables: + if re.search(self._wdpattern, self._name + '/' + var.name): + print('- ' + self._name + '/' + var.name) + strategy.extended.update(var, lambda var: self._wd * var) + + +def args_type(default): + if isinstance(default, bool): + return lambda x: bool(['False', 'True'].index(x)) + if isinstance(default, int): + return lambda x: float(x) if ('e' in x or '.' 
in x) else int(x) + if isinstance(default, pathlib.Path): + return lambda x: pathlib.Path(x).expanduser() + return type(default) + + +def static_scan(fn, inputs, start, reverse=False): + last = start + outputs = [[] for _ in tf.nest.flatten(start)] + indices = range(len(tf.nest.flatten(inputs)[0])) + if reverse: + indices = reversed(indices) + for index in indices: + inp = tf.nest.map_structure(lambda x: x[index], inputs) + last = fn(last, inp) + [o.append(l) for o, l in zip(outputs, tf.nest.flatten(last))] + if reverse: + outputs = [list(reversed(x)) for x in outputs] + outputs = [tf.stack(x, 0) for x in outputs] + return tf.nest.pack_sequence_as(start, outputs) + + +def _mnd_sample(self, sample_shape=(), seed=None, name='sample'): + return tf.random.normal( + tuple(sample_shape) + tuple(self.event_shape), + self.mean(), self.stddev(), self.dtype, seed, name) + + +tfd.MultivariateNormalDiag.sample = _mnd_sample + + +def _cat_sample(self, sample_shape=(), seed=None, name='sample'): + assert len(sample_shape) in (0, 1), sample_shape + assert len(self.logits_parameter().shape) == 2 + indices = tf.random.categorical( + self.logits_parameter(), sample_shape[0] if sample_shape else 1, + self.dtype, seed, name) + if not sample_shape: + indices = indices[..., 0] + return indices + + +tfd.Categorical.sample = _cat_sample + + +class Every: + + def __init__(self, every): + self._every = every + self._last = None + + def __call__(self, step): + if self._last is None: + self._last = step + return True + if step >= self._last + self._every: + self._last += self._every + return True + return False + + +class Once: + + def __init__(self): + self._once = True + + def __call__(self): + if self._once: + self._once = False + return True + return False diff --git a/Dreamer/train_configs/dreamer.yaml b/Dreamer/train_configs/dreamer.yaml new file mode 100644 index 0000000..6444ba6 --- /dev/null +++ b/Dreamer/train_configs/dreamer.yaml @@ -0,0 +1,74 @@ +dmc: + + logdir: /media/vedant/cpsDataStorageWK/Vedant/tia_logs + video_dir_train: /media/vedant/cpsDataStorageWK/Vedant/natural_video_setting/train/ + video_dir_test: /media/vedant/cpsDataStorageWK/Vedant/natural_video_setting/test/ + debug: False + seed: 0 + steps: 1000000.0 + snapshot_every: 20000.0 + eval_every: 5000.0 + log_every: 5000.0 + image_size: 64 + log_scalars: true + log_images: true + gpu_growth: true + precision: 16 + task: dmc_cheetah_run_driving + envs: 1 + parallel: none + action_repeat: 2 + time_limit: 1000 + prefill: 5000 + eval_noise: 0.0 + clip_rewards: none + deter_size: 283 + stoch_size: 42 + num_units: 400 + dense_act: elu + cnn_act: relu + cnn_depth: 45 + pcont: false + free_nats: 3.0 + kl_scale: 1.0 + pcont_scale: 10.0 + weight_decay: 0.0 + weight_decay_pattern: .* + batch_size: 50 + batch_length: 50 + train_every: 1000 + train_steps: 100 + pretrain: 100 + model_lr: 0.0006 + value_lr: 8.0e-05 + actor_lr: 8.0e-05 + grad_clip: 100.0 + dataset_balance: false + discount: 0.99 + disclam: 0.95 + horizon: 15 + action_dist: tanh_normal + action_init_std: 5.0 + expl: additive_gaussian + expl_amount: 0.3 + expl_decay: 0.0 + expl_min: 0.0 + +debug: + + debug: True + pretrain: 1 + prefill: 1 + train_steps: 1 + deter_size: 20 + stoch_size: 3 + num_units: 40 + cnn_depth: 3 + batch_size: 10 + batch_length: 20 + + + + + + diff --git a/Dreamer/train_configs/inverse.yaml b/Dreamer/train_configs/inverse.yaml new file mode 100644 index 0000000..57b6fdc --- /dev/null +++ b/Dreamer/train_configs/inverse.yaml @@ -0,0 +1,66 @@ +dmc: + + logdir: ./ + 
video_dir: ./ + debug: False + seed: 0 + steps: 1000000.0 + snapshot_every: 20000.0 + eval_every: 5000.0 + log_every: 5000.0 + image_size: 64 + log_scalars: true + log_images: false + gpu_growth: true + precision: 16 + task: dmc_cheetah_run_driving + envs: 1 + parallel: none + action_repeat: 2 + time_limit: 1000 + prefill: 5000 + eval_noise: 0.0 + clip_rewards: none + deter_size: 283 + stoch_size: 42 + num_units: 400 + dense_act: elu + cnn_act: relu + cnn_depth: 45 + pcont: false + free_nats: 3.0 + kl_scale: 1.0 + pcont_scale: 10.0 + weight_decay: 0.0 + weight_decay_pattern: .* + batch_size: 50 + batch_length: 50 + train_every: 1000 + train_steps: 100 + pretrain: 100 + model_lr: 0.0006 + value_lr: 8.0e-05 + actor_lr: 8.0e-05 + grad_clip: 100.0 + dataset_balance: false + discount: 0.99 + disclam: 0.95 + horizon: 15 + action_dist: tanh_normal + action_init_std: 5.0 + expl: additive_gaussian + expl_amount: 0.3 + expl_decay: 0.0 + expl_min: 0.0 + +debug: + + debug: True + pretrain: 1 + prefill: 1 + train_steps: 1 + batch_size: 10 + batch_length: 20 + + + diff --git a/Dreamer/train_configs/tia.yaml b/Dreamer/train_configs/tia.yaml new file mode 100644 index 0000000..f0df1f7 --- /dev/null +++ b/Dreamer/train_configs/tia.yaml @@ -0,0 +1,76 @@ +dmc: + + logdir: /media/vedant/cpsDataStorageWK/Vedant/tia_logs + video_dir_train: /media/vedant/cpsDataStorageWK/Vedant/natural_video_setting/train/ + video_dir_test: /media/vedant/cpsDataStorageWK/Vedant/natural_video_setting/test/ + debug: False + seed: 0 + steps: 1000000.0 + snapshot_every: 20000.0 + eval_every: 5000.0 + log_every: 5000.0 + image_size: 64 + log_scalars: true + log_images: true + gpu_growth: true + precision: 16 + task: dmc_cheetah_run_driving + envs: 1 + parallel: none + action_repeat: 2 + time_limit: 1000 + prefill: 5000 + eval_noise: 0.0 + clip_rewards: none + deter_size: 200 + stoch_size: 30 + disen_deter_size: 200 + disen_stoch_size: 30 + num_units: 400 + dense_act: elu + cnn_act: relu + cnn_depth: 26 + disen_cnn_depth: 26 + pcont: false + free_nats: 3.0 + num_reward_opt_iters: 20 + disen_neg_rew_scale: 20000.0 + disen_rec_scale: 1.5 + disen_kl_scale: 1.0 + kl_scale: 1.0 + reward_scale: 1.0 + pcont_scale: 10.0 + weight_decay: 0.0 + weight_decay_pattern: .* + batch_size: 50 + batch_length: 50 + train_every: 1000 + train_steps: 100 + pretrain: 100 + disen_reward_lr: 0.0006 + model_lr: 0.0006 + value_lr: 8.0e-05 + actor_lr: 8.0e-05 + grad_clip: 100.0 + dataset_balance: false + discount: 0.99 + disclam: 0.95 + horizon: 15 + action_dist: tanh_normal + action_init_std: 5.0 + expl: additive_gaussian + expl_amount: 0.3 + expl_decay: 0.0 + expl_min: 0.0 + +debug: + + debug: True + pretrain: 1 + prefill: 1 + train_steps: 1 + batch_size: 10 + batch_length: 20 + + + diff --git a/Dreamer/wrappers.py b/Dreamer/wrappers.py new file mode 100644 index 0000000..abdd142 --- /dev/null +++ b/Dreamer/wrappers.py @@ -0,0 +1,540 @@ +import atexit +import functools +import sys +import threading +import traceback + +import gym +import numpy as np +from PIL import Image +from collections import deque + +from numpy.core import overrides + + +class DMC2GYMWrapper: + + def __init__(self, env): + self._env = env + + def __getattr__(self, name): + return getattr(self._env, name) + + @property + def observation_space(self): + spaces = {} + spaces['image'] = gym.spaces.Box( + 0, 255, (self._env._height, self._env._width, 3,), dtype=np.uint8) + return gym.spaces.Dict(spaces) + + def step(self, action): + image, reward, done, info = self._env.step(action) + obs = 
{'image': image} + return obs, reward, done, info + + def reset(self): + image = self._env.reset() + obs = {'image': image} + return obs + + +class DeepMindControl: + + def __init__(self, name, size=(64, 64), camera=None): + domain, task = name.split('_', 1) + if domain == 'cup': # Only domain with multiple words. + domain = 'ball_in_cup' + if isinstance(domain, str): + from dm_control import suite + self._env = suite.load(domain, task) + else: + assert task is None + self._env = domain() + self._size = size + if camera is None: + camera = dict(quadruped=2).get(domain, 0) + self._camera = camera + + @property + def observation_space(self): + spaces = {} + for key, value in self._env.observation_spec().items(): + spaces[key] = gym.spaces.Box( + -np.inf, np.inf, value.shape, dtype=np.float32) + spaces['image'] = gym.spaces.Box( + 0, 255, self._size + (3,), dtype=np.uint8) + return gym.spaces.Dict(spaces) + + @property + def action_space(self): + spec = self._env.action_spec() + return gym.spaces.Box(spec.minimum, spec.maximum, dtype=np.float32) + + def step(self, action): + time_step = self._env.step(action) + obs = dict(time_step.observation) + obs['image'] = self.render() + reward = time_step.reward or 0 + done = time_step.last() + info = {'discount': np.array(time_step.discount, np.float32)} + return obs, reward, done, info + + def reset(self): + time_step = self._env.reset() + obs = dict(time_step.observation) + obs['image'] = self.render() + return obs + + def render(self, *args, **kwargs): + if kwargs.get('mode', 'rgb_array') != 'rgb_array': + raise ValueError("Only render mode 'rgb_array' is supported.") + return self._env.physics.render(*self._size, camera_id=self._camera) + + +class Atari: + + LOCK = threading.Lock() + + def __init__( + self, name, action_repeat=4, size=(84, 84), grayscale=True, noops=30, + life_done=False, sticky_actions=True): + import gym + version = 0 if sticky_actions else 4 + name = ''.join(word.title() for word in name.split('_')) + with self.LOCK: + self._env = gym.make('{}NoFrameskip-v{}'.format(name, version)) + self._action_repeat = action_repeat + self._size = size + self._grayscale = grayscale + self._noops = noops + self._life_done = life_done + self._lives = None + shape = self._env.observation_space.shape[:2] + \ + (() if grayscale else (3,)) + self._buffers = [np.empty(shape, dtype=np.uint8) for _ in range(2)] + self._random = np.random.RandomState(seed=None) + + @property + def observation_space(self): + shape = self._size + (1 if self._grayscale else 3,) + space = gym.spaces.Box(low=0, high=255, shape=shape, dtype=np.uint8) + return gym.spaces.Dict({'image': space}) + + @property + def action_space(self): + return self._env.action_space + + def close(self): + return self._env.close() + + def reset(self): + with self.LOCK: + self._env.reset() + noops = self._random.randint(1, self._noops + 1) + for _ in range(noops): + done = self._env.step(0)[2] + if done: + with self.LOCK: + self._env.reset() + self._lives = self._env.ale.lives() + if self._grayscale: + self._env.ale.getScreenGrayscale(self._buffers[0]) + else: + self._env.ale.getScreenRGB2(self._buffers[0]) + self._buffers[1].fill(0) + return self._get_obs() + + def step(self, action): + total_reward = 0.0 + for step in range(self._action_repeat): + _, reward, done, info = self._env.step(action) + total_reward += reward + if self._life_done: + lives = self._env.ale.lives() + done = done or lives < self._lives + self._lives = lives + if done: + break + elif step >= self._action_repeat - 2: + 
index = step - (self._action_repeat - 2) + if self._grayscale: + self._env.ale.getScreenGrayscale(self._buffers[index]) + else: + self._env.ale.getScreenRGB2(self._buffers[index]) + obs = self._get_obs() + return obs, total_reward, done, info + + def render(self, mode): + return self._env.render(mode) + + def _get_obs(self): + if self._action_repeat > 1: + np.maximum(self._buffers[0], + self._buffers[1], out=self._buffers[0]) + image = np.array(Image.fromarray(self._buffers[0]).resize( + self._size, Image.BILINEAR)) + image = np.clip(image, 0, 255).astype(np.uint8) + image = image[:, :, None] if self._grayscale else image + return {'image': image} + + +class Collect: + + def __init__(self, env, callbacks=None, precision=32): + self._env = env + self._callbacks = callbacks or () + self._precision = precision + self._episode = None + + def __getattr__(self, name): + return getattr(self._env, name) + + def step(self, action): + obs, reward, done, info = self._env.step(action) + obs = {k: self._convert(v) for k, v in obs.items()} + transition = obs.copy() + transition['action'] = action + transition['reward'] = reward + transition['discount'] = info.get( + 'discount', np.array(1 - float(done))) + self._episode.append(transition) + if done: + episode = {k: [t[k] for t in self._episode] + for k in self._episode[0]} + episode = {k: self._convert(v) for k, v in episode.items()} + info['episode'] = episode + for callback in self._callbacks: + callback(episode) + return obs, reward, done, info + + def reset(self): + obs = self._env.reset() + transition = obs.copy() + transition['action'] = np.zeros(self._env.action_space.shape) + transition['reward'] = 0.0 + transition['discount'] = 1.0 + self._episode = [transition] + return obs + + def _convert(self, value): + value = np.array(value) + if np.issubdtype(value.dtype, np.floating): + dtype = {16: np.float16, 32: np.float32, + 64: np.float64}[self._precision] + elif np.issubdtype(value.dtype, np.signedinteger): + dtype = {16: np.int16, 32: np.int32, 64: np.int64}[self._precision] + elif np.issubdtype(value.dtype, np.uint8): + dtype = np.uint8 + else: + raise NotImplementedError(value.dtype) + return value.astype(dtype) + + +class TimeLimit: + + def __init__(self, env, duration): + self._env = env + self._duration = duration + self._step = None + + def __getattr__(self, name): + return getattr(self._env, name) + + def step(self, action): + assert self._step is not None, 'Must reset environment.' 
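+        # Time-limit truncation below only flips `done`; `info['discount']` is
+        # left at 1.0 (unless the env already set it), so downstream code can
+        # distinguish a cutoff episode from a genuine terminal state.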
+ obs, reward, done, info = self._env.step(action) + self._step += 1 + if self._step >= self._duration: + done = True + if 'discount' not in info: + info['discount'] = np.array(1.0).astype(np.float32) + self._step = None + return obs, reward, done, info + + def reset(self): + self._step = 0 + return self._env.reset() + + +class ActionRepeat: + + def __init__(self, env, amount): + self._env = env + self._amount = amount + + def __getattr__(self, name): + return getattr(self._env, name) + + def step(self, action): + done = False + total_reward = 0 + current_step = 0 + while current_step < self._amount and not done: + obs, reward, done, info = self._env.step(action) + total_reward += reward + current_step += 1 + return obs, total_reward, done, info + + +class NormalizeActions: + + def __init__(self, env): + self._env = env + self._mask = np.logical_and( + np.isfinite(env.action_space.low), + np.isfinite(env.action_space.high)) + self._low = np.where(self._mask, env.action_space.low, -1) + self._high = np.where(self._mask, env.action_space.high, 1) + + def __getattr__(self, name): + return getattr(self._env, name) + + @property + def action_space(self): + low = np.where(self._mask, -np.ones_like(self._low), self._low) + high = np.where(self._mask, np.ones_like(self._low), self._high) + return gym.spaces.Box(low, high, dtype=np.float32) + + def step(self, action): + original = (action + 1) / 2 * (self._high - self._low) + self._low + original = np.where(self._mask, original, action) + return self._env.step(original) + + +class ObsDict: + + def __init__(self, env, key='obs'): + self._env = env + self._key = key + + def __getattr__(self, name): + return getattr(self._env, name) + + @property + def observation_space(self): + spaces = {self._key: self._env.observation_space} + return gym.spaces.Dict(spaces) + + @property + def action_space(self): + return self._env.action_space + + def step(self, action): + obs, reward, done, info = self._env.step(action) + obs = {self._key: np.array(obs)} + return obs, reward, done, info + + def reset(self): + obs = self._env.reset() + obs = {self._key: np.array(obs)} + return obs + + +class OneHotAction: + + def __init__(self, env): + assert isinstance(env.action_space, gym.spaces.Discrete) + self._env = env + + def __getattr__(self, name): + return getattr(self._env, name) + + @property + def action_space(self): + shape = (self._env.action_space.n,) + space = gym.spaces.Box(low=0, high=1, shape=shape, dtype=np.float32) + space.sample = self._sample_action + return space + + def step(self, action): + index = np.argmax(action).astype(int) + reference = np.zeros_like(action) + reference[index] = 1 + if not np.allclose(reference, action): + raise ValueError(f'Invalid one-hot action:\n{action}') + return self._env.step(index) + + def reset(self): + return self._env.reset() + + def _sample_action(self): + actions = self._env.action_space.n + index = self._random.randint(0, actions) + reference = np.zeros(actions, dtype=np.float32) + reference[index] = 1.0 + return reference + + +class RewardObs: + + def __init__(self, env): + self._env = env + + def __getattr__(self, name): + return getattr(self._env, name) + + @property + def observation_space(self): + spaces = self._env.observation_space.spaces + assert 'reward' not in spaces + spaces['reward'] = gym.spaces.Box(-np.inf, np.inf, dtype=np.float32) + return gym.spaces.Dict(spaces) + + def step(self, action): + obs, reward, done, info = self._env.step(action) + obs['reward'] = reward + return obs, reward, done, info + + 
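+    # RewardObs mirrors the last reward into the observation dict so the agent
+    # can condition on it; reset() below seeds it with 0.0. Like the other
+    # classes in this file it is a thin wrapper meant to be nested around an
+    # env (the actual stack is assembled elsewhere, presumably in
+    # Dreamer/run.py, which is not part of this hunk).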
def reset(self): + obs = self._env.reset() + obs['reward'] = 0.0 + return obs + + +class Async: + + _ACCESS = 1 + _CALL = 2 + _RESULT = 3 + _EXCEPTION = 4 + _CLOSE = 5 + + def __init__(self, ctor, strategy='process'): + self._strategy = strategy + if strategy == 'none': + self._env = ctor() + elif strategy == 'thread': + import multiprocessing.dummy as mp + elif strategy == 'process': + import multiprocessing as mp + else: + raise NotImplementedError(strategy) + if strategy != 'none': + self._conn, conn = mp.Pipe() + self._process = mp.Process(target=self._worker, args=(ctor, conn)) + atexit.register(self.close) + self._process.start() + self._obs_space = None + self._action_space = None + + @property + def observation_space(self): + if not self._obs_space: + self._obs_space = self.__getattr__('observation_space') + return self._obs_space + + @property + def action_space(self): + if not self._action_space: + self._action_space = self.__getattr__('action_space') + return self._action_space + + def __getattr__(self, name): + if self._strategy == 'none': + return getattr(self._env, name) + self._conn.send((self._ACCESS, name)) + return self._receive() + + def call(self, name, *args, **kwargs): + blocking = kwargs.pop('blocking', True) + if self._strategy == 'none': + return functools.partial(getattr(self._env, name), *args, **kwargs) + payload = name, args, kwargs + self._conn.send((self._CALL, payload)) + promise = self._receive + return promise() if blocking else promise + + def close(self): + if self._strategy == 'none': + try: + self._env.close() + except AttributeError: + pass + return + try: + self._conn.send((self._CLOSE, None)) + self._conn.close() + except IOError: + # The connection was already closed. + pass + self._process.join() + + def step(self, action, blocking=True): + return self.call('step', action, blocking=blocking) + + def reset(self, blocking=True): + return self.call('reset', blocking=blocking) + + def _receive(self): + try: + message, payload = self._conn.recv() + except ConnectionResetError: + raise RuntimeError('Environment worker crashed.') + # Re-raise exceptions in the main process. + if message == self._EXCEPTION: + stacktrace = payload + raise Exception(stacktrace) + if message == self._RESULT: + return payload + raise KeyError(f'Received message of unexpected type {message}') + + def _worker(self, ctor, conn): + try: + env = ctor() + while True: + try: + # Only block for short times to have keyboard exceptions be raised. 
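+                    # Protocol: _ACCESS returns an attribute of the wrapped env,
+                    # _CALL invokes a method with the given args, and both reply
+                    # with (_RESULT, value). _CLOSE ends the loop; any exception
+                    # is reported back as (_EXCEPTION, stacktrace).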
+ if not conn.poll(0.1): + continue + message, payload = conn.recv() + except (EOFError, KeyboardInterrupt): + break + if message == self._ACCESS: + name = payload + result = getattr(env, name) + conn.send((self._RESULT, result)) + continue + if message == self._CALL: + name, args, kwargs = payload + result = getattr(env, name)(*args, **kwargs) + conn.send((self._RESULT, result)) + continue + if message == self._CLOSE: + assert payload is None + break + raise KeyError(f'Received message of unknown type {message}') + except Exception: + stacktrace = ''.join(traceback.format_exception(*sys.exc_info())) + print(f'Error in environment process: {stacktrace}') + conn.send((self._EXCEPTION, stacktrace)) + conn.close() + + +class FrameStack(gym.Wrapper): + def __init__(self, env, k): + gym.Wrapper.__init__(self, env) + self._k = k + self._frames = deque([], maxlen=k) + shp = env.observation_space.shape + self.observation_space = gym.spaces.Box( + low=0, + high=1, + shape=((shp[0] * k,) + shp[1:]), + dtype=env.observation_space.dtype + ) + self._max_episode_steps = env._max_episode_steps + + def reset(self): + obs = self.env.reset() + for _ in range(self._k): + self._frames.append(obs) + return self._get_obs() + + def step(self, action): + obs, reward, done, info = self.env.step(action) + self._frames.append(obs) + return self._get_obs(), reward, done, info + + def _get_obs(self): + assert len(self._frames) == self._k + return np.concatenate(list(self._frames), axis=0) From 0ac3131dad7214c33db645b70be15b3876f83bb1 Mon Sep 17 00:00:00 2001 From: VedantDave Date: Mon, 17 Jul 2023 10:48:40 +0200 Subject: [PATCH 2/2] Add Dreamerv2 Files --- DreamerV2/configs.yaml | 185 +++++++++++ DreamerV2/dreamer.py | 316 ++++++++++++++++++ DreamerV2/exploration.py | 83 +++++ DreamerV2/models.py | 429 ++++++++++++++++++++++++ DreamerV2/networks.py | 465 ++++++++++++++++++++++++++ DreamerV2/tools.py | 694 +++++++++++++++++++++++++++++++++++++++ DreamerV2/wrappers.py | 280 ++++++++++++++++ 7 files changed, 2452 insertions(+) create mode 100644 DreamerV2/configs.yaml create mode 100644 DreamerV2/dreamer.py create mode 100644 DreamerV2/exploration.py create mode 100644 DreamerV2/models.py create mode 100644 DreamerV2/networks.py create mode 100644 DreamerV2/tools.py create mode 100644 DreamerV2/wrappers.py diff --git a/DreamerV2/configs.yaml b/DreamerV2/configs.yaml new file mode 100644 index 0000000..872e3ae --- /dev/null +++ b/DreamerV2/configs.yaml @@ -0,0 +1,185 @@ +defaults: + gpu: 'none' + logdir: ./ + traindir: null + evaldir: null + offline_traindir: '' + offline_evaldir: '' + seed: 0 + steps: 1e7 + eval_every: 1e4 + log_every: 1e4 + reset_every: 0 + gpu_growth: True + precision: 32 + debug: False + expl_gifs: False + + # Environment + task: 'dmc_walker_walk' + size: [64, 64] + envs: 1 + action_repeat: 2 + time_limit: 1000 + prefill: 2500 + eval_noise: 0.0 + clip_rewards: 'identity' + atari_grayscale: False + + # Model + dyn_cell: 'gru' + dyn_hidden: 200 + dyn_deter: 200 + dyn_stoch: 50 + dyn_discrete: 0 + dyn_input_layers: 1 + dyn_output_layers: 1 + dyn_shared: False + dyn_mean_act: 'none' + dyn_std_act: 'sigmoid2' + dyn_min_std: 0.1 + grad_heads: ['image', 'reward'] + units: 400 + reward_layers: 2 + discount_layers: 3 + value_layers: 3 + actor_layers: 4 + act: 'elu' + cnn_depth: 32 + encoder_kernels: [4, 4, 4, 4] + decoder_kernels: [5, 5, 6, 6] + decoder_thin: True + value_head: 'normal' + kl_scale: '1.0' + kl_balance: '0.8' + kl_free: '1.0' + pred_discount: False + discount_scale: 1.0 + reward_scale: 1.0 + 
weight_decay: 0.0 + + # Training + batch_size: 50 + batch_length: 50 + train_every: 5 + train_steps: 1 + pretrain: 100 + model_lr: 3e-4 + value_lr: 8e-5 + actor_lr: 8e-5 + opt_eps: 1e-5 + grad_clip: 100 + value_grad_clip: 100 + actor_grad_clip: 100 + dataset_size: 0 + oversample_ends: False + slow_value_target: True + slow_actor_target: True + slow_target_update: 100 + slow_target_fraction: 1 + opt: 'adam' + + # Behavior. + discount: 0.99 + discount_lambda: 0.95 + imag_horizon: 15 + imag_gradient: 'dynamics' + imag_gradient_mix: '0.1' + imag_sample: True + actor_dist: 'trunc_normal' + actor_entropy: '1e-4' + actor_state_entropy: 0.0 + actor_init_std: 1.0 + actor_min_std: 0.1 + actor_disc: 5 + actor_temp: 0.1 + actor_outscale: 0.0 + expl_amount: 0.0 + eval_state_mean: False + collect_dyn_sample: True + behavior_stop_grad: True + value_decay: 0.0 + future_entropy: False + + # Exploration + expl_behavior: 'greedy' + expl_until: 0 + expl_extr_scale: 0.0 + expl_intr_scale: 1.0 + disag_target: 'stoch' + disag_log: True + disag_models: 10 + disag_offset: 1 + disag_layers: 4 + disag_units: 400 + +atari: + + # General + task: 'atari_demon_attack' + steps: 3e7 + eval_every: 1e5 + log_every: 1e4 + prefill: 50000 + dataset_size: 2e6 + pretrain: 0 + precision: 16 + + # Environment + time_limit: 108000 # 30 minutes of game play. + atari_grayscale: True + action_repeat: 4 + eval_noise: 0.001 + train_every: 16 + train_steps: 1 + clip_rewards: 'tanh' + + # Model + grad_heads: ['image', 'reward', 'discount'] + dyn_cell: 'gru_layer_norm' + pred_discount: True + cnn_depth: 48 + dyn_deter: 600 + dyn_hidden: 600 + dyn_stoch: 32 + dyn_discrete: 32 + reward_layers: 4 + discount_layers: 4 + value_layers: 4 + actor_layers: 4 + + # Behavior + actor_dist: 'onehot' + actor_entropy: 'linear(3e-3,3e-4,2.5e6)' + expl_amount: 0.0 + expl_until: 3e7 + discount: 0.995 + imag_gradient: 'both' + imag_gradient_mix: 'linear(0.1,0,2.5e6)' + + # Training + discount_scale: 5.0 + reward_scale: 1 + weight_decay: 1e-6 + model_lr: 2e-4 + kl_scale: 0.1 + kl_free: 0.0 + actor_lr: 4e-5 + value_lr: 1e-4 + oversample_ends: True + + # Disen + disen_cnn_depth: 16 + disen_only_scale: 1.0 + disen_discount_scale: 2000.0 + disen_reward_scale: 2000.0 + num_reward_opt_iters: 20 + +debug: + + debug: True + pretrain: 1 + prefill: 1 + train_steps: 1 + batch_size: 10 + batch_length: 20 diff --git a/DreamerV2/dreamer.py b/DreamerV2/dreamer.py new file mode 100644 index 0000000..6883bfc --- /dev/null +++ b/DreamerV2/dreamer.py @@ -0,0 +1,316 @@ +import argparse +import collections +import functools +import os +import pathlib +import sys +import warnings + +warnings.filterwarnings('ignore', '.*box bound precision lowered.*') +os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' +os.environ['MUJOCO_GL'] = 'egl' + +import numpy as np +import ruamel.yaml as yaml +import tensorflow as tf +from tensorflow.keras.mixed_precision import experimental as prec + +tf.get_logger().setLevel('ERROR') + +from tensorflow_probability import distributions as tfd + +sys.path.append(str(pathlib.Path(__file__).parent)) + +import exploration as expl +import models +import tools +import wrappers + +class Dreamer(tools.Module): + + def __init__(self, config, logger, dataset): + self._config = config + self._logger = logger + self._float = prec.global_policy().compute_dtype + self._should_log = tools.Every(config.log_every) + self._should_train = tools.Every(config.train_every) + self._should_pretrain = tools.Once() + self._should_reset = tools.Every(config.reset_every) + self._should_expl = 
tools.Until(int( + config.expl_until / config.action_repeat)) + self._metrics = collections.defaultdict(tf.metrics.Mean) + with tf.device('cpu:0'): + self._step = tf.Variable(count_steps(config.traindir), dtype=tf.int64) + # Schedules. + config.actor_entropy = ( + lambda x=config.actor_entropy: tools.schedule(x, self._step)) + config.actor_state_entropy = ( + lambda x=config.actor_state_entropy: tools.schedule(x, self._step)) + config.imag_gradient_mix = ( + lambda x=config.imag_gradient_mix: tools.schedule(x, self._step)) + self._dataset = iter(dataset) + self._wm = models.WorldModel(self._step, config) + self._task_behavior = models.ImagBehavior( + config, self._wm, config.behavior_stop_grad) + reward = lambda f, s, a: self._wm.heads['reward'](f).mode() + self._expl_behavior = dict( + greedy=lambda: self._task_behavior, + random=lambda: expl.Random(config), + plan2explore=lambda: expl.Plan2Explore(config, self._wm, reward), + )[config.expl_behavior]() + # Train step to initialize variables including optimizer statistics. + self._train(next(self._dataset)) + + def __call__(self, obs, reset, state=None, training=True): + step = self._step.numpy().item() + if self._should_reset(step): + state = None + if state is not None and reset.any(): + mask = tf.cast(1 - reset, self._float)[:, None] + state = tf.nest.map_structure(lambda x: x * mask, state) + if training and self._should_train(step): + steps = ( + self._config.pretrain if self._should_pretrain() + else self._config.train_steps) + for _ in range(steps): + self._train(next(self._dataset)) + if self._should_log(step): + for name, mean in self._metrics.items(): + self._logger.scalar(name, float(mean.result())) + mean.reset_states() + openl_joint, openl_main, openl_disen, openl_mask = self._wm.video_pred(next(self._dataset)) + self._logger.video('train_openl_joint', openl_joint) + self._logger.video('train_openl_main', openl_main) + self._logger.video('train_openl_disen', openl_disen) + self._logger.video('train_openl_mask', openl_mask) + self._logger.write(fps=True) + action, state = self._policy(obs, state, training) + if training: + self._step.assign_add(len(reset)) + self._logger.step = self._config.action_repeat \ + * self._step.numpy().item() + return action, state + + @tf.function + def _policy(self, obs, state, training): + if state is None: + batch_size = len(obs['image']) + latent = self._wm.dynamics.initial(len(obs['image'])) + action = tf.zeros((batch_size, self._config.num_actions), self._float) + else: + latent, action = state + embed = self._wm.encoder(self._wm.preprocess(obs)) + latent, _ = self._wm.dynamics.obs_step( + latent, action, embed, self._config.collect_dyn_sample) + if self._config.eval_state_mean: + latent['stoch'] = latent['mean'] + feat = self._wm.dynamics.get_feat(latent) + if not training: + action = self._task_behavior.actor(feat).mode() + elif self._should_expl(self._step): + action = self._expl_behavior.actor(feat).sample() + else: + action = self._task_behavior.actor(feat).sample() + if self._config.actor_dist == 'onehot_gumble': + action = tf.cast( + tf.one_hot(tf.argmax(action, axis=-1), self._config.num_actions), + action.dtype) + action = self._exploration(action, training) + state = (latent, action) + return action, state + + def _exploration(self, action, training): + amount = self._config.expl_amount if training else self._config.eval_noise + if amount == 0: + return action + amount = tf.cast(amount, self._float) + if 'onehot' in self._config.actor_dist: + probs = amount / self._config.num_actions 
+ (1 - amount) * action + return tools.OneHotDist(probs=probs).sample() + else: + return tf.clip_by_value(tfd.Normal(action, amount).sample(), -1, 1) + raise NotImplementedError(self._config.action_noise) + + @tf.function + def _train(self, data): + print('Tracing train function.') + metrics = {} + embed, post, feat, kl, mets = self._wm.train(data) + metrics.update(mets) + start = post + if self._config.pred_discount: # Last step could be terminal. + start = {k: v[:, :-1] for k, v in post.items()} + embed, feat, kl = embed[:, :-1], feat[:, :-1], kl[:, :-1] + reward = lambda f, s, a: self._wm.heads['reward'](f).mode() + metrics.update(self._task_behavior.train(start, reward)[-1]) + if self._config.expl_behavior != 'greedy': + mets = self._expl_behavior.train(start, feat, embed, kl)[-1] + metrics.update({'expl_' + key: value for key, value in mets.items()}) + for name, value in metrics.items(): + self._metrics[name].update_state(value) + + +def count_steps(folder): + return sum(int(str(n).split('-')[-1][:-4]) - 1 for n in folder.glob('*.npz')) + + +def make_dataset(episodes, config): + example = episodes[next(iter(episodes.keys()))] + types = {k: v.dtype for k, v in example.items()} + shapes = {k: (None,) + v.shape[1:] for k, v in example.items()} + generator = lambda: tools.sample_episodes( + episodes, config.batch_length, config.oversample_ends) + dataset = tf.data.Dataset.from_generator(generator, types, shapes) + dataset = dataset.batch(config.batch_size, drop_remainder=True) + dataset = dataset.prefetch(10) + return dataset + + +def make_env(config, logger, mode, train_eps, eval_eps): + suite, task = config.task.split('_', 1) + if suite == 'dmc': + env = wrappers.DeepMindControl(task, config.action_repeat, config.size) + env = wrappers.NormalizeActions(env) + elif suite == 'atari': + env = wrappers.Atari( + task, config.action_repeat, config.size, + grayscale=config.atari_grayscale, + life_done=False and (mode == 'train'), + sticky_actions=True, + all_actions=True) + env = wrappers.OneHotAction(env) + else: + raise NotImplementedError(suite) + env = wrappers.TimeLimit(env, config.time_limit) + callbacks = [functools.partial( + process_episode, config, logger, mode, train_eps, eval_eps)] + env = wrappers.CollectDataset(env, callbacks) + env = wrappers.RewardObs(env) + return env + + +def process_episode(config, logger, mode, train_eps, eval_eps, episode): + directory = dict(train=config.traindir, eval=config.evaldir)[mode] + cache = dict(train=train_eps, eval=eval_eps)[mode] + filename = tools.save_episodes(directory, [episode])[0] + length = len(episode['reward']) - 1 + score = float(episode['reward'].astype(np.float64).sum()) + video = episode['image'] + if mode == 'eval': + cache.clear() + if mode == 'train' and config.dataset_size: + total = 0 + for key, ep in reversed(sorted(cache.items(), key=lambda x: x[0])): + if total <= config.dataset_size - length: + total += len(ep['reward']) - 1 + else: + del cache[key] + logger.scalar('dataset_size', total + length) + cache[str(filename)] = episode + print(f'{mode.title()} episode has {length} steps and return {score:.1f}.') + logger.scalar(f'{mode}_return', score) + logger.scalar(f'{mode}_length', length) + logger.scalar(f'{mode}_episodes', len(cache)) + if mode == 'eval' or config.expl_gifs: + logger.video(f'{mode}_policy', video[None]) + logger.write() + + +def main(logdir, config): + + logdir = os.path.join( + logdir, config.task, 'Ours', str(config.seed)) + + logdir = pathlib.Path(logdir).expanduser() + config.traindir = 
config.traindir or logdir / 'train_eps' + config.evaldir = config.evaldir or logdir / 'eval_eps' + config.steps //= config.action_repeat + config.eval_every //= config.action_repeat + config.log_every //= config.action_repeat + config.time_limit //= config.action_repeat + config.act = getattr(tf.nn, config.act) + + if config.debug: + tf.config.experimental_run_functions_eagerly(True) + if config.gpu_growth: + message = 'No GPU found. To actually train on CPU remove this assert.' + assert tf.config.experimental.list_physical_devices('GPU'), message + for gpu in tf.config.experimental.list_physical_devices('GPU'): + tf.config.experimental.set_memory_growth(gpu, True) + assert config.precision in (16, 32), config.precision + if config.precision == 16: + prec.set_policy(prec.Policy('mixed_float16')) + print('Logdir', logdir) + logdir.mkdir(parents=True, exist_ok=True) + config.traindir.mkdir(parents=True, exist_ok=True) + config.evaldir.mkdir(parents=True, exist_ok=True) + step = count_steps(config.traindir) + logger = tools.Logger(logdir, config.action_repeat * step) + + print('Create envs.') + if config.offline_traindir: + directory = config.offline_traindir.format(**vars(config)) + else: + directory = config.traindir + train_eps = tools.load_episodes(directory, limit=config.dataset_size) + if config.offline_evaldir: + directory = config.offline_evaldir.format(**vars(config)) + else: + directory = config.evaldir + eval_eps = tools.load_episodes(directory, limit=1) + make = lambda mode: make_env(config, logger, mode, train_eps, eval_eps) + train_envs = [make('train') for _ in range(config.envs)] + eval_envs = [make('eval') for _ in range(config.envs)] + acts = train_envs[0].action_space + config.num_actions = acts.n if hasattr(acts, 'n') else acts.shape[0] + + prefill = max(0, config.prefill - count_steps(config.traindir)) + print(f'Prefill dataset ({prefill} steps).') + random_agent = lambda o, d, s: ([acts.sample() for _ in d], s) + tools.simulate(random_agent, train_envs, prefill) + tools.simulate(random_agent, eval_envs, episodes=1) + logger.step = config.action_repeat * count_steps(config.traindir) + + print('Simulate agent.') + train_dataset = make_dataset(train_eps, config) + eval_dataset = iter(make_dataset(eval_eps, config)) + agent = Dreamer(config, logger, train_dataset) + if (logdir / 'variables.pkl').exists(): + agent.load(logdir / 'variables.pkl') + agent._should_pretrain._once = False + + state = None + suite, task = config.task.split('_', 1) + num_eval_episodes = 10 if suite == 'procgen' else 1 + while agent._step.numpy().item() < config.steps: + logger.write() + print('Start evaluation.') + openl_joint, openl_main, openl_disen, openl_mask = agent._wm.video_pred(next(eval_dataset)) + logger.video('eval_openl_joint', openl_joint) + logger.video('eval_openl_main', openl_main) + logger.video('eval_openl_disen', openl_disen) + logger.video('eval_openl_mask', openl_mask) + eval_policy = functools.partial(agent, training=False) + tools.simulate(eval_policy, eval_envs, episodes=num_eval_episodes) + print('Start training.') + state = tools.simulate(agent, train_envs, config.eval_every, state=state) + agent.save(logdir / 'variables.pkl') + for env in train_envs + eval_envs: + try: + env.close() + except Exception: + pass + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--configs', nargs='+', required=True) + args, remaining = parser.parse_known_args() + configs = yaml.safe_load((pathlib.Path(__file__).parent / 'configs.yaml').read_text()) + 
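+    # Merge the selected config sections in order (later names override earlier
+    # ones), then re-expose every key as a --key flag so single values can be
+    # overridden from the shell, e.g. something like:
+    #   python dreamer.py --configs defaults atari --batch_size 16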
+    config_ = {}
+    for name in args.configs:
+        config_.update(configs[name])
+    parser = argparse.ArgumentParser()
+    for key, value in config_.items():
+        arg_type = tools.args_type(value)
+        parser.add_argument(f'--{key}', type=arg_type, default=arg_type(value))
+    main(config_['logdir'], parser.parse_args(remaining))
diff --git a/DreamerV2/exploration.py b/DreamerV2/exploration.py
new file mode 100644
index 0000000..4425ba5
--- /dev/null
+++ b/DreamerV2/exploration.py
@@ -0,0 +1,83 @@
+import tensorflow as tf
+from tensorflow.keras.mixed_precision import experimental as prec
+from tensorflow_probability import distributions as tfd
+
+import models
+import networks
+import tools
+
+class Random(tools.Module):
+
+    def __init__(self, config):
+        self._config = config
+        self._float = prec.global_policy().compute_dtype
+
+    def actor(self, feat):
+        shape = feat.shape[:-1] + [self._config.num_actions]
+        if self._config.actor_dist == 'onehot':
+            return tools.OneHotDist(tf.zeros(shape))
+        else:
+            ones = tf.ones(shape, self._float)
+            return tfd.Uniform(-ones, ones)
+
+    def train(self, start, feat, embed, kl):
+        return None, {}
+
+
+class Plan2Explore(tools.Module):
+
+    def __init__(self, config, world_model, reward=None):
+        self._config = config
+        self._reward = reward
+        self._behavior = models.ImagBehavior(config, world_model)
+        self.actor = self._behavior.actor
+        size = {
+            'embed': 32 * config.cnn_depth,
+            'stoch': config.dyn_stoch,
+            'deter': config.dyn_deter,
+            'feat': config.dyn_stoch + config.dyn_deter,
+        }[self._config.disag_target]
+        kw = dict(
+            shape=size, layers=config.disag_layers, units=config.disag_units,
+            act=config.act)
+        self._networks = [
+            networks.DenseHead(**kw) for _ in range(config.disag_models)]
+        self._opt = tools.Optimizer(
+            'ensemble', config.model_lr, config.opt_eps, config.grad_clip,
+            config.weight_decay, opt=config.opt)
+
+    def train(self, start, feat, embed, kl):
+        metrics = {}
+        target = {
+            'embed': embed,
+            'stoch': start['stoch'],
+            'deter': start['deter'],
+            'feat': feat,
+        }[self._config.disag_target]
+        metrics.update(self._train_ensemble(feat, target))
+        metrics.update(self._behavior.train(start, self._intrinsic_reward)[-1])
+        return None, metrics
+
+    def _intrinsic_reward(self, feat, state, action):
+        preds = [head(feat, tf.float32).mean() for head in self._networks]
+        disag = tf.reduce_mean(tf.math.reduce_std(preds, 0), -1)
+        if self._config.disag_log:
+            disag = tf.math.log(disag)
+        reward = self._config.expl_intr_scale * disag
+        if self._config.expl_extr_scale:
+            reward += tf.cast(self._config.expl_extr_scale * self._reward(
+                feat, state, action), tf.float32)
+        return reward
+
+    def _train_ensemble(self, inputs, targets):
+        if self._config.disag_offset:
+            targets = targets[:, self._config.disag_offset:]
+            inputs = inputs[:, :-self._config.disag_offset]
+        targets = tf.stop_gradient(targets)
+        inputs = tf.stop_gradient(inputs)
+        with tf.GradientTape() as tape:
+            preds = [head(inputs) for head in self._networks]
+            likes = [tf.reduce_mean(pred.log_prob(targets)) for pred in preds]
+            loss = -tf.cast(tf.reduce_sum(likes), tf.float32)
+        metrics = self._opt(tape, loss, self._networks)
+        return metrics
diff --git a/DreamerV2/models.py b/DreamerV2/models.py
new file mode 100644
index 0000000..8225adb
--- /dev/null
+++ b/DreamerV2/models.py
@@ -0,0 +1,429 @@
+import tensorflow as tf
+from tensorflow.keras.mixed_precision import experimental as prec
+
+import networks
+import tools
+
+
+class WorldModel(tools.Module):
+
+    def __init__(self, step, config):
+        self._step =
step + self._config = config + channels = (1 if config.atari_grayscale else 3) + shape = config.size + (channels,) + + ######## + # Main # + ######## + self.encoder = networks.ConvEncoder( + config.cnn_depth, config.act, config.encoder_kernels) + self.dynamics = networks.RSSM( + config.dyn_stoch, config.dyn_deter, config.dyn_hidden, + config.dyn_input_layers, config.dyn_output_layers, config.dyn_shared, + config.dyn_discrete, config.act, config.dyn_mean_act, + config.dyn_std_act, config.dyn_min_std, config.dyn_cell) + self.heads = {} + self.heads['reward'] = networks.DenseHead( + [], config.reward_layers, config.units, config.act) + if config.pred_discount: + self.heads['discount'] = networks.DenseHead( + [], config.discount_layers, config.units, config.act, dist='binary') + self._model_opt = tools.Optimizer( + 'model', config.model_lr, config.opt_eps, config.grad_clip, + config.weight_decay, opt=config.opt) + self._scales = dict( + reward=config.reward_scale, discount=config.discount_scale) + + ######### + # Disen # + ######### + self.disen_encoder = networks.ConvEncoder( + config.disen_cnn_depth, config.act, config.encoder_kernels) + self.disen_dynamics = networks.RSSM( + config.dyn_stoch, config.dyn_deter, config.dyn_hidden, + config.dyn_input_layers, config.dyn_output_layers, config.dyn_shared, + config.dyn_discrete, config.act, config.dyn_mean_act, + config.dyn_std_act, config.dyn_min_std, config.dyn_cell) + + self.disen_heads = {} + self.disen_heads['reward'] = networks.DenseHead( + [], config.reward_layers, config.units, config.act) + if config.pred_discount: + self.disen_heads['discount'] = networks.DenseHead( + [], config.discount_layers, config.units, config.act, dist='binary') + + self._disen_model_opt = tools.Optimizer( + 'disen', config.model_lr, config.opt_eps, config.grad_clip, + config.weight_decay, opt=config.opt) + + self._disen_heads_opt = {} + self._disen_heads_opt['reward'] = tools.Optimizer( + 'disen_reward', config.model_lr, config.opt_eps, config.grad_clip, + config.weight_decay, opt=config.opt) + if config.pred_discount: + self._disen_heads_opt['discount'] = tools.Optimizer( + 'disen_pcont', config.model_lr, config.opt_eps, config.grad_clip, + config.weight_decay, opt=config.opt) + + # negative signs for reward/discount here + self._disen_scales = dict(disen_only=config.disen_only_scale, + reward=-config.disen_reward_scale, discount=-config.disen_discount_scale) + + self.disen_only_image_head = networks.ConvDecoder( + config.disen_cnn_depth, config.act, shape, config.decoder_kernels, + config.decoder_thin) + + ################ + # Joint Decode # + ################ + self.image_head = networks.ConvDecoderMask( + config.cnn_depth, config.act, shape, config.decoder_kernels, + config.decoder_thin) + self.disen_image_head = networks.ConvDecoderMask( + config.disen_cnn_depth, config.act, shape, config.decoder_kernels, + config.decoder_thin) + self.joint_image_head = networks.ConvDecoderMaskEnsemble( + self.image_head, self.disen_image_head + ) + + def train(self, data): + data = self.preprocess(data) + with tf.GradientTape() as model_tape, tf.GradientTape() as disen_tape: + + # kl schedule + kl_balance = tools.schedule(self._config.kl_balance, self._step) + kl_free = tools.schedule(self._config.kl_free, self._step) + kl_scale = tools.schedule(self._config.kl_scale, self._step) + + # Main + embed = self.encoder(data) + post, prior = self.dynamics.observe(embed, data['action']) + kl_loss, kl_value = self.dynamics.kl_loss( + post, prior, kl_balance, kl_free, kl_scale) + 
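+            # Two RSSMs are trained side by side: the main branch above models
+            # task-relevant dynamics, while the "disen" branch below models
+            # distractors. The disen reward/discount heads are fitted to the
+            # targets in an inner loop, but the branch itself receives negative
+            # reward/discount gradients via self._disen_scales, pushing the
+            # distractor features to carry no task information; both branches
+            # then reconstruct the frame jointly through the masked decoder.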
feat = self.dynamics.get_feat(post) + likes = {} + for name, head in self.heads.items(): + grad_head = (name in self._config.grad_heads) + inp = feat if grad_head else tf.stop_gradient(feat) + pred = head(inp, tf.float32) + like = pred.log_prob(tf.cast(data[name], tf.float32)) + likes[name] = tf.reduce_mean( + like) * self._scales.get(name, 1.0) + + # Disen + embed_disen = self.disen_encoder(data) + post_disen, prior_disen = self.disen_dynamics.observe( + embed_disen, data['action']) + kl_loss_disen, kl_value_disen = self.dynamics.kl_loss( + post_disen, prior_disen, kl_balance, kl_free, kl_scale) + feat_disen = self.disen_dynamics.get_feat(post_disen) + + # Optimize disen reward/pcont till optimal + disen_metrics = dict(reward={}, discount={}) + loss_disen = dict(reward=None, discount=None) + for _ in range(self._config.num_reward_opt_iters): + with tf.GradientTape() as disen_reward_tape, tf.GradientTape() as disen_pcont_tape: + disen_gradient_tapes = dict( + reward=disen_reward_tape, discount=disen_pcont_tape) + for name, head in self.disen_heads.items(): + pred_disen = head( + tf.stop_gradient(feat_disen), tf.float32) + loss_disen[name] = -tf.reduce_mean(pred_disen.log_prob( + tf.cast(data[name], tf.float32))) + for name, head in self.disen_heads.items(): + disen_metrics[name] = self._disen_heads_opt[name]( + disen_gradient_tapes[name], loss_disen[name], [head], prefix='disen_neg') + + # Compute likes for disen model (including negative gradients) + likes_disen = {} + for name, head in self.disen_heads.items(): + pred_disen = head(feat_disen, tf.float32) + like_disen = pred_disen.log_prob( + tf.cast(data[name], tf.float32)) + likes_disen[name] = tf.reduce_mean( + like_disen) * self._disen_scales.get(name, -1.0) + disen_only_image_pred = self.disen_only_image_head( + feat_disen, tf.float32) + disen_only_image_like = tf.reduce_mean(disen_only_image_pred.log_prob( + tf.cast(data['image'], tf.float32))) * self._disen_scales.get('disen_only', 1.0) + likes_disen['disen_only'] = disen_only_image_like + + # Joint decode + image_pred_joint, _, _, _ = self.joint_image_head( + feat, feat_disen, tf.float32) + image_like = tf.reduce_mean(image_pred_joint.log_prob( + tf.cast(data['image'], tf.float32))) + likes['image'] = image_like + likes_disen['image'] = image_like + + # Compute loss + model_loss = kl_loss - sum(likes.values()) + disen_loss = kl_loss_disen - sum(likes_disen.values()) + + model_parts = [self.encoder, self.dynamics, + self.joint_image_head] + list(self.heads.values()) + disen_parts = [self.disen_encoder, self.disen_dynamics, + self.joint_image_head, self.disen_only_image_head] + + metrics = self._model_opt( + model_tape, model_loss, model_parts, prefix='main') + disen_model_metrics = self._disen_model_opt( + disen_tape, disen_loss, disen_parts, prefix='disen') + + metrics['kl_balance'] = kl_balance + metrics['kl_free'] = kl_free + metrics['kl_scale'] = kl_scale + metrics.update({f'{name}_loss': -like for name, + like in likes.items()}) + + metrics['disen/disen_only_image_loss'] = -disen_only_image_like + metrics['disen/disen_reward_loss'] = -likes_disen['reward'] / \ + self._disen_scales.get('reward', -1.0) + metrics['disen/disen_discount_loss'] = -likes_disen['discount'] / \ + self._disen_scales.get('discount', -1.0) + + metrics['kl'] = tf.reduce_mean(kl_value) + metrics['prior_ent'] = self.dynamics.get_dist(prior).entropy() + metrics['post_ent'] = self.dynamics.get_dist(post).entropy() + metrics['disen/kl'] = tf.reduce_mean(kl_value_disen) + metrics['disen/prior_ent'] = 
self.dynamics.get_dist( + prior_disen).entropy() + metrics['disen/post_ent'] = self.dynamics.get_dist( + post_disen).entropy() + + metrics.update( + {f'{key}': value for key, value in disen_metrics['reward'].items()}) + metrics.update( + {f'{key}': value for key, value in disen_metrics['discount'].items()}) + metrics.update( + {f'{key}': value for key, value in disen_model_metrics.items()}) + + return embed, post, feat, kl_value, metrics + + @tf.function + def preprocess(self, obs): + dtype = prec.global_policy().compute_dtype + obs = obs.copy() + obs['image'] = tf.cast(obs['image'], dtype) / 255.0 - 0.5 + obs['reward'] = getattr(tf, self._config.clip_rewards)(obs['reward']) + if 'discount' in obs: + obs['discount'] *= self._config.discount + for key, value in obs.items(): + if tf.dtypes.as_dtype(value.dtype) in ( + tf.float16, tf.float32, tf.float64): + obs[key] = tf.cast(value, dtype) + return obs + + @tf.function + def video_pred(self, data): + data = self.preprocess(data) + truth = data['image'][:6] + 0.5 + + embed = self.encoder(data) + embed_disen = self.disen_encoder(data) + states, _ = self.dynamics.observe( + embed[:6, :5], data['action'][:6, :5]) + states_disen, _ = self.disen_dynamics.observe( + embed_disen[:6, :5], data['action'][:6, :5]) + feats = self.dynamics.get_feat(states) + feats_disen = self.disen_dynamics.get_feat(states_disen) + recon_joint, recon_main, recon_disen, recon_mask = self.joint_image_head( + feats, feats_disen) + recon_joint = recon_joint.mode()[:6] + recon_main = recon_main.mode()[:6] + recon_disen = recon_disen.mode()[:6] + recon_mask = recon_mask[:6] + + init = {k: v[:, -1] for k, v in states.items()} + init_disen = {k: v[:, -1] for k, v in states_disen.items()} + prior = self.dynamics.imagine( + data['action'][:6, 5:], init) + prior_disen = self.disen_dynamics.imagine( + data['action'][:6, 5:], init_disen) + _feats = self.dynamics.get_feat(prior) + _feats_disen = self.disen_dynamics.get_feat(prior_disen) + openl_joint, openl_main, openl_disen, openl_mask = self.joint_image_head( + _feats, _feats_disen) + openl_joint = openl_joint.mode() + openl_main = openl_main.mode() + openl_disen = openl_disen.mode() + + model_joint = tf.concat( + [recon_joint[:, :5] + 0.5, openl_joint + 0.5], 1) + error_joint = (model_joint - truth + 1) / 2 + model_main = tf.concat( + [recon_main[:, :5] + 0.5, openl_main + 0.5], 1) + error_main = (model_main - truth + 1) / 2 + model_disen = tf.concat( + [recon_disen[:, :5] + 0.5, openl_disen + 0.5], 1) + error_disen = (model_disen - truth + 1) / 2 + model_mask = tf.concat( + [recon_mask[:, :5] + 0.5, openl_mask + 0.5], 1) + + output_joint = tf.concat([truth, model_joint, error_joint], 2) + output_main = tf.concat([truth, model_main, error_main], 2) + output_disen = tf.concat([truth, model_disen, error_disen], 2) + output_mask = model_mask + + return output_joint, output_main, output_disen, output_mask + + +class ImagBehavior(tools.Module): + + def __init__(self, config, world_model, stop_grad_actor=True, reward=None): + self._config = config + self._world_model = world_model + self._stop_grad_actor = stop_grad_actor + self._reward = reward + self.actor = networks.ActionHead( + config.num_actions, config.actor_layers, config.units, config.act, + config.actor_dist, config.actor_init_std, config.actor_min_std, + config.actor_dist, config.actor_temp, config.actor_outscale) + self.value = networks.DenseHead( + [], config.value_layers, config.units, config.act, + config.value_head) + if config.slow_value_target or 
config.slow_actor_target: + self._slow_value = networks.DenseHead( + [], config.value_layers, config.units, config.act) + self._updates = tf.Variable(0, tf.int64) + kw = dict(wd=config.weight_decay, opt=config.opt) + self._actor_opt = tools.Optimizer( + 'actor', config.actor_lr, config.opt_eps, config.actor_grad_clip, **kw) + self._value_opt = tools.Optimizer( + 'value', config.value_lr, config.opt_eps, config.value_grad_clip, **kw) + + def train( + self, start, objective=None, imagine=None, tape=None, repeats=None): + objective = objective or self._reward + self._update_slow_target() + metrics = {} + with (tape or tf.GradientTape()) as actor_tape: + assert bool(objective) != bool(imagine) + if objective: + imag_feat, imag_state, imag_action = self._imagine( + start, self.actor, self._config.imag_horizon, repeats) + reward = objective(imag_feat, imag_state, imag_action) + else: + imag_feat, imag_state, imag_action, reward = imagine(start) + actor_ent = self.actor(imag_feat, tf.float32).entropy() + state_ent = self._world_model.dynamics.get_dist( + imag_state, tf.float32).entropy() + target, weights = self._compute_target( + imag_feat, reward, actor_ent, state_ent, + self._config.slow_actor_target) + actor_loss, mets = self._compute_actor_loss( + imag_feat, imag_state, imag_action, target, actor_ent, state_ent, + weights) + metrics.update(mets) + if self._config.slow_value_target != self._config.slow_actor_target: + target, weights = self._compute_target( + imag_feat, reward, actor_ent, state_ent, + self._config.slow_value_target) + with tf.GradientTape() as value_tape: + value = self.value(imag_feat, tf.float32)[:-1] + value_loss = -value.log_prob(tf.stop_gradient(target)) + if self._config.value_decay: + value_loss += self._config.value_decay * value.mode() + value_loss = tf.reduce_mean(weights[:-1] * value_loss) + metrics['reward_mean'] = tf.reduce_mean(reward) + metrics['reward_std'] = tf.math.reduce_std(reward) + metrics['actor_ent'] = tf.reduce_mean(actor_ent) + metrics.update(self._actor_opt(actor_tape, actor_loss, [self.actor])) + metrics.update(self._value_opt(value_tape, value_loss, [self.value])) + return imag_feat, imag_state, imag_action, weights, metrics + + def _imagine(self, start, policy, horizon, repeats=None): + dynamics = self._world_model.dynamics + if repeats: + start = {k: tf.repeat(v, repeats, axis=1) + for k, v in start.items()} + + def flatten(x): return tf.reshape(x, [-1] + list(x.shape[2:])) + start = {k: flatten(v) for k, v in start.items()} + + def step(prev, _): + state, _, _ = prev + feat = dynamics.get_feat(state) + inp = tf.stop_gradient(feat) if self._stop_grad_actor else feat + action = policy(inp).sample() + succ = dynamics.img_step( + state, action, sample=self._config.imag_sample) + return succ, feat, action + feat = 0 * dynamics.get_feat(start) + action = policy(feat).mode() + succ, feats, actions = tools.static_scan( + step, tf.range(horizon), (start, feat, action)) + states = {k: tf.concat([ + start[k][None], v[:-1]], 0) for k, v in succ.items()} + if repeats: + def unfold(tensor): + s = tensor.shape + return tf.reshape(tensor, [s[0], s[1] // repeats, repeats] + s[2:]) + states, feats, actions = tf.nest.map_structure( + unfold, (states, feats, actions)) + return feats, states, actions + + def _compute_target(self, imag_feat, reward, actor_ent, state_ent, slow): + reward = tf.cast(reward, tf.float32) + if 'discount' in self._world_model.heads: + discount = self._world_model.heads['discount']( + imag_feat, tf.float32).mean() + else: + discount = 
self._config.discount * tf.ones_like(reward) + if self._config.future_entropy and tf.greater( + self._config.actor_entropy(), 0): + reward += self._config.actor_entropy() * actor_ent + if self._config.future_entropy and tf.greater( + self._config.actor_state_entropy(), 0): + reward += self._config.actor_state_entropy() * state_ent + if slow: + value = self._slow_value(imag_feat, tf.float32).mode() + else: + value = self.value(imag_feat, tf.float32).mode() + target = tools.lambda_return( + reward[:-1], value[:-1], discount[:-1], + bootstrap=value[-1], lambda_=self._config.discount_lambda, axis=0) + weights = tf.stop_gradient(tf.math.cumprod(tf.concat( + [tf.ones_like(discount[:1]), discount[:-1]], 0), 0)) + return target, weights + + def _compute_actor_loss( + self, imag_feat, imag_state, imag_action, target, actor_ent, state_ent, + weights): + metrics = {} + inp = tf.stop_gradient( + imag_feat) if self._stop_grad_actor else imag_feat + policy = self.actor(inp, tf.float32) + actor_ent = policy.entropy() + if self._config.imag_gradient == 'dynamics': + actor_target = target + elif self._config.imag_gradient == 'reinforce': + imag_action = tf.cast(imag_action, tf.float32) + actor_target = policy.log_prob(imag_action)[:-1] * tf.stop_gradient( + target - self.value(imag_feat[:-1], tf.float32).mode()) + elif self._config.imag_gradient == 'both': + imag_action = tf.cast(imag_action, tf.float32) + actor_target = policy.log_prob(imag_action)[:-1] * tf.stop_gradient( + target - self.value(imag_feat[:-1], tf.float32).mode()) + mix = self._config.imag_gradient_mix() + actor_target = mix * target + (1 - mix) * actor_target + metrics['imag_gradient_mix'] = mix + else: + raise NotImplementedError(self._config.imag_gradient) + if not self._config.future_entropy and tf.greater( + self._config.actor_entropy(), 0): + actor_target += self._config.actor_entropy() * actor_ent[:-1] + if not self._config.future_entropy and tf.greater( + self._config.actor_state_entropy(), 0): + actor_target += self._config.actor_state_entropy() * state_ent[:-1] + actor_loss = -tf.reduce_mean(weights[:-1] * actor_target) + return actor_loss, metrics + + def _update_slow_target(self): + if self._config.slow_value_target or self._config.slow_actor_target: + if self._updates % self._config.slow_target_update == 0: + mix = self._config.slow_target_fraction + for s, d in zip(self.value.variables, self._slow_value.variables): + d.assign(mix * s + (1 - mix) * d) + self._updates.assign_add(1) diff --git a/DreamerV2/networks.py b/DreamerV2/networks.py new file mode 100644 index 0000000..7a65d15 --- /dev/null +++ b/DreamerV2/networks.py @@ -0,0 +1,465 @@ +import numpy as np +import tensorflow as tf +from tensorflow.keras import layers as tfkl +from tensorflow_probability import distributions as tfd +from tensorflow.keras.mixed_precision import experimental as prec + +import tools + +class RSSM(tools.Module): + + def __init__( + self, stoch=30, deter=200, hidden=200, layers_input=1, layers_output=1, + shared=False, discrete=False, act=tf.nn.elu, mean_act='none', + std_act='softplus', min_std=0.1, cell='keras'): + super().__init__() + self._stoch = stoch + self._deter = deter + self._hidden = hidden + self._min_std = min_std + self._layers_input = layers_input + self._layers_output = layers_output + self._shared = shared + self._discrete = discrete + self._act = act + self._mean_act = mean_act + self._std_act = std_act + self._embed = None + if cell == 'gru': + self._cell = tfkl.GRUCell(self._deter) + elif cell == 'gru_layer_norm': + 
self._cell = GRUCell(self._deter, norm=True) + else: + raise NotImplementedError(cell) + + def initial(self, batch_size): + dtype = prec.global_policy().compute_dtype + if self._discrete: + state = dict( + logit=tf.zeros( + [batch_size, self._stoch, self._discrete], dtype), + stoch=tf.zeros( + [batch_size, self._stoch, self._discrete], dtype), + deter=self._cell.get_initial_state(None, batch_size, dtype)) + else: + state = dict( + mean=tf.zeros([batch_size, self._stoch], dtype), + std=tf.zeros([batch_size, self._stoch], dtype), + stoch=tf.zeros([batch_size, self._stoch], dtype), + deter=self._cell.get_initial_state(None, batch_size, dtype)) + return state + + @tf.function + def observe(self, embed, action, state=None): + def swap(x): return tf.transpose( + x, [1, 0] + list(range(2, len(x.shape)))) + if state is None: + state = self.initial(tf.shape(action)[0]) + embed, action = swap(embed), swap(action) + post, prior = tools.static_scan( + lambda prev, inputs: self.obs_step(prev[0], *inputs), + (action, embed), (state, state)) + post = {k: swap(v) for k, v in post.items()} + prior = {k: swap(v) for k, v in prior.items()} + return post, prior + + @tf.function + def imagine(self, action, state=None): + def swap(x): return tf.transpose( + x, [1, 0] + list(range(2, len(x.shape)))) + if state is None: + state = self.initial(tf.shape(action)[0]) + assert isinstance(state, dict), state + action = swap(action) + prior = tools.static_scan(self.img_step, action, state) + prior = {k: swap(v) for k, v in prior.items()} + return prior + + def get_feat(self, state): + stoch = state['stoch'] + if self._discrete: + shape = stoch.shape[:-2] + [self._stoch * self._discrete] + stoch = tf.reshape(stoch, shape) + return tf.concat([stoch, state['deter']], -1) + + def get_dist(self, state, dtype=None): + if self._discrete: + logit = state['logit'] + logit = tf.cast(logit, tf.float32) + dist = tfd.Independent(tools.OneHotDist(logit), 1) + if dtype != tf.float32: + dist = tools.DtypeDist(dist, dtype or state['logit'].dtype) + else: + mean, std = state['mean'], state['std'] + if dtype: + mean = tf.cast(mean, dtype) + std = tf.cast(std, dtype) + dist = tfd.MultivariateNormalDiag(mean, std) + return dist + + @tf.function + def obs_step(self, prev_state, prev_action, embed, sample=True): + if not self._embed: + self._embed = embed.shape[-1] + prior = self.img_step(prev_state, prev_action, None, sample) + if self._shared: + post = self.img_step(prev_state, prev_action, embed, sample) + else: + x = tf.concat([prior['deter'], embed], -1) + for i in range(self._layers_output): + x = self.get(f'obi{i}', tfkl.Dense, self._hidden, self._act)(x) + stats = self._suff_stats_layer('obs', x) + if sample: + stoch = self.get_dist(stats).sample() + else: + stoch = self.get_dist(stats).mode() + post = {'stoch': stoch, 'deter': prior['deter'], **stats} + return post, prior + + @tf.function + def img_step(self, prev_state, prev_action, embed=None, sample=True): + prev_stoch = prev_state['stoch'] + if self._discrete: + shape = prev_stoch.shape[:-2] + [self._stoch * self._discrete] + prev_stoch = tf.reshape(prev_stoch, shape) + if self._shared: + if embed is None: + shape = prev_action.shape[:-1] + [self._embed] + embed = tf.zeros(shape, prev_action.dtype) + x = tf.concat([prev_stoch, prev_action, embed], -1) + else: + x = tf.concat([prev_stoch, prev_action], -1) + for i in range(self._layers_input): + x = self.get(f'ini{i}', tfkl.Dense, self._hidden, self._act)(x) + x, deter = self._cell(x, [prev_state['deter']]) + deter = deter[0] # 
Keras wraps the state in a list. + for i in range(self._layers_output): + x = self.get(f'imo{i}', tfkl.Dense, self._hidden, self._act)(x) + stats = self._suff_stats_layer('ims', x) + if sample: + stoch = self.get_dist(stats).sample() + else: + stoch = self.get_dist(stats).mode() + prior = {'stoch': stoch, 'deter': deter, **stats} + return prior + + def _suff_stats_layer(self, name, x): + if self._discrete: + x = self.get(name, tfkl.Dense, self._stoch * + self._discrete, None)(x) + logit = tf.reshape(x, x.shape[:-1] + [self._stoch, self._discrete]) + return {'logit': logit} + else: + x = self.get(name, tfkl.Dense, 2 * self._stoch, None)(x) + mean, std = tf.split(x, 2, -1) + mean = { + 'none': lambda: mean, + 'tanh5': lambda: 5.0 * tf.math.tanh(mean / 5.0), + }[self._mean_act]() + std = { + 'softplus': lambda: tf.nn.softplus(std), + 'abs': lambda: tf.math.abs(std + 1), + 'sigmoid': lambda: tf.nn.sigmoid(std), + 'sigmoid2': lambda: 2 * tf.nn.sigmoid(std / 2), + }[self._std_act]() + std = std + self._min_std + return {'mean': mean, 'std': std} + + def kl_loss(self, post, prior, balance, free, scale): + kld = tfd.kl_divergence + def dist(x): return self.get_dist(x, tf.float32) + if balance == 0.5: + value = kld(dist(prior), dist(post)) + loss = tf.reduce_mean(tf.maximum(value, free)) + else: + def sg(x): return tf.nest.map_structure(tf.stop_gradient, x) + value = kld(dist(prior), dist(sg(post))) + pri = tf.reduce_mean(value) + pos = tf.reduce_mean(kld(dist(sg(prior)), dist(post))) + pri, pos = tf.maximum(pri, free), tf.maximum(pos, free) + loss = balance * pri + (1 - balance) * pos + loss *= scale + return loss, value + + +class ConvEncoder(tools.Module): + + def __init__( + self, depth=32, act=tf.nn.relu, kernels=(4, 4, 4, 4)): + self._act = act + self._depth = depth + self._kernels = kernels + + def __call__(self, obs): + kwargs = dict(strides=2, activation=self._act) + Conv = tfkl.Conv2D + x = tf.reshape(obs['image'], (-1,) + tuple(obs['image'].shape[-3:])) + x = self.get('h1', Conv, 1 * self._depth, + self._kernels[0], **kwargs)(x) + x = self.get('h2', Conv, 2 * self._depth, + self._kernels[1], **kwargs)(x) + x = self.get('h3', Conv, 4 * self._depth, + self._kernels[2], **kwargs)(x) + x = self.get('h4', Conv, 8 * self._depth, + self._kernels[3], **kwargs)(x) + x = tf.reshape(x, [x.shape[0], np.prod(x.shape[1:])]) + shape = tf.concat([tf.shape(obs['image'])[:-3], [x.shape[-1]]], 0) + return tf.reshape(x, shape) + + +class ConvDecoder(tools.Module): + + def __init__( + self, depth=32, act=tf.nn.relu, shape=(64, 64, 3), kernels=(5, 5, 6, 6), + thin=True): + self._act = act + self._depth = depth + self._shape = shape + self._kernels = kernels + self._thin = thin + + def __call__(self, features, dtype=None): + kwargs = dict(strides=2, activation=self._act) + ConvT = tfkl.Conv2DTranspose + if self._thin: + x = self.get('h1', tfkl.Dense, 32 * self._depth, None)(features) + x = tf.reshape(x, [-1, 1, 1, 32 * self._depth]) + else: + x = self.get('h1', tfkl.Dense, 128 * self._depth, None)(features) + x = tf.reshape(x, [-1, 2, 2, 32 * self._depth]) + x = self.get('h2', ConvT, 4 * self._depth, + self._kernels[0], **kwargs)(x) + x = self.get('h3', ConvT, 2 * self._depth, + self._kernels[1], **kwargs)(x) + x = self.get('h4', ConvT, 1 * self._depth, + self._kernels[2], **kwargs)(x) + x = self.get( + 'h5', ConvT, self._shape[-1], self._kernels[3], strides=2)(x) + mean = tf.reshape(x, tf.concat( + [tf.shape(features)[:-1], self._shape], 0)) + if dtype: + mean = tf.cast(mean, dtype) + return 
tfd.Independent(tfd.Normal(mean, 1), len(self._shape)) + + +class ConvDecoderMask(tools.Module): + + def __init__( + self, depth=32, act=tf.nn.relu, shape=(64, 64, 3), kernels=(5, 5, 6, 6), + thin=True): + self._act = act + self._depth = depth + self._shape = shape + self._kernels = kernels + self._thin = thin + + def __call__(self, features, dtype=None): + kwargs = dict(strides=2, activation=self._act) + ConvT = tfkl.Conv2DTranspose + if self._thin: + x = self.get('h1', tfkl.Dense, 32 * self._depth, None)(features) + x = tf.reshape(x, [-1, 1, 1, 32 * self._depth]) + else: + x = self.get('h1', tfkl.Dense, 128 * self._depth, None)(features) + x = tf.reshape(x, [-1, 2, 2, 32 * self._depth]) + x = self.get('h2', ConvT, 4 * self._depth, + self._kernels[0], **kwargs)(x) + x = self.get('h3', ConvT, 2 * self._depth, + self._kernels[1], **kwargs)(x) + x = self.get('h4', ConvT, 1 * self._depth, + self._kernels[2], **kwargs)(x) + x = self.get( + 'h5', ConvT, 2 * self._shape[-1], self._kernels[3], strides=2)(x) + mean, mask = tf.split(x, [self._shape[-1], self._shape[-1]], -1) + mean = tf.reshape(mean, tf.concat( + [tf.shape(features)[:-1], self._shape], 0)) + mask = tf.reshape(mask, tf.concat( + [tf.shape(features)[:-1], self._shape], 0)) + if dtype: + mean = tf.cast(mean, dtype) + mask = tf.cast(mask, dtype) + return tfd.Independent(tfd.Normal(mean, 1), len(self._shape)), mask + + +class ConvDecoderMaskEnsemble(tools.Module): + """ + ensemble two convdecoder with outputs + NOTE: remove pred1/pred2 for maximum performance. + """ + + def __init__(self, decoder1, decoder2): + self._decoder1 = decoder1 + self._decoder2 = decoder2 + self._shape = decoder1._shape + + def __call__(self, feat1, feat2, dtype=None): + kwargs = dict(strides=1, activation=tf.nn.sigmoid) + pred1, mask1 = self._decoder1(feat1, dtype) + pred2, mask2 = self._decoder2(feat2, dtype) + mean1 = pred1.submodules[0].loc + mean2 = pred2.submodules[0].loc + mask_feat = tf.concat([mask1, mask2], -1) + mask = self.get('mask1', tfkl.Conv2D, 1, 1, **kwargs)(mask_feat) + mask_use1 = mask + mask_use2 = 1-mask + mean = mean1 * tf.cast(mask_use1, mean1.dtype) + \ + mean2 * tf.cast(mask_use2, mean2.dtype) + return tfd.Independent(tfd.Normal(mean, 1), len(self._shape)), pred1, pred2, tf.cast(mask_use1, mean1.dtype) + + +class DenseHead(tools.Module): + + def __init__( + self, shape, layers, units, act=tf.nn.elu, dist='normal', std=1.0): + self._shape = (shape,) if isinstance(shape, int) else shape + self._layers = layers + self._units = units + self._act = act + self._dist = dist + self._std = std + + def __call__(self, features, dtype=None): + x = features + for index in range(self._layers): + x = self.get(f'h{index}', tfkl.Dense, self._units, self._act)(x) + mean = self.get(f'hmean', tfkl.Dense, np.prod(self._shape))(x) + mean = tf.reshape(mean, tf.concat( + [tf.shape(features)[:-1], self._shape], 0)) + if self._std == 'learned': + std = self.get(f'hstd', tfkl.Dense, np.prod(self._shape))(x) + std = tf.nn.softplus(std) + 0.01 + std = tf.reshape(std, tf.concat( + [tf.shape(features)[:-1], self._shape], 0)) + else: + std = self._std + if dtype: + mean, std = tf.cast(mean, dtype), tf.cast(std, dtype) + if self._dist == 'normal': + return tfd.Independent(tfd.Normal(mean, std), len(self._shape)) + if self._dist == 'huber': + return tfd.Independent( + tools.UnnormalizedHuber(mean, std, 1.0), len(self._shape)) + if self._dist == 'binary': + return tfd.Independent(tfd.Bernoulli(mean), len(self._shape)) + raise NotImplementedError(self._dist) + + +class 
ActionHead(tools.Module): + + def __init__( + self, size, layers, units, act=tf.nn.elu, dist='trunc_normal', + init_std=0.0, min_std=0.1, action_disc=5, temp=0.1, outscale=0): + # assert min_std <= 2 + self._size = size + self._layers = layers + self._units = units + self._dist = dist + self._act = act + self._min_std = min_std + self._init_std = init_std + self._action_disc = action_disc + self._temp = temp() if callable(temp) else temp + self._outscale = outscale + + def __call__(self, features, dtype=None): + x = features + for index in range(self._layers): + kw = {} + if index == self._layers - 1 and self._outscale: + kw['kernel_initializer'] = tf.keras.initializers.VarianceScaling( + self._outscale) + x = self.get(f'h{index}', tfkl.Dense, + self._units, self._act, **kw)(x) + if self._dist == 'tanh_normal': + # https://www.desmos.com/calculator/rcmcf5jwe7 + x = self.get(f'hout', tfkl.Dense, 2 * self._size)(x) + if dtype: + x = tf.cast(x, dtype) + mean, std = tf.split(x, 2, -1) + mean = tf.tanh(mean) + std = tf.nn.softplus(std + self._init_std) + self._min_std + dist = tfd.Normal(mean, std) + dist = tfd.TransformedDistribution(dist, tools.TanhBijector()) + dist = tfd.Independent(dist, 1) + dist = tools.SampleDist(dist) + elif self._dist == 'tanh_normal_5': + x = self.get(f'hout', tfkl.Dense, 2 * self._size)(x) + if dtype: + x = tf.cast(x, dtype) + mean, std = tf.split(x, 2, -1) + mean = 5 * tf.tanh(mean / 5) + std = tf.nn.softplus(std + 5) + 5 + dist = tfd.Normal(mean, std) + dist = tfd.TransformedDistribution(dist, tools.TanhBijector()) + dist = tfd.Independent(dist, 1) + dist = tools.SampleDist(dist) + elif self._dist == 'normal': + x = self.get(f'hout', tfkl.Dense, 2 * self._size)(x) + if dtype: + x = tf.cast(x, dtype) + mean, std = tf.split(x, 2, -1) + std = tf.nn.softplus(std + self._init_std) + self._min_std + dist = tfd.Normal(mean, std) + dist = tfd.Independent(dist, 1) + elif self._dist == 'normal_1': + mean = self.get(f'hout', tfkl.Dense, self._size)(x) + if dtype: + mean = tf.cast(mean, dtype) + dist = tfd.Normal(mean, 1) + dist = tfd.Independent(dist, 1) + elif self._dist == 'trunc_normal': + # https://www.desmos.com/calculator/mmuvuhnyxo + x = self.get(f'hout', tfkl.Dense, 2 * self._size)(x) + x = tf.cast(x, tf.float32) + mean, std = tf.split(x, 2, -1) + mean = tf.tanh(mean) + std = 2 * tf.nn.sigmoid(std / 2) + self._min_std + dist = tools.SafeTruncatedNormal(mean, std, -1, 1) + dist = tools.DtypeDist(dist, dtype) + dist = tfd.Independent(dist, 1) + elif self._dist == 'onehot': + x = self.get(f'hout', tfkl.Dense, self._size)(x) + x = tf.cast(x, tf.float32) + dist = tools.OneHotDist(x, dtype=dtype) + dist = tools.DtypeDist(dist, dtype) + elif self._dist == 'onehot_gumble': + x = self.get(f'hout', tfkl.Dense, self._size)(x) + if dtype: + x = tf.cast(x, dtype) + temp = self._temp + dist = tools.GumbleDist(temp, x, dtype=dtype) + else: + raise NotImplementedError(self._dist) + return dist + + +class GRUCell(tf.keras.layers.AbstractRNNCell): + + def __init__(self, size, norm=False, act=tf.tanh, update_bias=-1, **kwargs): + super().__init__() + self._size = size + self._act = act + self._norm = norm + self._update_bias = update_bias + self._layer = tfkl.Dense(3 * size, use_bias=norm is not None, **kwargs) + if norm: + self._norm = tfkl.LayerNormalization(dtype=tf.float32) + + @property + def state_size(self): + return self._size + + def call(self, inputs, state): + state = state[0] # Keras wraps the state in a list. 
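+    # A single dense layer emits all three gate pre-activations at once; they
+    # are split into reset, candidate, and update parts below. The negative
+    # update bias keeps the update gate small at initialization, so the cell
+    # initially favors carrying over the previous state.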
+ parts = self._layer(tf.concat([inputs, state], -1)) + if self._norm: + dtype = parts.dtype + parts = tf.cast(parts, tf.float32) + parts = self._norm(parts) + parts = tf.cast(parts, dtype) + reset, cand, update = tf.split(parts, 3, -1) + reset = tf.nn.sigmoid(reset) + cand = self._act(reset * cand) + update = tf.nn.sigmoid(update + self._update_bias) + output = update * cand + (1 - update) * state + return output, [output] diff --git a/DreamerV2/tools.py b/DreamerV2/tools.py new file mode 100644 index 0000000..5b0dbb1 --- /dev/null +++ b/DreamerV2/tools.py @@ -0,0 +1,694 @@ +import datetime +import io +import json +import pathlib +import pickle +import re +import time +import uuid + +import numpy as np +import tensorflow as tf +import tensorflow.compat.v1 as tf1 +import tensorflow_probability as tfp +from tensorflow.keras.mixed_precision import experimental as prec +from tensorflow_probability import distributions as tfd + + +# Patch to ignore seed to avoid synchronization across GPUs. +_orig_random_categorical = tf.random.categorical +def random_categorical(*args, **kwargs): + kwargs['seed'] = None + return _orig_random_categorical(*args, **kwargs) +tf.random.categorical = random_categorical + +# Patch to ignore seed to avoid synchronization across GPUs. +_orig_random_normal = tf.random.normal +def random_normal(*args, **kwargs): + kwargs['seed'] = None + return _orig_random_normal(*args, **kwargs) +tf.random.normal = random_normal + + +class AttrDict(dict): + + __setattr__ = dict.__setitem__ + __getattr__ = dict.__getitem__ + + +class Module(tf.Module): + + def save(self, filename): + values = tf.nest.map_structure(lambda x: x.numpy(), self.variables) + amount = len(tf.nest.flatten(values)) + count = int(sum(np.prod(x.shape) for x in tf.nest.flatten(values))) + print(f'Save checkpoint with {amount} tensors and {count} parameters.') + with pathlib.Path(filename).open('wb') as f: + pickle.dump(values, f) + + def load(self, filename): + with pathlib.Path(filename).open('rb') as f: + values = pickle.load(f) + amount = len(tf.nest.flatten(values)) + count = int(sum(np.prod(x.shape) for x in tf.nest.flatten(values))) + print(f'Load checkpoint with {amount} tensors and {count} parameters.') + tf.nest.map_structure(lambda x, y: x.assign(y), self.variables, values) + + def get(self, name, ctor, *args, **kwargs): + # Create or get layer by name to avoid mentioning it in the constructor. + if not hasattr(self, '_modules'): + self._modules = {} + if name not in self._modules: + self._modules[name] = ctor(*args, **kwargs) + return self._modules[name] + + +def var_nest_names(nest): + if isinstance(nest, dict): + items = ' '.join(f'{k}:{var_nest_names(v)}' for k, v in nest.items()) + return '{' + items + '}' + if isinstance(nest, (list, tuple)): + items = ' '.join(var_nest_names(v) for v in nest) + return '[' + items + ']' + if hasattr(nest, 'name') and hasattr(nest, 'shape'): + return nest.name + str(nest.shape).replace(', ', 'x') + if hasattr(nest, 'shape'): + return str(nest.shape).replace(', ', 'x') + return '?' 
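+
+
+# Usage sketch for `Module.get` above (illustrative only; `TwoLayer` is a
+# hypothetical example and not used elsewhere): sub-layers are created lazily
+# by name on the first call and reused on later calls.
+#
+#   class TwoLayer(Module):
+#     def __call__(self, x):
+#       x = self.get('h1', tf.keras.layers.Dense, 64, tf.nn.elu)(x)
+#       return self.get('h2', tf.keras.layers.Dense, 1)(x)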
+ + +class Logger: + + def __init__(self, logdir, step): + self._logdir = logdir + self._writer = tf.summary.create_file_writer(str(logdir), max_queue=1000) + self._last_step = None + self._last_time = None + self._scalars = {} + self._images = {} + self._videos = {} + self.step = step + + def scalar(self, name, value): + self._scalars[name] = float(value) + + def image(self, name, value): + self._images[name] = np.array(value) + + def video(self, name, value): + self._videos[name] = np.array(value) + + def write(self, fps=False): + scalars = list(self._scalars.items()) + if fps: + scalars.append(('fps', self._compute_fps(self.step))) + print(f'[{self.step}]', ' / '.join(f'{k} {v:.1f}' for k, v in scalars)) + with (self._logdir / 'metrics.jsonl').open('a') as f: + f.write(json.dumps({'step': self.step, ** dict(scalars)}) + '\n') + with self._writer.as_default(): + for name, value in scalars: + tf.summary.scalar('scalars/' + name, value, self.step) + for name, value in self._images.items(): + tf.summary.image(name, value, self.step) + for name, value in self._videos.items(): + video_summary(name, value, self.step) + self._writer.flush() + self._scalars = {} + self._images = {} + self._videos = {} + + def _compute_fps(self, step): + if self._last_step is None: + self._last_time = time.time() + self._last_step = step + return 0 + steps = step - self._last_step + duration = time.time() - self._last_time + self._last_time += duration + self._last_step = step + return steps / duration + + +def graph_summary(writer, step, fn, *args): + def inner(*args): + tf.summary.experimental.set_step(step.numpy().item()) + with writer.as_default(): + fn(*args) + return tf.numpy_function(inner, args, []) + + +def video_summary(name, video, step=None, fps=20): + name = name if isinstance(name, str) else name.decode('utf-8') + if np.issubdtype(video.dtype, np.floating): + video = np.clip(255 * video, 0, 255).astype(np.uint8) + B, T, H, W, C = video.shape + try: + frames = video.transpose((1, 2, 0, 3, 4)).reshape((T, H, B * W, C)) + summary = tf1.Summary() + image = tf1.Summary.Image(height=B * H, width=T * W, colorspace=C) + image.encoded_image_string = encode_gif(frames, fps) + summary.value.add(tag=name, image=image) + tf.summary.experimental.write_raw_pb(summary.SerializeToString(), step) + except (IOError, OSError) as e: + print('GIF summaries require ffmpeg in $PATH.', e) + frames = video.transpose((0, 2, 1, 3, 4)).reshape((1, B * H, T * W, C)) + tf.summary.image(name, frames, step) + + +def encode_gif(frames, fps): + from subprocess import Popen, PIPE + h, w, c = frames[0].shape + pxfmt = {1: 'gray', 3: 'rgb24'}[c] + cmd = ' '.join([ + f'ffmpeg -y -f rawvideo -vcodec rawvideo', + f'-r {fps:.02f} -s {w}x{h} -pix_fmt {pxfmt} -i - -filter_complex', + f'[0:v]split[x][z];[z]palettegen[y];[x]fifo[x];[x][y]paletteuse', + f'-r {fps:.02f} -f gif -']) + proc = Popen(cmd.split(' '), stdin=PIPE, stdout=PIPE, stderr=PIPE) + for image in frames: + proc.stdin.write(image.tostring()) + out, err = proc.communicate() + if proc.returncode: + raise IOError('\n'.join([' '.join(cmd), err.decode('utf8')])) + del proc + return out + + +def simulate(agent, envs, steps=0, episodes=0, state=None): + # Initialize or unpack simulation state. 
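+  # The simulation state is the tuple (step, episode, done flags, episode
+  # lengths, last observations, agent state); it is returned below so a
+  # rollout can be resumed across calls.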
+ if state is None: + step, episode = 0, 0 + done = np.ones(len(envs), np.bool) + length = np.zeros(len(envs), np.int32) + obs = [None] * len(envs) + agent_state = None + else: + step, episode, done, length, obs, agent_state = state + while (steps and step < steps) or (episodes and episode < episodes): + # Reset envs if necessary. + if done.any(): + indices = [index for index, d in enumerate(done) if d] + results = [envs[i].reset() for i in indices] + for index, result in zip(indices, results): + obs[index] = result + # Step agents. + obs = {k: np.stack([o[k] for o in obs]) for k in obs[0]} + action, agent_state = agent(obs, done, agent_state) + if isinstance(action, dict): + action = [ + {k: np.array(action[k][i]) for k in action} + for i in range(len(envs))] + else: + action = np.array(action) + assert len(action) == len(envs) + # Step envs. + results = [e.step(a) for e, a in zip(envs, action)] + obs, _, done = zip(*[p[:3] for p in results]) + obs = list(obs) + done = np.stack(done) + episode += int(done.sum()) + length += 1 + step += (done * length).sum() + length *= (1 - done) + # import pdb + # pdb.set_trace() + # Return new state to allow resuming the simulation. + return (step - steps, episode - episodes, done, length, obs, agent_state) + + +def save_episodes(directory, episodes): + directory = pathlib.Path(directory).expanduser() + directory.mkdir(parents=True, exist_ok=True) + timestamp = datetime.datetime.now().strftime('%Y%m%dT%H%M%S') + filenames = [] + for episode in episodes: + identifier = str(uuid.uuid4().hex) + length = len(episode['reward']) + filename = directory / f'{timestamp}-{identifier}-{length}.npz' + with io.BytesIO() as f1: + np.savez_compressed(f1, **episode) + f1.seek(0) + with filename.open('wb') as f2: + f2.write(f1.read()) + filenames.append(filename) + return filenames + + +def sample_episodes(episodes, length=None, balance=False, seed=0): + random = np.random.RandomState(seed) + while True: + episode = random.choice(list(episodes.values())) + if length: + total = len(next(iter(episode.values()))) + available = total - length + if available < 1: + # print(f'Skipped short episode of length {available}.') + continue + if balance: + index = min(random.randint(0, total), available) + else: + index = int(random.randint(0, available + 1)) + episode = {k: v[index: index + length] for k, v in episode.items()} + yield episode + + +def load_episodes(directory, limit=None): + directory = pathlib.Path(directory).expanduser() + episodes = {} + total = 0 + for filename in reversed(sorted(directory.glob('*.npz'))): + try: + with filename.open('rb') as f: + episode = np.load(f) + episode = {k: episode[k] for k in episode.keys()} + except Exception as e: + print(f'Could not load episode: {e}') + continue + episodes[str(filename)] = episode + total += len(episode['reward']) - 1 + if limit and total >= limit: + break + return episodes + + +class DtypeDist: + + def __init__(self, dist, dtype=None): + self._dist = dist + self._dtype = dtype or prec.global_policy().compute_dtype + + @property + def name(self): + return 'DtypeDist' + + def __getattr__(self, name): + return getattr(self._dist, name) + + def mean(self): + return tf.cast(self._dist.mean(), self._dtype) + + def mode(self): + return tf.cast(self._dist.mode(), self._dtype) + + def entropy(self): + return tf.cast(self._dist.entropy(), self._dtype) + + def sample(self, *args, **kwargs): + return tf.cast(self._dist.sample(*args, **kwargs), self._dtype) + + +class SampleDist: + + def __init__(self, dist, samples=100): + 
self._dist = dist + self._samples = samples + + @property + def name(self): + return 'SampleDist' + + def __getattr__(self, name): + return getattr(self._dist, name) + + def mean(self): + samples = self._dist.sample(self._samples) + return tf.reduce_mean(samples, 0) + + def mode(self): + sample = self._dist.sample(self._samples) + logprob = self._dist.log_prob(sample) + return tf.gather(sample, tf.argmax(logprob))[0] + + def entropy(self): + sample = self._dist.sample(self._samples) + logprob = self.log_prob(sample) + return -tf.reduce_mean(logprob, 0) + + +class OneHotDist(tfd.OneHotCategorical): + + def __init__(self, logits=None, probs=None, dtype=None): + self._sample_dtype = dtype or prec.global_policy().compute_dtype + super().__init__(logits=logits, probs=probs) + + def mode(self): + return tf.cast(super().mode(), self._sample_dtype) + + def sample(self, sample_shape=(), seed=None): + # Straight through biased gradient estimator. + sample = tf.cast(super().sample(sample_shape, seed), self._sample_dtype) + probs = super().probs_parameter() + while len(probs.shape) < len(sample.shape): + probs = probs[None] + sample += tf.cast(probs - tf.stop_gradient(probs), self._sample_dtype) + return sample + + +class GumbleDist(tfd.RelaxedOneHotCategorical): + + def __init__(self, temp, logits=None, probs=None, dtype=None): + self._sample_dtype = dtype or prec.global_policy().compute_dtype + self._exact = tfd.OneHotCategorical(logits=logits, probs=probs) + super().__init__(temp, logits=logits, probs=probs) + + def mode(self): + return tf.cast(self._exact.mode(), self._sample_dtype) + + def entropy(self): + return tf.cast(self._exact.entropy(), self._sample_dtype) + + def sample(self, sample_shape=(), seed=None): + return tf.cast(super().sample(sample_shape, seed), self._sample_dtype) + + +class UnnormalizedHuber(tfd.Normal): + + def __init__(self, loc, scale, threshold=1, **kwargs): + self._threshold = tf.cast(threshold, loc.dtype) + super().__init__(loc, scale, **kwargs) + + def log_prob(self, event): + return -(tf.math.sqrt( + (event - self.mean()) ** 2 + self._threshold ** 2) - self._threshold) + + +class SafeTruncatedNormal(tfd.TruncatedNormal): + + def __init__(self, loc, scale, low, high, clip=1e-6, mult=1): + super().__init__(loc, scale, low, high) + self._clip = clip + self._mult = mult + + def sample(self, *args, **kwargs): + event = super().sample(*args, **kwargs) + if self._clip: + clipped = tf.clip_by_value( + event, self.low + self._clip, self.high - self._clip) + event = event - tf.stop_gradient(event) + tf.stop_gradient(clipped) + if self._mult: + event *= self._mult + return event + + +class TanhBijector(tfp.bijectors.Bijector): + + def __init__(self, validate_args=False, name='tanh'): + super().__init__( + forward_min_event_ndims=0, + validate_args=validate_args, + name=name) + + def _forward(self, x): + return tf.nn.tanh(x) + + def _inverse(self, y): + dtype = y.dtype + y = tf.cast(y, tf.float32) + y = tf.where( + tf.less_equal(tf.abs(y), 1.), + tf.clip_by_value(y, -0.99999997, 0.99999997), y) + y = tf.atanh(y) + y = tf.cast(y, dtype) + return y + + def _forward_log_det_jacobian(self, x): + log2 = tf.math.log(tf.constant(2.0, dtype=x.dtype)) + return 2.0 * (log2 - x - tf.nn.softplus(-2.0 * x)) + + +def lambda_return( + reward, value, pcont, bootstrap, lambda_, axis): + # Setting lambda=1 gives a discounted Monte Carlo return. + # Setting lambda=0 gives a fixed 1-step return. 
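+  # The recursion implemented below is
+  #   R_t = r_t + pcont_t * ((1 - lambda) * V_{t+1} + lambda * R_{t+1}),
+  # evaluated backwards in time via static_scan(reverse=True) and bootstrapped
+  # with `bootstrap` (or zeros) after the final step.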
+ assert reward.shape.ndims == value.shape.ndims, (reward.shape, value.shape) + if isinstance(pcont, (int, float)): + pcont = pcont * tf.ones_like(reward) + dims = list(range(reward.shape.ndims)) + dims = [axis] + dims[1:axis] + [0] + dims[axis + 1:] + if axis != 0: + reward = tf.transpose(reward, dims) + value = tf.transpose(value, dims) + pcont = tf.transpose(pcont, dims) + if bootstrap is None: + bootstrap = tf.zeros_like(value[-1]) + next_values = tf.concat([value[1:], bootstrap[None]], 0) + inputs = reward + pcont * next_values * (1 - lambda_) + returns = static_scan( + lambda agg, cur: cur[0] + cur[1] * lambda_ * agg, + (inputs, pcont), bootstrap, reverse=True) + if axis != 0: + returns = tf.transpose(returns, dims) + return returns + + +class Optimizer(tf.Module): + + def __init__( + self, name, lr, eps=1e-4, clip=None, wd=None, wd_pattern=r'.*', + opt='adam'): + assert 0 <= wd < 1 + assert not clip or 1 <= clip + self._name = name + self._clip = clip + self._wd = wd + self._wd_pattern = wd_pattern + self._opt = { + 'adam': lambda: tf.optimizers.Adam(lr, epsilon=eps), + 'nadam': lambda: tf.optimizers.Nadam(lr, epsilon=eps), + 'adamax': lambda: tf.optimizers.Adamax(lr, epsilon=eps), + 'sgd': lambda: tf.optimizers.SGD(lr), + 'momentum': lambda: tf.optimizers.SGD(lr, 0.9), + }[opt]() + self._mixed = (prec.global_policy().compute_dtype == tf.float16) + if self._mixed: + self._opt = prec.LossScaleOptimizer(self._opt, 'dynamic') + + @property + def variables(self): + return self._opt.variables() + + def __call__(self, tape, loss, modules, prefix=None): + assert loss.dtype is tf.float32, self._name + modules = modules if hasattr(modules, '__len__') else (modules,) + varibs = tf.nest.flatten([module.variables for module in modules]) + count = sum(np.prod(x.shape) for x in varibs) + print(f'Found {count} {self._name} parameters.') + assert len(loss.shape) == 0, loss.shape + tf.debugging.check_numerics(loss, self._name + '_loss') + if self._mixed: + with tape: + loss = self._opt.get_scaled_loss(loss) + grads = tape.gradient(loss, varibs) + if self._mixed: + grads = self._opt.get_unscaled_gradients(grads) + norm = tf.linalg.global_norm(grads) + if not self._mixed: + tf.debugging.check_numerics(norm, self._name + '_norm') + if self._clip: + grads, _ = tf.clip_by_global_norm(grads, self._clip, norm) + if self._wd: + self._apply_weight_decay(varibs) + self._opt.apply_gradients(zip(grads, varibs)) + metrics = {} + if prefix: + metrics[f'{prefix}/{self._name}_loss'] = loss + metrics[f'{prefix}/{self._name}_grad_norm'] = norm + if self._mixed: + metrics[f'{prefix}/{self._name}_loss_scale'] = \ + self._opt.loss_scale._current_loss_scale + else: + metrics[f'{self._name}_loss'] = loss + metrics[f'{self._name}_grad_norm'] = norm + if self._mixed: + metrics[f'{self._name}_loss_scale'] = \ + self._opt.loss_scale._current_loss_scale + return metrics + + def _apply_weight_decay(self, varibs): + nontrivial = (self._wd_pattern != r'.*') + if nontrivial: + print('Applied weight decay to variables:') + for var in varibs: + if re.search(self._wd_pattern, self._name + '/' + var.name): + if nontrivial: + print('- ' + self._name + '/' + var.name) + var.assign((1 - self._wd) * var) + + +def args_type(default): + def parse_string(x): + if default is None: + return x + if isinstance(default, bool): + return bool(['False', 'True'].index(x)) + if isinstance(default, int): + return float(x) if ('e' in x or '.' 
in x) else int(x) + if isinstance(default, (list, tuple)): + return tuple(args_type(default[0])(y) for y in x.split(',')) + return type(default)(x) + def parse_object(x): + if isinstance(default, (list, tuple)): + return tuple(x) + return x + return lambda x: parse_string(x) if isinstance(x, str) else parse_object(x) + + +def static_scan(fn, inputs, start, reverse=False): + last = start + outputs = [[] for _ in tf.nest.flatten(start)] + indices = range(len(tf.nest.flatten(inputs)[0])) + if reverse: + indices = reversed(indices) + for index in indices: + inp = tf.nest.map_structure(lambda x: x[index], inputs) + last = fn(last, inp) + [o.append(l) for o, l in zip(outputs, tf.nest.flatten(last))] + if reverse: + outputs = [list(reversed(x)) for x in outputs] + outputs = [tf.stack(x, 0) for x in outputs] + return tf.nest.pack_sequence_as(start, outputs) + + +def uniform_mixture(dist, dtype=None): + if dist.batch_shape[-1] == 1: + return tfd.BatchReshape(dist, dist.batch_shape[:-1]) + dtype = dtype or prec.global_policy().compute_dtype + weights = tfd.Categorical(tf.zeros(dist.batch_shape, dtype)) + return tfd.MixtureSameFamily(weights, dist) + + +def cat_mixture_entropy(dist): + if isinstance(dist, tfd.MixtureSameFamily): + probs = dist.components_distribution.probs_parameter() + else: + probs = dist.probs_parameter() + return -tf.reduce_mean( + tf.reduce_mean(probs, 2) * + tf.math.log(tf.reduce_mean(probs, 2) + 1e-8), -1) + + +@tf.function +def cem_planner( + state, num_actions, horizon, proposals, topk, iterations, imagine, + objective): + dtype = prec.global_policy().compute_dtype + B, P = list(state.values())[0].shape[0], proposals + H, A = horizon, num_actions + flat_state = {k: tf.repeat(v, P, 0) for k, v in state.items()} + mean = tf.zeros((B, H, A), dtype) + std = tf.ones((B, H, A), dtype) + for _ in range(iterations): + proposals = tf.random.normal((B, P, H, A), dtype=dtype) + proposals = proposals * std[:, None] + mean[:, None] + proposals = tf.clip_by_value(proposals, -1, 1) + flat_proposals = tf.reshape(proposals, (B * P, H, A)) + states = imagine(flat_proposals, flat_state) + scores = objective(states) + scores = tf.reshape(tf.reduce_sum(scores, -1), (B, P)) + _, indices = tf.math.top_k(scores, topk, sorted=False) + best = tf.gather(proposals, indices, axis=1, batch_dims=1) + mean, var = tf.nn.moments(best, 1) + std = tf.sqrt(var + 1e-6) + return mean[:, 0, :] + + +@tf.function +def grad_planner( + state, num_actions, horizon, proposals, iterations, imagine, objective, + kl_scale, step_size): + dtype = prec.global_policy().compute_dtype + B, P = list(state.values())[0].shape[0], proposals + H, A = horizon, num_actions + flat_state = {k: tf.repeat(v, P, 0) for k, v in state.items()} + mean = tf.zeros((B, H, A), dtype) + rawstd = 0.54 * tf.ones((B, H, A), dtype) + for _ in range(iterations): + proposals = tf.random.normal((B, P, H, A), dtype=dtype) + with tf.GradientTape(watch_accessed_variables=False) as tape: + tape.watch(mean) + tape.watch(rawstd) + std = tf.nn.softplus(rawstd) + proposals = proposals * std[:, None] + mean[:, None] + proposals = ( + tf.stop_gradient(tf.clip_by_value(proposals, -1, 1)) + + proposals - tf.stop_gradient(proposals)) + flat_proposals = tf.reshape(proposals, (B * P, H, A)) + states = imagine(flat_proposals, flat_state) + scores = objective(states) + scores = tf.reshape(tf.reduce_sum(scores, -1), (B, P)) + div = tfd.kl_divergence( + tfd.Normal(mean, std), + tfd.Normal(tf.zeros_like(mean), tf.ones_like(std))) + elbo = tf.reduce_sum(scores) - kl_scale * 
div + elbo /= tf.cast(tf.reduce_prod(tf.shape(scores)), dtype) + grad_mean, grad_rawstd = tape.gradient(elbo, [mean, rawstd]) + e, v = tf.nn.moments(grad_mean, [1, 2], keepdims=True) + grad_mean /= tf.sqrt(e * e + v + 1e-4) + e, v = tf.nn.moments(grad_rawstd, [1, 2], keepdims=True) + grad_rawstd /= tf.sqrt(e * e + v + 1e-4) + mean = tf.clip_by_value(mean + step_size * grad_mean, -1, 1) + rawstd = rawstd + step_size * grad_rawstd + return mean[:, 0, :] + + +class Every: + + def __init__(self, every): + self._every = every + self._last = None + + def __call__(self, step): + if not self._every: + return False + if self._last is None: + self._last = step + return True + if step >= self._last + self._every: + self._last += self._every + return True + return False + + +class Once: + + def __init__(self): + self._once = True + + def __call__(self): + if self._once: + self._once = False + return True + return False + + +class Until: + + def __init__(self, until): + self._until = until + + def __call__(self, step): + if not self._until: + return True + return step < self._until + + +def schedule(string, step): + try: + return float(string) + except ValueError: + step = tf.cast(step, tf.float32) + match = re.match(r'linear\((.+),(.+),(.+)\)', string) + if match: + initial, final, duration = [float(group) for group in match.groups()] + mix = tf.clip_by_value(step / duration, 0, 1) + return (1 - mix) * initial + mix * final + match = re.match(r'warmup\((.+),(.+)\)', string) + if match: + warmup, value = [float(group) for group in match.groups()] + scale = tf.clip_by_value(step / warmup, 0, 1) + return scale * value + match = re.match(r'exp\((.+),(.+),(.+)\)', string) + if match: + initial, final, halflife = [float(group) for group in match.groups()] + return (initial - final) * 0.5 ** (step / halflife) + final + raise NotImplementedError(string) diff --git a/DreamerV2/wrappers.py b/DreamerV2/wrappers.py new file mode 100644 index 0000000..7abd6e9 --- /dev/null +++ b/DreamerV2/wrappers.py @@ -0,0 +1,280 @@ +import threading + +import gym +import numpy as np + +class DeepMindControl: + + def __init__(self, name, action_repeat=1, size=(64, 64), camera=None): + domain, task = name.split('_', 1) + if domain == 'cup': # Only domain with multiple words. 
+ domain = 'ball_in_cup' + if isinstance(domain, str): + from dm_control import suite + self._env = suite.load(domain, task) + else: + assert task is None + self._env = domain() + self._action_repeat = action_repeat + self._size = size + if camera is None: + camera = dict(quadruped=2).get(domain, 0) + self._camera = camera + + @property + def observation_space(self): + spaces = {} + for key, value in self._env.observation_spec().items(): + spaces[key] = gym.spaces.Box( + -np.inf, np.inf, value.shape, dtype=np.float32) + spaces['image'] = gym.spaces.Box( + 0, 255, self._size + (3,), dtype=np.uint8) + return gym.spaces.Dict(spaces) + + @property + def action_space(self): + spec = self._env.action_spec() + return gym.spaces.Box(spec.minimum, spec.maximum, dtype=np.float32) + + def step(self, action): + assert np.isfinite(action).all(), action + reward = 0 + for _ in range(self._action_repeat): + time_step = self._env.step(action) + reward += time_step.reward or 0 + if time_step.last(): + break + obs = dict(time_step.observation) + obs['image'] = self.render() + done = time_step.last() + info = {'discount': np.array(time_step.discount, np.float32)} + return obs, reward, done, info + + def reset(self): + time_step = self._env.reset() + obs = dict(time_step.observation) + obs['image'] = self.render() + return obs + + def render(self, *args, **kwargs): + if kwargs.get('mode', 'rgb_array') != 'rgb_array': + raise ValueError("Only render mode 'rgb_array' is supported.") + return self._env.physics.render(*self._size, camera_id=self._camera) + + +class Atari: + + LOCK = threading.Lock() + + def __init__( + self, name, action_repeat=4, size=(84, 84), grayscale=True, noops=30, + life_done=False, sticky_actions=True, all_actions=False): + assert size[0] == size[1] + import gym.wrappers + import gym.envs.atari + with self.LOCK: + env = gym.envs.atari.AtariEnv( + game=name, obs_type='image', frameskip=1, + repeat_action_probability=0.25 if sticky_actions else 0.0, + full_action_space=all_actions) + # Avoid unnecessary rendering in inner env. + env._get_obs = lambda: None + # Tell wrapper that the inner env has no action repeat. 
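+    # (gym's AtariPreprocessing checks the spec id for 'NoFrameskip' before
+    # applying its own frame skip, hence the placeholder spec assigned below.)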
+ env.spec = gym.envs.registration.EnvSpec('NoFrameskip-v0') + env = gym.wrappers.AtariPreprocessing( + env, noops, action_repeat, size[0], life_done, grayscale) + self._env = env + self._grayscale = grayscale + + @property + def observation_space(self): + return gym.spaces.Dict({ + 'image': self._env.observation_space, + 'ram': gym.spaces.Box(0, 255, (128,), np.uint8), + }) + + @property + def action_space(self): + return self._env.action_space + + def close(self): + return self._env.close() + + def reset(self): + with self.LOCK: + image = self._env.reset() + if self._grayscale: + image = image[..., None] + obs = {'image': image, 'ram': self._env.env._get_ram()} + return obs + + def step(self, action): + image, reward, done, info = self._env.step(action) + if self._grayscale: + image = image[..., None] + obs = {'image': image, 'ram': self._env.env._get_ram()} + return obs, reward, done, info + + def render(self, mode): + return self._env.render(mode) + +class CollectDataset: + + def __init__(self, env, callbacks=None, precision=32): + self._env = env + self._callbacks = callbacks or () + self._precision = precision + self._episode = None + + def __getattr__(self, name): + return getattr(self._env, name) + + def step(self, action): + obs, reward, done, info = self._env.step(action) + obs = {k: self._convert(v) for k, v in obs.items()} + transition = obs.copy() + transition['action'] = action + transition['reward'] = reward + transition['discount'] = info.get('discount', np.array(1 - float(done))) + self._episode.append(transition) + if done: + episode = {k: [t[k] for t in self._episode] for k in self._episode[0]} + episode = {k: self._convert(v) for k, v in episode.items()} + info['episode'] = episode + for callback in self._callbacks: + callback(episode) + return obs, reward, done, info + + def reset(self): + obs = self._env.reset() + transition = obs.copy() + transition['action'] = np.zeros(self._env.action_space.shape) + transition['reward'] = 0.0 + transition['discount'] = 1.0 + self._episode = [transition] + return obs + + def _convert(self, value): + value = np.array(value) + if np.issubdtype(value.dtype, np.floating): + dtype = {16: np.float16, 32: np.float32, 64: np.float64}[self._precision] + elif np.issubdtype(value.dtype, np.signedinteger): + dtype = {16: np.int16, 32: np.int32, 64: np.int64}[self._precision] + elif np.issubdtype(value.dtype, np.uint8): + dtype = np.uint8 + else: + raise NotImplementedError(value.dtype) + return value.astype(dtype) + + +class TimeLimit: + + def __init__(self, env, duration): + self._env = env + self._duration = duration + self._step = None + + def __getattr__(self, name): + return getattr(self._env, name) + + def step(self, action): + assert self._step is not None, 'Must reset environment.' 
+ obs, reward, done, info = self._env.step(action) + self._step += 1 + if self._step >= self._duration: + done = True + if 'discount' not in info: + info['discount'] = np.array(1.0).astype(np.float32) + self._step = None + return obs, reward, done, info + + def reset(self): + self._step = 0 + return self._env.reset() + + +class NormalizeActions: + + def __init__(self, env): + self._env = env + self._mask = np.logical_and( + np.isfinite(env.action_space.low), + np.isfinite(env.action_space.high)) + self._low = np.where(self._mask, env.action_space.low, -1) + self._high = np.where(self._mask, env.action_space.high, 1) + + def __getattr__(self, name): + return getattr(self._env, name) + + @property + def action_space(self): + low = np.where(self._mask, -np.ones_like(self._low), self._low) + high = np.where(self._mask, np.ones_like(self._low), self._high) + return gym.spaces.Box(low, high, dtype=np.float32) + + def step(self, action): + original = (action + 1) / 2 * (self._high - self._low) + self._low + original = np.where(self._mask, original, action) + return self._env.step(original) + + +class OneHotAction: + + def __init__(self, env): + assert isinstance(env.action_space, gym.spaces.Discrete) + self._env = env + self._random = np.random.RandomState() + + def __getattr__(self, name): + return getattr(self._env, name) + + @property + def action_space(self): + shape = (self._env.action_space.n,) + space = gym.spaces.Box(low=0, high=1, shape=shape, dtype=np.float32) + space.sample = self._sample_action + return space + + def step(self, action): + index = np.argmax(action).astype(int) + reference = np.zeros_like(action) + reference[index] = 1 + if not np.allclose(reference, action): + raise ValueError(f'Invalid one-hot action:\n{action}') + return self._env.step(index) + + def reset(self): + return self._env.reset() + + def _sample_action(self): + actions = self._env.action_space.n + index = self._random.randint(0, actions) + reference = np.zeros(actions, dtype=np.float32) + reference[index] = 1.0 + return reference + + +class RewardObs: + + def __init__(self, env): + self._env = env + + def __getattr__(self, name): + return getattr(self._env, name) + + @property + def observation_space(self): + spaces = self._env.observation_space.spaces + assert 'reward' not in spaces + spaces['reward'] = gym.spaces.Box(-np.inf, np.inf, dtype=np.float32) + return gym.spaces.Dict(spaces) + + def step(self, action): + obs, reward, done, info = self._env.step(action) + obs['reward'] = reward + return obs, reward, done, info + + def reset(self): + obs = self._env.reset() + obs['reward'] = 0.0 + return obs
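+
+
+# Usage sketch (illustrative only; `make_env` and the chosen task name are
+# hypothetical and not defined in this file): a typical composition of the
+# wrappers above for a pixel-based DeepMind Control task.
+#
+#   def make_env():
+#     env = DeepMindControl('walker_walk', action_repeat=2, size=(64, 64))
+#     env = NormalizeActions(env)
+#     env = TimeLimit(env, duration=500)
+#     env = RewardObs(env)
+#     return env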