From 5066bc1d082c12ff97e4e426448c24543bcdb24c Mon Sep 17 00:00:00 2001 From: Vedant Dave Date: Mon, 29 May 2023 17:11:26 +0200 Subject: [PATCH] Adding Files --- __pycache__/cnn_policy.cpython-37.pyc | Bin 0 -> 2697 bytes __pycache__/cppo_agent.cpython-37.pyc | Bin 0 -> 9121 bytes __pycache__/dynamic_bottleneck.cpython-37.pyc | Bin 0 -> 6326 bytes __pycache__/mpi_utils.cpython-37.pyc | Bin 0 -> 2233 bytes __pycache__/recorder.cpython-37.pyc | Bin 0 -> 2901 bytes __pycache__/rollouts.cpython-37.pyc | Bin 0 -> 6105 bytes __pycache__/utils.cpython-37.pyc | Bin 0 -> 16754 bytes __pycache__/vec_env.cpython-37.pyc | Bin 0 -> 8453 bytes __pycache__/wrappers.cpython-37.pyc | Bin 0 -> 16921 bytes dmc2gym/.gitignore | 2 + dmc2gym/__init__.py | 52 ++ dmc2gym/__pycache__/__init__.cpython-310.pyc | Bin 0 -> 1011 bytes dmc2gym/__pycache__/__init__.cpython-37.pyc | Bin 0 -> 1001 bytes .../natural_imgsource.cpython-37.pyc | Bin 0 -> 6581 bytes dmc2gym/__pycache__/wrappers.cpython-37.pyc | Bin 0 -> 5596 bytes dmc2gym/natural_imgsource.py | 183 +++++++ dmc2gym/wrappers.py | 198 ++++++++ environment.yml | 9 +- local_dm_control_suite/.gitignore | 2 + local_dm_control_suite/README.md | 56 ++ local_dm_control_suite/__init__.py | 151 ++++++ .../__pycache__/__init__.cpython-37.pyc | Bin 0 -> 4123 bytes .../__pycache__/acrobot.cpython-37.pyc | Bin 0 -> 4657 bytes .../__pycache__/ball_in_cup.cpython-37.pyc | Bin 0 -> 3245 bytes .../__pycache__/base.cpython-37.pyc | Bin 0 -> 4065 bytes .../__pycache__/cartpole.cpython-37.pyc | Bin 0 -> 7950 bytes .../__pycache__/cheetah.cpython-37.pyc | Bin 0 -> 2992 bytes .../__pycache__/finger.cpython-37.pyc | Bin 0 -> 7798 bytes .../__pycache__/fish.cpython-37.pyc | Bin 0 -> 6537 bytes .../__pycache__/hopper.cpython-37.pyc | Bin 0 -> 4591 bytes .../__pycache__/humanoid.cpython-37.pyc | Bin 0 -> 7042 bytes .../__pycache__/humanoid_CMU.cpython-37.pyc | Bin 0 -> 6209 bytes .../__pycache__/lqr.cpython-37.pyc | Bin 0 -> 8920 bytes 
.../__pycache__/manipulator.cpython-37.pyc | Bin 0 -> 9099 bytes .../__pycache__/pendulum.cpython-37.pyc | Bin 0 -> 3900 bytes .../__pycache__/point_mass.cpython-37.pyc | Bin 0 -> 4527 bytes .../__pycache__/quadruped.cpython-37.pyc | Bin 0 -> 15727 bytes .../__pycache__/reacher.cpython-37.pyc | Bin 0 -> 4304 bytes .../__pycache__/stacker.cpython-37.pyc | Bin 0 -> 6950 bytes .../__pycache__/swimmer.cpython-37.pyc | Bin 0 -> 7547 bytes .../__pycache__/walker.cpython-37.pyc | Bin 0 -> 5283 bytes local_dm_control_suite/acrobot.py | 127 +++++ local_dm_control_suite/acrobot.xml | 43 ++ local_dm_control_suite/ball_in_cup.py | 100 ++++ local_dm_control_suite/ball_in_cup.xml | 54 ++ local_dm_control_suite/base.py | 112 ++++ local_dm_control_suite/cartpole.py | 230 +++++++++ local_dm_control_suite/cartpole.xml | 37 ++ local_dm_control_suite/cheetah.py | 97 ++++ local_dm_control_suite/cheetah.xml | 73 +++ local_dm_control_suite/common/__init__.py | 39 ++ .../__pycache__/__init__.cpython-37.pyc | Bin 0 -> 1009 bytes local_dm_control_suite/common/materials.xml | 23 + .../common/materials_white_floor.xml | 23 + local_dm_control_suite/common/skybox.xml | 6 + local_dm_control_suite/common/visual.xml | 7 + local_dm_control_suite/demos/mocap_demo.py | 84 +++ local_dm_control_suite/demos/zeros.amc | 213 ++++++++ local_dm_control_suite/explore.py | 84 +++ local_dm_control_suite/finger.py | 217 ++++++++ local_dm_control_suite/finger.xml | 72 +++ local_dm_control_suite/fish.py | 176 +++++++ local_dm_control_suite/fish.xml | 85 ++++ local_dm_control_suite/hopper.py | 138 +++++ local_dm_control_suite/hopper.xml | 66 +++ local_dm_control_suite/humanoid.py | 211 ++++++++ local_dm_control_suite/humanoid.xml | 202 ++++++++ local_dm_control_suite/humanoid_CMU.py | 179 +++++++ local_dm_control_suite/humanoid_CMU.xml | 289 +++++++++++ local_dm_control_suite/lqr.py | 272 ++++++++++ local_dm_control_suite/lqr.xml | 26 + local_dm_control_suite/lqr_solver.py | 142 ++++++ 
local_dm_control_suite/manipulator.py | 290 +++++++++++ local_dm_control_suite/manipulator.xml | 211 ++++++++ local_dm_control_suite/pendulum.py | 114 +++++ local_dm_control_suite/pendulum.xml | 26 + local_dm_control_suite/point_mass.py | 130 +++++ local_dm_control_suite/point_mass.xml | 49 ++ local_dm_control_suite/quadruped.py | 480 ++++++++++++++++++ local_dm_control_suite/quadruped.xml | 329 ++++++++++++ local_dm_control_suite/reacher.py | 116 +++++ local_dm_control_suite/reacher.xml | 47 ++ local_dm_control_suite/stacker.py | 208 ++++++++ local_dm_control_suite/stacker.xml | 193 +++++++ local_dm_control_suite/swimmer.py | 215 ++++++++ local_dm_control_suite/swimmer.xml | 57 +++ local_dm_control_suite/tests/domains_test.py | 292 +++++++++++ local_dm_control_suite/tests/loader_test.py | 52 ++ local_dm_control_suite/tests/lqr_test.py | 88 ++++ local_dm_control_suite/utils/__init__.py | 16 + local_dm_control_suite/utils/parse_amc.py | 251 +++++++++ .../utils/parse_amc_test.py | 68 +++ local_dm_control_suite/utils/randomizers.py | 91 ++++ .../utils/randomizers_test.py | 164 ++++++ local_dm_control_suite/walker.py | 158 ++++++ local_dm_control_suite/walker.xml | 70 +++ local_dm_control_suite/wrappers/__init__.py | 16 + .../wrappers/action_noise.py | 74 +++ .../wrappers/action_noise_test.py | 136 +++++ local_dm_control_suite/wrappers/pixels.py | 120 +++++ .../wrappers/pixels_test.py | 133 +++++ run.py | 28 +- utils.py | 32 ++ 103 files changed, 8329 insertions(+), 5 deletions(-) create mode 100644 __pycache__/cnn_policy.cpython-37.pyc create mode 100644 __pycache__/cppo_agent.cpython-37.pyc create mode 100644 __pycache__/dynamic_bottleneck.cpython-37.pyc create mode 100644 __pycache__/mpi_utils.cpython-37.pyc create mode 100644 __pycache__/recorder.cpython-37.pyc create mode 100644 __pycache__/rollouts.cpython-37.pyc create mode 100644 __pycache__/utils.cpython-37.pyc create mode 100644 __pycache__/vec_env.cpython-37.pyc create mode 100644 
__pycache__/wrappers.cpython-37.pyc create mode 100644 dmc2gym/.gitignore create mode 100644 dmc2gym/__init__.py create mode 100644 dmc2gym/__pycache__/__init__.cpython-310.pyc create mode 100644 dmc2gym/__pycache__/__init__.cpython-37.pyc create mode 100644 dmc2gym/__pycache__/natural_imgsource.cpython-37.pyc create mode 100644 dmc2gym/__pycache__/wrappers.cpython-37.pyc create mode 100644 dmc2gym/natural_imgsource.py create mode 100644 dmc2gym/wrappers.py create mode 100644 local_dm_control_suite/.gitignore create mode 100755 local_dm_control_suite/README.md create mode 100755 local_dm_control_suite/__init__.py create mode 100644 local_dm_control_suite/__pycache__/__init__.cpython-37.pyc create mode 100644 local_dm_control_suite/__pycache__/acrobot.cpython-37.pyc create mode 100644 local_dm_control_suite/__pycache__/ball_in_cup.cpython-37.pyc create mode 100644 local_dm_control_suite/__pycache__/base.cpython-37.pyc create mode 100644 local_dm_control_suite/__pycache__/cartpole.cpython-37.pyc create mode 100644 local_dm_control_suite/__pycache__/cheetah.cpython-37.pyc create mode 100644 local_dm_control_suite/__pycache__/finger.cpython-37.pyc create mode 100644 local_dm_control_suite/__pycache__/fish.cpython-37.pyc create mode 100644 local_dm_control_suite/__pycache__/hopper.cpython-37.pyc create mode 100644 local_dm_control_suite/__pycache__/humanoid.cpython-37.pyc create mode 100644 local_dm_control_suite/__pycache__/humanoid_CMU.cpython-37.pyc create mode 100644 local_dm_control_suite/__pycache__/lqr.cpython-37.pyc create mode 100644 local_dm_control_suite/__pycache__/manipulator.cpython-37.pyc create mode 100644 local_dm_control_suite/__pycache__/pendulum.cpython-37.pyc create mode 100644 local_dm_control_suite/__pycache__/point_mass.cpython-37.pyc create mode 100644 local_dm_control_suite/__pycache__/quadruped.cpython-37.pyc create mode 100644 local_dm_control_suite/__pycache__/reacher.cpython-37.pyc create mode 100644 
local_dm_control_suite/__pycache__/stacker.cpython-37.pyc create mode 100644 local_dm_control_suite/__pycache__/swimmer.cpython-37.pyc create mode 100644 local_dm_control_suite/__pycache__/walker.cpython-37.pyc create mode 100755 local_dm_control_suite/acrobot.py create mode 100755 local_dm_control_suite/acrobot.xml create mode 100755 local_dm_control_suite/ball_in_cup.py create mode 100755 local_dm_control_suite/ball_in_cup.xml create mode 100755 local_dm_control_suite/base.py create mode 100755 local_dm_control_suite/cartpole.py create mode 100755 local_dm_control_suite/cartpole.xml create mode 100755 local_dm_control_suite/cheetah.py create mode 100755 local_dm_control_suite/cheetah.xml create mode 100755 local_dm_control_suite/common/__init__.py create mode 100644 local_dm_control_suite/common/__pycache__/__init__.cpython-37.pyc create mode 100755 local_dm_control_suite/common/materials.xml create mode 100755 local_dm_control_suite/common/materials_white_floor.xml create mode 100755 local_dm_control_suite/common/skybox.xml create mode 100755 local_dm_control_suite/common/visual.xml create mode 100755 local_dm_control_suite/demos/mocap_demo.py create mode 100755 local_dm_control_suite/demos/zeros.amc create mode 100755 local_dm_control_suite/explore.py create mode 100755 local_dm_control_suite/finger.py create mode 100755 local_dm_control_suite/finger.xml create mode 100755 local_dm_control_suite/fish.py create mode 100755 local_dm_control_suite/fish.xml create mode 100755 local_dm_control_suite/hopper.py create mode 100755 local_dm_control_suite/hopper.xml create mode 100755 local_dm_control_suite/humanoid.py create mode 100755 local_dm_control_suite/humanoid.xml create mode 100755 local_dm_control_suite/humanoid_CMU.py create mode 100755 local_dm_control_suite/humanoid_CMU.xml create mode 100755 local_dm_control_suite/lqr.py create mode 100755 local_dm_control_suite/lqr.xml create mode 100755 local_dm_control_suite/lqr_solver.py create mode 100755 
local_dm_control_suite/manipulator.py create mode 100755 local_dm_control_suite/manipulator.xml create mode 100755 local_dm_control_suite/pendulum.py create mode 100755 local_dm_control_suite/pendulum.xml create mode 100755 local_dm_control_suite/point_mass.py create mode 100755 local_dm_control_suite/point_mass.xml create mode 100755 local_dm_control_suite/quadruped.py create mode 100755 local_dm_control_suite/quadruped.xml create mode 100755 local_dm_control_suite/reacher.py create mode 100755 local_dm_control_suite/reacher.xml create mode 100755 local_dm_control_suite/stacker.py create mode 100755 local_dm_control_suite/stacker.xml create mode 100755 local_dm_control_suite/swimmer.py create mode 100755 local_dm_control_suite/swimmer.xml create mode 100755 local_dm_control_suite/tests/domains_test.py create mode 100755 local_dm_control_suite/tests/loader_test.py create mode 100755 local_dm_control_suite/tests/lqr_test.py create mode 100755 local_dm_control_suite/utils/__init__.py create mode 100755 local_dm_control_suite/utils/parse_amc.py create mode 100755 local_dm_control_suite/utils/parse_amc_test.py create mode 100755 local_dm_control_suite/utils/randomizers.py create mode 100755 local_dm_control_suite/utils/randomizers_test.py create mode 100755 local_dm_control_suite/walker.py create mode 100755 local_dm_control_suite/walker.xml create mode 100755 local_dm_control_suite/wrappers/__init__.py create mode 100755 local_dm_control_suite/wrappers/action_noise.py create mode 100755 local_dm_control_suite/wrappers/action_noise_test.py create mode 100755 local_dm_control_suite/wrappers/pixels.py create mode 100755 local_dm_control_suite/wrappers/pixels_test.py diff --git a/__pycache__/cnn_policy.cpython-37.pyc b/__pycache__/cnn_policy.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..aad643bcf04e72c6cc9af7663f62812f7c220198 GIT binary patch literal 2697 zcmZuzOK%*<5$>MH?(FP?G$m6aWh-M@iL&FK32(9`Vh-@|xTB0W` 
zVL#`g-8vXLq9eM`S?CH6F8ZeTd7gh&q{;Nibjs#EZQ=wQn`p#;j#mAHGgf!iKqERQ zoP)1f#A^@KsvS_fc0nEOo^|TpIsY4f@n>jpjCE@Z)YHA%I-$O=-9;a5Agr1%hBpvf z;hfr#jB2*<7aPJA?tw*)p3=-s;W`ZOH_*0f(z{l7K(E(5&^Kw$+4eaT-VR&A_Sq04 zqjSFM>l?(K@YMkCoAm&}@EeOedKV)*b^nCy--0jjxs|^7f;0VABenJ0qJL_y2Ivp! zD?Kr&eRw%Mbyh=?ijg3%sXzJ-S@1RchW&wIl_9MGo!xpw9PR9{YwFGH9op9xR`btp z*TZw(?sK(KZ=C()oMZvGcN*>;aCe{?oKp*mO9|k}F^@9)a ze=zPh1xsq#W0*`aZ8ot+KjHo$XI!5 zx)%VMPK$=I^vsM7W0l1Fsf?;A0%1Io5%k7e1FbAtwuf$8c1*uap)fB}A(e5l+{gFT z5IR_V9u@oHRuyL@EM8HAD4wcojoM+Uq%hqJ^o_R$8_{Z<=$EmIv#1H3>e7?}>RhzO z!3?Qt0IMbE6Rz-Ofc5``<76+yA^0aJZ z<4xmKGM%Zn$kN+H-X(IA$PSUaM7D|CAo5d?uwRPCZAxfkRdd6C2)h?!>JF*V=Ksmm zJ-q(=-rl^(opnhM67MEz(4u#X zLD#2RzUZS31dR?+v)vU(9b#>@!y13lBWO9Oc)X??(0%ff95cO3*m02PS{XYn1_X)e zrZKLwJZ-QUZpCiUTY>Ya6)Tv4}t3euc~H z2|uM1NDqa8(!0TCAB<)vKn5|y?X$prwr~!Jx;1@sFx(GpX6>*9*qvH`v{%4y%@uMg!#1hpf zGJ`1kei^5&M12g^1_JdNaj&pxxCS6fds@43z_a{ccq2t)jq44&8 zj2e;TxE7{GmKFI#B$Zal{*tP4)f7$Cbe*JC6D!pu69L#3C_2t<Ft1KBF4n`i6rZ*gpbghN(UP literal 0 HcmV?d00001 diff --git a/__pycache__/cppo_agent.cpython-37.pyc b/__pycache__/cppo_agent.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ed3616977b0d439ed3bd7f13585ce782d4cbf8fc GIT binary patch literal 9121 zcmb_iTZ|l6TCQ7HS6`>6=YH{coH%yc@hx#~WWDy-apHJmkF(=!vMME%YWh_7bi2B` zeX4qF57q37@e;3umn^X2qDAU{LE?!O61)J5SP;)Vfpi`a0$%a}T7d*oR$}@7Q{B^^ z$t>CjdVKzKKj%NU|Ns8Ke6>)>D){~6k0bUg=N08&C^PyQNUR}*e*nT1ruvGV#Hy{y zylxwaH9yrdZBwN>-A}hH+mdO+&$P03R;E*au9dg*GHv>WR?#lXblRV2mF$vCTmEFL zY?oV8_LR(L{OQ(=J*z67(VVmAuPZFea*q_2^A>h>dlC6ODwdz)!6Yk%F-Kc-*0!LiYDeZt8N%MQH{n9of>bq9Nun6qEz=Hr-jiRH(*XE zhs@pCYI&^>yy{!Qc9qy0{d6SO5W)=*Pq9^|U}IHVXByKVDcIa8mSW~3)lRWAvruBP z49g-;vmDDKwpf7`5og#0D&;Z1~aSyPmF zDpnjNQu|u04%APzUsrGfD$2kLs1Z{-6;s-bbWO~ZyGJ08J0dfP*5@>bH{tLOqX@D$S=$*%$P#$ zw9=bH>2oMsil^tP{XEk1Ii;->Q|CkQS*E}O|#G(a^9oV8li%4Dqf10sfL+%4XW?0#z*3nIVGM$ zP@EUg^97X5#WV5Ic$KBUPp5R6QQesjvHxFHHk7X;#+$O%-{*95>InG#)8)@_V@A0j zU#>s-w?FCs_uu~DQpFT{gg2md>bACB6FMH5u-s0Ew;$Z~g;n(%of>zmm9wJE9tLi! 
zQFS)kQRI7pSG_B=s3wZ?sc!n76IRrijCyA^OIZC7qVrPbnb*uapDtwg+)7!O#yA*e$M%y;g z2zzU1J9Sy^g)cspe#y=c%bl>xxt$*7Y{@IFl9y=cNsvEOI0?wlPoCR?!Z^3>F*k_L z;r2ziH`lJ7!@p{$({|iC)_E4&Y&i}tPUJY#=oWq*MA1#PsG2|5-?TdVE2$;5j6X}8 zN6NzAikerm_*+z0w4yqz@60?c*w2pU<#|k5CB03*96}L+)Qpt!5hVO$wW;;>fwHT9 zNBKy5tU3llDl+@&$b{s}KvEb;4RcIo+7qppin5S8xqhA*EcHl`x37X5BUTccKzzo{Uh&G8oYeN|>eGoT5^eeWLcv*z8XuHxnC>d}-un z*)OpSm*ylZy@z0IrGKn%e%zgB}OA&j@J5X1Il07 zOI?*xeqvo{*REDn`I2`^r%s)cFMqY;a<>(}A+ZB4l0vfIM7VdD z_ZO)Eox-tD7iK^cg~Fh6D8shjs6Mm{q&=Zz6J=@_NUx5w6A2ms+Ll;6l$xRPrqFj9 zxL|bL{1WCDsYc|rpuR|?JHQ&yGIX z>sPMce9QUZ%Ek>!+<}7SU`!so(8F$vr)ZSyo40S>az4Dhar2t6uG7UPV}afWKPTCU zIBXD6*k?inQEs-nj0E_6FQl|h;0!i zE5yzuZ5_6ma00AzSERAZ(1Y&Qc{dOisgm%vJq#`%#jspOJ-{WV$uK@T3%KxQHf?MxEWPN(Jh zc-+p@QHC)3$RJE|@?g=(&X2d9ZOSc^F9r`vPOwH3khl|*Ul3&^V#4=86m=1rzYGK* zW32>Skx$POPC$BDoz)0s5Z1_Rd9{RK05=$We_3FOdEEkzu+TEA?yUUZGIXEBAp}7Z zEhOOffb1RRn(}eJNAg}Bz*f9xk<9>GL0!kH=L)C{|iOpiyosPxQxD<8_zWDGf$m9)5+ zoPn_}b%vDhpZyfSN``l_L;OCG2*j4}iOd!|PcI88`3^*ua?)4`?c!cq*6|0_`ymK~ z`Whl(Kq`j`WFO;un#0c0qpG_e*-UC=!$^4$VGal@Bw%-hc2*X(sZ^wx=i}AU0gg3^ zn1%+gc75nYdiX%zfZ5?EXcNwYDDra8s;A`zKaJdZ^_04EY~P(ZY=pggO&Fc-j*bxi zD=0Li!Z%?%Yd~=-A>T(pl}){AG>51bTGqskPZN6nNbhMqy$AH%wE7u%MsUBTfQ$#a zN+Y2cMj~uzHFE=XN$I76B1(Y4s17JQg;XIXT11#YC^6%S0USDsbh$r8C=KznjAw@V znPFaU&JIf`g*MR~T2mU_AvGhp)G#9$`V>RWt1%=9GRv*D~k${`B5e@I8thV zbzF~CXXW0K6U*x5P;;#LOiZ!fe0CSu8L8vLyfMr_H_Xot^CyP+lf(S6=JUgn%CO{= z%+CySFUTBLP}Az^UOvwEPh*xd5`F8PoNh;D^x# zw0oKG{Kr4&;Ut@{^k0p$WH&r`6kTAnhT$l%K1j~^y$SfXGIL6=1kcz+Tp}d)ZRHaM zcZ>X%R-7l>pf?#$_FqG9iQN%P z)TjkqjgwC_vZyfn+s*6!cLuP>SefpKp)_wW>oph!&JFavf%2(~WUal%rpY&pGQIhg z{<{O{3$hroCe+E&o0MZSv3b}0ROgRTev8e>IoA8@1En{`W@Ea$x94%sFzy`2rTlxi zeI}c~nDFvddE52ohP-{tkeyHWSxEXgx8Z{<*EHy-#l#7UH!lqtQX&#c6Uvo_OsS_n zhUh0#X1@ue@n1q4VJ1*v1cr6UKp^%tRE=eqg!{jZqLICFa-UdAMmWq!k@kDV;NPa< zq%UMouvW5m8gh_a%eF~KC}MURI4CI~K!k#f6y37ON~CzigywDA=C!MYq3v7}C(v;U zP?|h6oA78j3CfN{{^?x(oj>~X-}{e$*tsNi$)4yOnSgTV-T<$(V@$oPakzBvkaAK1 
z4HR-FGh9-t#qlUjy+$Qawsdvj?j(mjloi<_iN+#%jOCNZ2Ohq}L8JLzSbMBM zkE_2qwrmq1r%THZj?XQjLuL>z|B(htjvb3L;4*~OMg|5aWxO{p?H)ZTIqzvCu4iVF6Zt;iIx(KL=Z=)?{iui**4pr&ts(R zspKvtpMHiPl;8@z2j8d;xKFlx=OODZzc`fn<0OJ+MEIrCWL_(FlqUSv^0>=P$ES%OwizOP-~K!{JTU5RZ8EkFu@6H z!&e?|b!#=><4>rHjN!!Xd!AC{`vudT4o%adl7PrB5+U~i-vkk*WM8|jJ-2^Gn)8jI z*5)=f`_tE5j)%3 zq$Lmr7OErJq8o#45L%6iJmmA<5^W<2+V@A#^LL2NH6quD(60U#LikM(1t#4JOu=b& zUMp(zng%?a{E^dmmW)1_g)kU33nt<;T4rH3=F?@kmlJ0*OiPUf=JTK>v|FSe(%d|4 zS{R|MXMweg_{+lBB$`EwGEyg0Q`?z3M8WNo3iN*H#fGaOWVs}EBPodi`EyCSs_=WN z-zP1tkrgTL&7Yw;-anZy_QcRzGxGh&7N}f9rN|B6qr4l`>wuKN+l2A|@2CjLS|kMl z8*_3W-_&A{Ym!Cu7A4nNBt~41tXz6)i5Cg)J^7k~Ps+&=QKy6fd00P0_CFD%gi_R< zrG3+qBWgTEl_sSh#e3UULMCB=a0!6Zp0S;<5$+pfWOZxX2aRt2ml){35DsVMz%Xh8 zNb>Y$gM4U?BeD)YpR+E$&^aRSIQVAdCpC%7=PeA)cZvKe5h?%VLH-40X&)p9PaTfj zD!EJay@I~GgyeC7??SU)&VI=7h9?YB=G})AQTmgHTbg{yl01(Yr$zsShB81D zf*s2nUvx|DeWf{^4U)Wc!cdSFFH2UG0H4vDR4SdiWa@)Sl#H(6-=NPv?j&$Lz#VP> zPZ9Qjk)@T@oyEgAOy1CXCB^@WM*b0zza}D&Ri5K`@bUU-Z$C$n>xbMihK zFP$(0{nv}c`)8u(%a}g}2JH+D(~TN_D1Ee$>?1dF8r33|R1t!dx)vW7y{bd5D-r;F zgW62db7fx5?fUQ*!nKMI4pM~)v%T4bGg;nKxdVsYa$#Le-p?fxF!JVFkjf-&$A2V& KO}+FDrTM=!Pa3-b literal 0 HcmV?d00001 diff --git a/__pycache__/dynamic_bottleneck.cpython-37.pyc b/__pycache__/dynamic_bottleneck.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..eb914aa3fa4f06058818479df64440dc42dd2ce0 GIT binary patch literal 6326 zcmb7I-H#l{ai5-#{ocLh-Zw>wN0FkuG)+=e;t!c7#p97DN~g0TGLk)(Jeb>_+neR= z%&KRWAB#Cd*3lvzfU@Hk!-gJq4dlVkdCE)jKjg8W0wf4v%7a986@7j zuI{exs;;iCuBx8ZYPF={$NoOyAD_~+f2GdkXQJ^YO56e98fSrKGga%hp=#5%Q0qai zo44~!^NdcxE&^i!Q?g6lvR&?0>`J$4SGzU4)}66ux^=tWJz^i}HtYt|)-`T&>!HRi zZ+6eH=Qz8iHFNt|PupxVnQwbZ?8R}jDC^sSn`nX-R;M*?}j)0`z?}u@-xtQ6D58Rz*D-jbw0}t zZa!qT;bG@I_fXqowuN?{7gRgPi@fwuv-5n8S9ldO3%tf>fGP4iKZ3f%8_?3cJhJv6 zbi00QHA<2ILbdMfYr3XMeQmW>#FF$|MxhhFh7vb$qG=)394*ZQvXlYpDeD-h4b&!T z6SajJrK5LpXkji|M2CV_5v^jX1C~;vET=?QNr|eO5=AYU8P?Oni2d65)dx&Vj)0DF 
z#`Pye)fo4Uaep?MBP)19(#$6dJU4DHCQHythF?azFm4}BULYOuvf_3und!WU@)ECL z6|-}EQR7u!dqR?}K#mi@=3$fCFLXXba9dy2fLm#!KO%WvPPO4lnl&N`PN7A;mr2`d zg;&tiJEt-BD%SZ1dat5bohEPaS+&j?aF_>$-%N{qZjbn%g%rmza(4Kw zk%qBSa;|e8xC?1%#8AIRHJ?{&T})o@{KfDuX*SBmk-l5Tn%_>#d;x2rr}%tlr0rt2 z!^N}$e$})({O(8_es84lg=LK|-qC-p3oUu$0Kdoh5@BOA`Tl1NvQ_!=p1xa4YeV8S zlg@mN!%@Xy6&$AcDTf=x0c$U(Gg!M0`7fb&6mq>XV!#|p>tEybg5vcBcun!()lky> zX3XnYT2GHCy){U~;Q52(GC#Hle*74J5&ND6{z~V~bT)19m(ZW%$J4nvZFfGon$D*s z>}4K%SBUEnP%inbQ`v-KCT? zUj}Yz3D)CWsbK2o{MmfeT9vW5cjGv+`tFvEAQD>YLR_k2Do>#u4`*|ID zy)mNI)-eVxch;4Ku73z?{wwW$?MGzCZ=saawuyxtT9D;axntOhZxE3zL4=uA&3qICiT| zChG7j3frD~MZ1AXaFt`9s`%OqPvOQ4+aSm9Pgm?3CbSM5F9azkUbjb{wl6%jPE+69 zXP7PX0lc#lZ8Z&P@#I0zlUBUrVlGJ|8UVKuJPH%3_u&*ZAWZkZAGiPOkN@${?{2+! z1uGaq)BCBlkD{HGOx1+=jR<1?eUKHYJ;x`7y+Uog1$?Xa*aR zxn30btp_r{}IdRWzq2;!+mTmQRpkZl5Td~Yzyz9B4%+bVzixb4A7CV?gtkq{TVeLw!8_#aQkhkC)=`icPyQgNo5hc3fUcwnylcioIl)~+Mmc_ zrd>ab%wwvU1&}$+Z@Gz`554R*2ohXqk zv~xItp36m^c$bj#;B=AM#L`qJU?kgI<~CItBdviaB8-4Mmc^d%BcY~ZQcqSvln8&T zPl-(|t2ii*oJBxOIOvk_cppa-_q#F|yWL*k$s!4^E|ScyT97VwFhxYY2hKe|*`YKFcSTTY zDGSq_$|O^f<*`h(QM;(D#}Bt7ISATk@;I?9 zWJxF-3e=RPqc}%Fsb$8Glo?#i*r5GKO?-^8ug;&}!G_P@^|%`*=b?|p6AoMj{&|)@ zI$P8H@m%kLEI5u*m{SFL{3(FOa_XA7&{GI1GAQl2CEo# zb_{jRAi4{zq!-u)v>SjmwggIWG%6UWu{q4GV1!Bm{RZYO84GN1^qEBW+(gJ01qAw0 zO;(j9x@aUwIZ<>X)Kn&LdB$Qm7ZD++=v{g=gc;r4dk@gBsozP|Cp z)vFuM2Up*}d3AN;`mJ%WQ(DcyPr*X`2w(>#rXZu~gBPEb^^jpbU)(uRCfZPC$PW?I z*YGA7(M!nKoDvXw@{_kv;w6AKCVpJs#U6)9HbaycZ{v`$VNy#Bv#BeUCcmk>MMAij*ogubbZ7O15Gr$XVD zZqifBi63J^Fj)ucI{tsG(^6^$0r_(B&+2?@IBg^|I9EI&D1E|{lUhjBkOZM;PVv?l zZzL?$kqG7Pm|`Pq0~&kmjvl|4=tznUo*&P_GiPX~&?qlblGN6AO%&@Xs?Uk){@aO# zr;xFyuW27&`bgi+?dDT!m`n1*0v=hp5x6idH76KeN*NN!JQ7Kk<|wtyW_j<& z_E@$Gr22Oy6-&)SxUUp0k0+S*g_cllexYrXZI?F(3+h@=Ll3TameXZf+F(8#J{0{Z z;~O0NI+gR9q8mHO!Sb_2;~_yeZg3L$$juNL$=Q{0WLr7CaUHY>0KjSRsA( zQ3jBuU!+tOd6ATLU1T6-BqBWOQr^*%24ztfXv#^VE+mn&9Yk&-nuI$;;2i=N3A{!? 
z31ul;lI*@%0M4Gd@4$qe#P3q%?8V|IG?OB022>3AHoBjo#47+ABA1C+SI~1XoYQPc zZ?FYm8v2~V4_a)8z$j1()&@I{m{V1WOLfeXIDGn2VY`|ILd(5tduSe!l?U!(Ysks0g9N_7Mea0TK zZARw)u<%QZuErznk^abdWInRCE%p7ZfNKI5Nimb_P)aV%K@IuMeTo>$wx${-k4558 z-X7EOCa~|v8hkN?SyZjPEKg96NEoOUj&*586maiU;p&5s%q!jfi|5B3PKto0Q%M|VR5bvQj9mRO8 zKUO|sSwqi!%ntRe$J(Ul8zcR2Zjcki9w*%o1>H6A4Fa^3_$~m#xJT`uQ`?e;+lsT7 zjvj4gm&1To4w*kW@xMdKatD3zy+3>R(+tAOZng>lEwGg3>~-aZ(45#HiSn7TK0-T_ z`E#`FLg=W&iNzZ<@G}B*M9rL_aq$@ewM9j!;>BaasUw_y&gp!K(YOwvjjuD3Q)5A| zv%xF>ul;R8k935h2GC?Tnq^9@>641%$dW@()P6wiisO9JcY|z1IG8Ofae@!bt*)2s zAb+D2Q$<2SaY>N35w{5Z4S@q+G&M<4!2p$*)<%N;lZsg}3ysoz!BS)RcwE9uB#s1J z;=L@UE*v1#hi-Sv5B%hT@*nWM{ci6;mfl%-Q3kQHq0F!RfW)9{tvsk8SFaxG>J2N@ pHKdp)N(AVsPTvve>yVP=c^kl;%G7h2u53(O$sQjSJ*S^F{|664vXB4( literal 0 HcmV?d00001 diff --git a/__pycache__/mpi_utils.cpython-37.pyc b/__pycache__/mpi_utils.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ccd9375fcde7690fd866b664abfa4559254109b7 GIT binary patch literal 2233 zcmb7F&yO256!wq#m28r3m#P$ms!&9wk|Ak)QJtv#((jvf#J+^23-sksy&%V>`wh5Hqelc?9 z6Y>WRPKyVfTTt~4Xe5!eAOq^2Y{22k3UA^Md`j*S$)$Hhq?hq=Fb)s-Ad>U}iT#(b zlI+FQ^84R@mEz#EFzDQZs{7Dn&hUUq3Wk~FM`YkhPx|omWdMdkyLrFPZ_8xzO>Oc? 
zKFic>7l-}oblf)&6Vp!)GL?)n-5;q$=2>ZUKS@=kb$?Rl{aRINrgf&bQ}mC8uj<$E z)E_|8(1r+N=$MW9*lWm;HtYqZ5CFhI_hR3AX*HQxW`@=`D#=URJqOH2LgZy`gowHI zbXE)%M$!iRj}tZ0zY?_p!&ldL9#)fV=OB|wX?9GeO{T=%J3H_PsA6jJLT}ekR0JPm z)o6s!fX?3gdnmR~9j3S&EM2HTCqYm8cW4Y5yB!@f*dH76hQFY5Pg2Q_NaNilhcJR~ zJRovCgA6=pb70_XZXU?cjRLnOJZ5A6Bv=sl3>SovdGw5$=%i)ZO?U{s@y3EZBTvaw z`V{=iNVblmdCQQKPSa}q5e2Us3qEf*?UQcfk9!MgdxtaU@Ht2 zjAeSXX1S4Rne%sbBzQrYm0E74rzWW3k5H6%fLyn1o zgqQR=2l;tpAI8BkxIcz-4(EWhPN73 zL+B;=&A$9UB0O(u%>WX=7UFdnIP$L|`B2%MFU8pKFP4_A_03V}O0sx4TiErg24YYlH1^^fnPh{sD$~R)V;$Ygj8I9rZ(UF0Rp3JHL#NV~ z>*>UXF5I7gX2bf47*5O7wlpkrvUN~F!gY}w%V&8VcU|DESHLx4qk}{_RV%z0Z4VP# zi=>nSUTD?F$-o)XUXW|nGm)u#H5nb z0{a~y9!-;C^(GWTR;du~-f<9KOD%W4IBa;@Vjej`M$}`Gr*JNASsw%q_m(52ZL(5V m3g~#eJiCHPo!@}Bm?vt@x?B1-KQ`K3;=GB8yK)xM4*Lt_(gwu< literal 0 HcmV?d00001 diff --git a/__pycache__/recorder.cpython-37.pyc b/__pycache__/recorder.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..17602f929d21f3123f199121f9bb03ea8e49587e GIT binary patch literal 2901 zcmcImOK%)S5bo}IZ0~yQB;Yu)c^O2I76}`nNJz*;Q4%RaA`T>ngq5PvW_#?6Jv%e$ z8Ry~5o*ao>5JE!Yg3y{9zlU3k8(ebbgmQtedS=%fV_pYl-PO;js;Td*YG0V0Z81DQ zep9HOCSyO-Wc;`ooJY&oAQV$PX5FgiU5>ek#lY!0oYtJ!9e7=jv&&4m%6rU|7y5fn zx1soT*72W016%KKK5LzMgp3s@;2dC5z$#4xJu z6;8?b_$9VSXI$8Z56`W6_{hU+K72d8*7%%laOGoNKKH&Xnp>^W%sx|%ZK1y_+M`(| zpRg^d8jEaSP)P9|&GkNqr}R*8$o1=J`EiI%RapTs+ZEH`s@8e|{`Ef`Pt zlFiUKy?$bL?ZTiKC`<@#>W;!HuieKE7n+WqboPZENC_ zap{eYqlsaf0M-OU{lV2L;cw)aY&6T~AsF}ZYw`K|cbd=f-4lNzXP;i6hw2(J>mpkI z8^nWO3*^j}P3&903}q=xLGyWbiVd43R~)Ihh80mZ7um=uol+thIcIm&%u&sZXE(w<-I@y2?v)~MI~$y$R(`lni}18weh0o6K9O-$0n zUws0?_`NI(6J<_DIoPQ4BB)=5rZCe4j(&jz(b}{X$TL3CX<1B-ETRF`oy@qztxB&~ zv8PdU3m#R}GHwEf%m-Vt}Gx+!XK{ zKa2X?acEq^&v^Yf>}iW=$E)&khAh5BYUDWI4X=xkt=@!)egoodG+W*nppXEhg1+^+Agw>uc98>NH{TdF~jQBfGUdN#i(8iv; zI`QPlG@H0WZcOUho0wElDV&i@kr{bJJ)#Wm6R|Hq)hbrIGpP2DhQXuZO19l;8iB&? 
zX1(H$aktVasmgPMZX6Y9<8P#T&?^o{wl3T@vhTt6p?uU-bU#GP>3>JYm-!Ud)0f#g z96*Vudo2h|D+mUu+KFl04uZQoy|~)Zi_oGwB#2j<(9;w+YiP>&0l;LK!jlsPz`nLz zkh9W}j;5WC)MwG@RS46(-NQ8+C1GBDyg$gIv)RMy4)fF7TVcPjZyjaG$hFGnb9C3{ M#C=#3ZQd4t0L5WmdH?_b literal 0 HcmV?d00001 diff --git a/__pycache__/rollouts.cpython-37.pyc b/__pycache__/rollouts.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6782b3e3b8b226e5add7a9ecc11d9fc73e28981b GIT binary patch literal 6105 zcma)AO^n;d73Po>MNvPi)oOp^IJV<75!%>}<2FU=I*DT-O`B{22dDv|DZ$lHYiTJ` z9#U(sKotenscjT49JFYA)bL!H^r1n@SV_!=6naTaI;)@B2}tuyNDfzdYcHiE*y zYFlc|42lE0Z8PmXjTg9eSL2pn+Bez`*FVu(#m87lYw5DU{aYhnmbu^cMnS@Ron*7c zr19a$@8e}(_dAi`zUUD7?56{G8;@eg*V>G0ZJjf&-__a%H@JznnQhuG@FKU-w|I#= zco%t@SMauZmDlht@ddC`m&NN*5JaP-Q_XjwH_2_#M*m`(mKv$%YN?5qr3JKls`VKj zy{VyRq<~#B#i(0IEwmQR=vy0vEv7}Zc50(7r4_W!J?$RrW^B`FIjKymYKGC(lS;pK zKr1aAJ6b<>bn)2H#^F)$Q|K@KnK&=>n`(#UBX~2%n>l`Ej@P?1PvPhMtpdCD4X)Qg zw+?&IKGW$qy2iRT-8djUJO|9ev6vHF+a_&VN}JDT>vq|Er;};<5fgt*UH}bN>RY_< zP!DxZtDZ_ubBm6DpU&V+ayofYL1z{ArEQ}5j%r`#bc(NVjcjnULzYB8dRusw@WdNv z?rVuQ#bRVt2S!u7&r;UZR%x2CxeWsRzWT=ucngNCheK(0g5D5t(Z7?p!oMwxy-)!d ztL9^FC-S>pS<{U(8k>(f#&934fp;hH!xj^yF+q3aP*bDs0-EmoXEo17tw54TFmT3I5@Rr=-)Nlx&IU7hGZ#Dyd>zw33yVXfHjN|2e6l%;Y2QaLE!H8 z;$Gr&S47bumJSWY@TERrQa0>v6eg{zG-E&LiZ$AjOiG=XSSDzN8dAS_j+ztHkZp)E z8o7|MhV_--jK%YWB8zD0iXx*Wn~?U*GD))d3L4F@82!~$Yq3i9*6aET%l=NWQ|u&q z8*D?bv4(yEv`$Vnt{*k|4jDN8$Sz4&@8XGnfJR+=?aoF*&PCs6*R)S76TnmBKx5ji z#tk@!sYx*21$ryzz&2Z!tUyn_up7lL4o((?7jFAv6^qJBE;ek^AIdszTW1t_iBGp4 zmM6=jA*U|QhCY5sb69fZuE>QBl;?NgA~GeuK}@SK3_uCr2q@-Y7!H$9r1((y(w3s>v3`RpxIfKMiZl?B*CH|7y+*}8ao0m1f z1qBxXYkhlKQhb)WI?cLu^@f%>DuR{~`3!EY5*6TmeS$opPg#KpPK`-*QkyJ*^BSnD z2gtlA7wIn`n?N3cwUKl5>+@a>y~TMC^ZJc>uZrH%yvKNP$3$$sPFYRAN#opF)6|+; zf0=7LC18II+;V>fEoDMg%4hnkX$6uY+u`=ZA}$ULDp(t=PwEOT1sj9sCX3*6&gHts z9X$9$W{|Z-$_^%tba8qjZ5*)4Qo1yKo@W|;);#Ojc@=dM&ze^~c3$mU=PevNuddGU zEIAPqn&H+P@Ic5C>3Zp;#U1^TAuinc!xIbq^fwdZStmZX%;scyvNBmsn^@~4oeHhF 
zk(~>{>Xfj)F1|?VTwX{ku!3bk>J0PqBl~G8`&k9%6wyppAL+NgxB>kmxG!!ZT^aV` zi2H44Fm`8fRrraB=G}S-R|k(4&jO3AJ>7py9$4iWt)jG(-heWgVcVq0Ev^A43(A!~ zaVOG*SB;TX6y6_r+x}y6Vq>RA@N4IW;dTyRB=|A;^?cxUHGum*8@NORt46!*`EX7Utw#^(FuQ6Q6Ft9mn0s0mz7yr8ow5vz^@_X%ybU^dI`R%lY3j^+ zz?XSh#epJ+?^JZvj85Td1#MkRDxk;cK6?WUE9x>|P#AD|7{lu++p)mquOLA+sr46t zudi#r!(6I>uv`7|jDtnmM~yA2HS1Hh?6DW*eAJ{GCG3z(tHG~Av!d3zP~)gcXP(~gwBPyQxWq2*z-&5usq)ZyeX|W zXIh^aA-+yb43!ERh_MvWba4i|P;|2&>G25_ftL{nX`E`rW!0`IAGxwQHze|MkVhi3Yf3-H$%}(7o}|^$)Je62#v2 zlAFHB!(=2%lM%Z4P`d6tB~`0THB0O2rh!+`=gAMWOgU zaDu8bX}^c?ih^oK7KQ@fLZpFD3*r)?%AJuA(1eSTmZ_p%yFgpI(o9gui+x%pKc)e} z>g?YF^ckL*LZF6_u!2IOh9J@~ng|n3sw5FsPEasH(4dhz!jPjL2bE@JHi9bDWR~F| z3|VZvbc_vkD2~vN-t@eSC#ISQK1EC65b*Ixc`y0`)z?#&7g;DP4OLF(Rk#=Lg&lDo z6ERLzqB?gKmY>*rCg0D2{TrT`4od^?PpaTqVlw}pMI;pN6k9_8#1>~ex_A>1iX$+n zBZTQG((uHXQXx@DtOJxfUDJfCM88N(hZi%5lJ*^2Y|Ure2k=6N=$siH#*8Q4uvN;k zGWvpm0@{ZyLVPpmabVIB?f)d%Yw-_)HyFX|{o{vqyFEF%W;p6O#pK zDiJt?uLT>r!^R7MUzMODK2h`OT6YU(4?J*uYF z3`&lvSv80AaW$_F;QX+9NgYxPsBuC)qzZ+=!&!Vp8$K5%1)~@C_9stokQ7Kbq;0c zva<8)n!2DaVpJE@b#+O79=VI^hWa7(1>~MlT(?W^=jHsY`>eXDo>M=9J3oZJF01F! 
z*Ym*_F#aFL^$Y4nT)!9;n$Mx+N0fQXsJ?uU^srbp<3i8ZQNwQ`n^VF2{Q#K*ovv=z zyhf+NJB>l0QE0#R#w#_8&F~`?m2iN%jPOk)kQl zSKn-P*QK!0?VL^Ptoa&s{8nq+e+6X?f2kFOl}4x5>Z?X)xzg=LjW*}yiBMW<@DgUT zpmR9KuwYJ;qPm;+!?3a3se;KkwAkUp zV@K-R$p5;bXVKgDPMurnwu5tPf$}@ixhM$n(%$u}=kVW;8m;hbZ!pH`?rxbggdh9*J}r2Y&zV#p;wSiq7Lo7m&EJ zBq3HWwmQ8y=WFe6#!fr%J8=$6+707Ez18)jXD+&>ms%|ysD3SQ?OR`b4ShYt9-LaY z-F6F+AFg^0<(4}Aw#SRmEw{QgzvZzrcRECIw=Nf!#4z*bL318|2hE~Y#EPbQc$+`24N-zL6~2ok zGHx4Kb3uu(8%C&URD;F4)IV z7L|Y)&ShvBpqAX=i~#DbAgb&~HI;ry{Dmk=Ap1TNI>aF!MIt0LPjbFvKnOX*Nivct zs#^LaD(Xj&fUXLZEMVs`ALi4{Mukk^G_iEjKCz_yAin(MY_mL{9hd<7T zh~i=DXOPcEF@g;l1t)MsmQ4%91#p940x3c?*`5=bO$aH7CXUcqtXl2~00~5Cx93rr zXte^c9dLX_2fhmBuJ#{r_pPew=4;(fy|G-)%b+x+tUGm0%HGi3sOwGv640LP8@khY zKzH3+*4_1J#Vy>zYLTrihy+Lpyrx@%^iH~*m>OB1qsjV2qP9RZt$S1$(EN*>8CfYa zZ#kwVB0*n5>Am?UFi|pTxT2;;`6m-@L5ws z$d(;zdm7ro44zm(*=$-iwPR%E#m*_zm>XKndDaxo0(F~nK$6jQk)v%MfHq+&=dNW6 z`rx`ax*isWMbQp&i=vi6R^-AJzZd8Oc=MF{p=UuosvgvoHgUlZ6D=U81I+c9O%vWc z#X8e|jVtMeJ--&j`Ot6oT0!hU8|%e+zt@A3Dl#4yA$1W=5YorkG#9jc(WWNky1A>= z0;{Fih8zHe8@$W&gCt%SAXsY!#ikRbBx$b1`h z#6=vHjps?Y)jc5U|0Fey=FeQ(JAKo>Qr>95A7cz*5s3kP{1o*kYDqV11YQ2P#M$)y zJ}!niO)vzLb@H6Vk7y|6r38jD6>mjpmA{*#ay@aUKqtuq?!aj>zJuuoY^&*r3cImg zz>)yK)l9M^J7(S5v83k`7K09-*}Jwp3yTIb;WvgDhg>`QccOBd=R(abW>i7C<8cdz zFzJvL%#l^LFK-ktxBT{!@?ZES8vQvAWAI4WhIFLxTk8bTq*|WTyjMZ3dz{{?b&pt} zzs$S?JFU^83m~3KaX_rP0<@?G)TFpf$X@da+xMNSBdI0^_G zz{FNutOvgEo?B=^GI%JO&CpMrFK$Vm#)rAq(q5A%p4l~f-$1{KUT*>K{`mc@8^BQk&3%Bmp+ug>Q$ln-WFA_tVV7gOIxmBK1V3(+jAIyUI&aKd(xvysNy7 za+12CrtShAy=g&^X}GgARU-WpOumE!YfDl|=|%q}^B$A;m~dtF5)+}P3Bu5vqDjI) zUX7YmXP{8@A>nlkMRCy+!0deJYlLb(>$S)+Pi^=Jk zICmYIU7(jy88Q*xpjKTsa3L=P%cSIaHPA+gM`SWabAta4j>Nr#B|a*DgM-=1oE;5p zm$b=Xh*S<>020$~Lk^}=nuE&BTx0`6Ac+J*%nU+Ok(ojWTLO@zB~@!X6`{gJW!>X@ zyw5~4p_V*m5MAeGCajVfuyn-I{p6|x-%9IOa5q(Y6RY)c=s1Jf@oTdZZIC>dM#P90 zmp!lDRec!DUOer2@Av&ya!2M-uc6O)CY|P505D3Yn)3~+>T^WY#bwEV2^oRAqB&cT z5|T^yBmMIieh$fqqsDeI_(}XK6a18mi*E+uRk$NocdZq9Q@Dw~j1f0c7l3yS^X|bb 
zN24YJC&3Q~a|dOmq>QpzuFJw37I`B@vNk%?A21;e#f6oIB5n%#>X7BKN~B%gWsSTd zeO^e<7r_g%LPOje&CPDQ{0F#`ibE=)3+CX!=>6^|N-Ca&jcaT~av7RXH{o(5al&c? zcns}jDi=^SKxP1pZClFWPTo~+QR&#uLCR5iGMagqGivDEUmC7Ng|C|E7b@(SWWi~U z4>wG;sJAeT6!F2ZH9yjSnI&UN<-5nTFTv!7^Ss-+p>AbA; zF8bUb@e4wsq)Z}yPUUwYelb4q7L1M%8v@MeLOk)$dDd-wWZaO36qz*I}Su4CQL4HKfvs&o+YbZ))h)eT8T25A$lN8TkX&MSZ z;4P8yF6=G1C~FQ+Mn>$T=L#C^rugrm#}t$f3dIwe2FLH4v-`=Pq@R)hw{bBde=%3E zUjkn{Sb*9;2^qUuw9=m&m(WH~@Zp9pM7giRp9>p&SKaV!DTmh<>Rb+1J=Hui7cJ`| zz4Txri?%eLmD=|#ORvgFjr3}2zc$e{>NvN#3qb7vb~ouwEUBv1zy^Y$e;Nf?G1@`Z znFL=%Z>QI0f~`K}B@LuxoxZ}{$4u@p5gsIE!RI22@>S$xGt{?vmrCe!h;SOF`=CteQVA%Nn!7NqR>vfqbO$nV^2Y{-!BapD(iBW~xpfWyWS{%a%?>o)AU;fvTiinJaNBfDxOmcR(5BUhO(DS65d$E@tJ3HS&wz8VNeG#Obe^V-dl$eXDQwi{3>FhkMYMr) z5MxQ&&yCwR=h34hG9N9miEsed7JvsevHXQZ^}P2Lyifu`5Ktr7X&5jdvGStZ2=W4WCu zlvF*xiz7uRVpGxLR?MvZq>ZzEkm&T;Ph4LCDqFZ-{}uH0Y0-)d@ozYm6s;CCOa33A z)v*T-|NjbAe}WO*4^=b_q4*|7WCBQHL;|EH%fx_$@o6s@yUJ-!PQmTGrZq1e(5uw@%cVfVq9N@(Dyr^z&!*&#~$^nPkMP zNt!9ei5$C(-3OjFkNkX_oBI{)EhtZL1R7VdO0;iuGVl*tey47F%M? 
zKz2}m6x(Zy`l9(6r{%JIuADmyqi!gfB@g<>oFgnvFKXFmy0M^cv9p&BUw~f&3)2{H!z;$fqM4+CM-s70B=lK!nmegMB>a zBKQb0PYoGlbRV8HshMnyirSti?Ogfl^n`l8pPc+fR7!Z|W#q801~*s234e>tQ}c-{ z8(Xct&P&nFFL0jbQNf_>6KYQa5(bC%%$5*3JUiyiPpOV`y{i)xUeGMrpTZx-iaRJV32}Cg8iKilLNYZ%d>tG}s{w^xJxdGLi zPf$jm=iJd+B}tz!2S@k3c1F(`f3q^*T*#S-I5uySEygBvH}UP*Fm$mavQ2-F13eJN zBxD6b%9OoB`H6ixWz=EpBA5*KpfBbYBnM7+7OCM+VFfxN_GAKYDcK{+3uVUn78L*1 za-47b5h4=w4#we@lfW*o?QdwleQH#?g@NaDW5>0T;gm3My)$t0t?n{61Tx3VSb@9B zd6`BhG%ptb*(b>C3wviLH7JQdpBAIJ#+N53IHy_iXCEy5*318X@X?DIjBw5~xx0u9 zgi^u4OLXPb)l0{Mv)fsqFON%7$AT4JNp$^YVMyma90h`{VP0e z0xKbbnqgeT|nJi3`w|_z}77jU4EX zP7@fs2wPIlaZn>^k=BYL1*3!%DrGr(C$UMM1FYteM~DR6=y0jswQ&vo6oHbk=wZPi zC=zp7Oac`IdC!U(xXiH`&A+(9J@Uw%t+g6G>{_<^frlyBeeG``!oh|M3%e2^umGD& z&!TZc`3@+4#Vv;2dem$6Lw81cLfnva7TZx>x%ouGRf};xSuFi)Z2t(7*i^CA?jw|u zlRjF@oLnJ5^}e~+zk;$`obm+EIptI$r(v_snsQedGxJJt$_Wtyj7QOsTak=Xyo2c+ zT8tY*e3@|vz>s>rUS)nnni^LlXh;A;Qm5|hYL|m|tQlcfYujdPMl3>BPWJk(tXCMk z;kQt?Fj@BwM^)6{j<9bP<+lq`8_!@*L;E;LVgbi(>+4YbalaUqMm27mcdUn{-WJ?9 zAA>+7%Dwn?^J7!480lX8x(!c9`D43oO2m6Hnu0&gWW*d4ompYHxy58Fi^3||tHf+^ zk4g%-fi-w+9dU)r6$BJ51-jC$R{-Yl+{0rVR?bu;a-s6U*5K6MYPA)=gD8q{Tqk1- zt_xx05RUL7lA&?O#JkeG2TloB727+N#;5=yJy+Phq8i!*ibh&_D z^bsVa?M7@i8PTcmA`hV~MwDZ%+e$%}9_`rSe#|OqiUTn~OM_!Hq`gKP@h=lmoUFHN zYdRbFDYT<(!IVRS$bNen=iSi=42bUsjbPV2WYfr%8%5?~wL$w2ngtn! 
zss8{C_DldnvPKi|$i9ys2jh-J%5)+#1l2x7mvNq&jE`7)%I?qbAwpEI@E*ve(TZ~t z!L1o;k<3g^$V+od+%lS_)DznXpk-(+W+ovzC`2U0WCdA0c?%)Msfhu5e38UZW{l(s z{GEj0KM4iklzGCMb!Kh-9W=j3vdF|T6*G?Y8jf%pT_h3C*clg*fhalbY6JgHD!9Ko zC(}|YgDkzI;Ogoif?mOgj}0tD(}2rzK)@W465D1DR3iu-2qf7}=gv8TLaf6G1VQKr zzM_G8312-FDtt+!4)QA+d_gm>4iEG71;*v6Bg6b{M%5LfqB?rp(!b02++hLfR8)l1 z?ihqvu`@p|RmUO7io>FKf9JOGWyg-WeSndL@F=tNpzK!`w+|tHkTLwus{Kt<7xAoz zF`5!F)C7tHDNZaRdTtF=y@n;OLq}cq4`7aNB3@KgIzfZsvJ!Tk+{Cvax{}nd1RFuE zAIWzbXDhFFDzD$FynDK`+3i-4(t^tAxYCzc#~VK2L9ND;zjh|-0vq2uD{+mYC=7~cIvB1Wg@e)==sYbv2Goz( zgsvV~XEt%*{FuVI9v8L0j)3-{J8;f~&#et|A8c(v9u&{?8XI1{%`o-lrHhwPxA8sm z%%E@vOQC`qJ^yR?ZfZV;%^f>jYa>wY6bP=CJ8kB^|#BK`>I z+YaK{q{M?+h2UIF*^*ydMM!9DuEgegJca#*UO$2Yg;@VWt*u~?1o-qS9S?SiC*z?7 zYi2RUf!FDI2sLc?E)57bmoX?>81vX-#9>)D4x0@8wInUtjNf-m2WJQOsD2%`<`Fr` zZ-2_B{!_H4zL5PW->={ZiBg%hn}?cj<7+YqMwR0}uzWLCh!5S^=piswwl{9oR`6j= zD_E>ihvScD_9xhO9vOu#b_YePre1*G!VHghh%5$>iv}$>;0+J^l}Hg20seOeZ%sU* zGJ3@5>#PcYo0aD)AQ#OjKIp@hrz%fgIC~!Y4Jg&$^y-ND!d86cCo7SBNWdnEHjM$L zneV>12$cU21@Y8GS22PN$6drFEMM}BbYoxBYS90M$vq_QjBFa=n*kZf9xFub@L%Sl ziSgka$CBmE!HJ3Y%UQ zd-*hsHG9(}LrbENjGxj6JO)70$-7W~A~F||5AA+i-fZNw`Ayu>uQJ(VVddnFe~WLI z=wub>Wc$6D)MG48Zr|P0|GK;Hv8wc+apUiex(DT!G)rnYvLBVGD>%Z_NHWTmP|9(H zO&Z6LV`LfV7?EV4vT|I$N&dYRfk!jR%ooY#ypD_18G*Up0o4kvBHjl24^|;@MB>8& zMBZQ<3j65I5_tx47jk%rHL1fcps`4~d<1=FuEadG0^r z0+E~;Y|c>gB2q#TcGa!Hq46A#y+UXqKaQ3YE(D&K6-{w*ZkZb=;gtZ9%clqPVtwP| zBDyH?Y3Sj1zzPf_#>9cD3i=8<(Lc@)jSu_kGyK zxA{K8t$<4UYX|ca?|^8LAj<3_+N{|N5bMaHiCaa!jDRx>niyiDlURAsk3?^9`*N78 z<7>8rDXyTUCaRAIt^XG;Q(qLcA#s^&jvKDLVBObBU+0C>=s*eH&6m>CKj&L;c!t>B zgSm-!;PPgwJ=Y?8g(q=A4-w654jV@BLgG+H;5PSWQJzF#Vdx0)W8;x}UP#n)nRZ!# z4{xRJ{vjX0 zbw$L-Ofg@~X_j&UO#OMu-9+w}C8z&BPxj*C$lm&=yuPe2;FNrsbq2Y(RPT2nXK1Ke zyoM!)?}ucqLSaSy-&vf4Dc;*mjJ5Lk^PR-qb#%!`q$l)%@GL(k#Yf2a0Eh~Y=BfY^ zdW}nHOsVP~9yb;zn!nV*w}YE5{5L&!MqFWyCA5X5uYVg2lC8jixp^kEQ{;;@`HU<1 zVCfReZZLTbNqmTtJ&LkVzb}dlC}f_WhL1v2C8$;ZZM1Q75@4MK?7YRE-etlN<2d&k z@T4Wdkx8uK*Ldq^n0$-LFEF9&Qv7j=XXrb)jOQ}F@2seO`d_iqpD_7T-p_ZJxRE3R 
zLH`dHb2ax{{J3~IiTQj11dx2U$S`PHYt!=StmLEw$YmAn;(YO7c@{L{6sO7y#atdC n)F^fES18Xf99)=RczEIIg+~_-EzFgR<>~Sf#t@aCnxFkY$nywR literal 0 HcmV?d00001 diff --git a/__pycache__/vec_env.cpython-37.pyc b/__pycache__/vec_env.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..94896e53f4d5a9a37ac05ad89f6cdd6c5ca4ddfe GIT binary patch literal 8453 zcmbVROLH98b?)c90Rj{Rk&-2~DN5ux6zM1rs~mYMcqaEn27vI%HMN}VaqUyhNV79)3=bEVFx$c{I zei_dVv4H0V|7ASCA{ND&$6D)EVcgT2OQW}Iw?Zcf6JK_`w(oQz>3H#e*xr^=7!6}* z$8RT*3`V|i{BS2gYu^u(_-gGB;DWZ^G*a`{?X`5#+l&+GwUfS|Y)7JLrq)I<@KgJi zl-_Dai80p4#=jYltb5vj zX|jS&yx6$<^;CbD>N~1IWnWypwjK5TYdgO1!sJ@w$Jmd1=k_)HV*@TIzdG3eqIO*b z?W7&`2REMMY6h~BCKph&k%}MNZz9E)k@*@@->>Q2{Tj z>DjejG!%oN{jldhkltY6%h4w_2fz1YKX%&FUMGp1*q4FV181Ff1pb4m&gCfC_T_F6 z`%Zt@OM*ehwIJMb5*gqXm^MUbwnS6Cx=m{NW67i#Nnr)^G#8Qz+VpjBUf;uMS+U>i z$TR4kvQIYfm{wdj2!q6RA@~_@G=22a(_6ZFJgzuGuF7LnE3cpe>DGGML?7#iprLCV z>7c<_mppTvV10%YsV#>6L98fh8gdCEyzb@`I#5Rub{5L2lZ+O zJFQv{ICZfEAy`gJANcKe!ky9IDd`3H93#gZ?p%2W@LbJgUY$MYWLd~OH<`)&*uY8Z zW3xC<$iJCZ!eO6K5U0!0CbVzIOM)nL;{kv#Z2-H9r>2z&BIQiJ%FJOlFOrJ)SI{1p zkZF1ef1@+m?w-~WiaygCl9C6Zbi#WXjcQMXiMu{T!SQ-M2RJZ?w~iMINBYT7f?@b4 zsyb1pm^rhd_=-aicaq_t=VOdB)0a&IdKVMl>o@?*fvZ5@31U12SYJd0o!v-2yaHOh zL{*(#bQLo<0bE|#_Io|t4J1#!XV(jo%S|WVj)pzqgi+$CaU4|49EMn9W?78K!a=wb zJ>;vlydccZnLJrnY;Wq>iOe5CYD;qLFUeDifmRze@&iyEh32#K6z-4yub}BZ_HjDr z3Dt=w6DRQ=k|~~|yqLHfB-`q^wt^iWC!@p&9477jt?@W#S%dcho75B#37C(0NIY_S&n$DFR37`4Wc zGzG@mA#nXj*ICn#a(l**uc1#~M0RX9Twucqgs{kxAEP6`glqb!KEsbWR`50{R!y*$ zfJLyXapl}97@fZhrShc$HsN=I5P(IS`Sln?lrcZGAQIs-;Ho6m>`juR@Jb&o6nMq= zt~VR=M-QaJr zQHCAB-ZoP4<0?0i;@imP?0_@E6xL%M<}Yr4W&V~$1!YN8MGa+H)I|ejMJ$L#l-10| zNbC2u`+h&$=jdZ4TDbOa(aV4#0LjAj0;@KD+UAWMLR)4E_k1;TRtBLV&cKtvMZcE` z;d-0o^T(}Mk@y_n1Q3l4xi~i9R+tkjypx?SPd0&lx}^!@bNyc~L&D(8o!1Vui9NQv zCX!`noH4c!?ITedY(eVXC6zDl4U)68aJmZz z#vSeBPq3TPq?|lAE*gUF!E^6bt`vO{) z56_M3nMQS&P_L?bon9q)P|NoE=(pHw9jm`6;D9w7qhSFHup0JpI|1x2B~Co>+d&8B z$VsE>inHrq!tJ;1?f7(E+PIez-|-xn{QdpnWiv<0QM??3WC|*yOKY6Rt5?mVU6jM( 
z@o;6r(YdTc7yDk~!WWH|x3OL*IQ7j&Ql1|TJF@iS)rp=!xrKgw1PA)hl)u?D=4Mj0 zVf}ArPIOntP@}Kd)iTzr4Ff@G*5#zhw@^R92>A`P{R2|&$NxD?@HD?{iGQ3f0spxK zQxFoGS}5ZC-)_E?p}vWzM!FQYqk%76`174$YbgC0-$4O2!B$~;YWDmvtp#xq!nVYH zpPF0y{j`(`a%$hF?VeV66r00NoF5A1BCjj?iPHw)50L|RfLKeM+Kd;(sX0R6fL@>E zg(m5pq-WfCxXEspHoY&?lJNX~6s9GHDMCseZQQg$`v9s24e|%LjCg0HWoVq+31j&! zzOV3ylP7H9Oy(p zM3g5(#Y2-wxOWac7LayP#WHMxNCp(!d4L&*C3PKgE=C++9x&2gAN>stepm&{0fA?; zi!J+3&ejtKFdz}ZsGgrt-FdUm!xd>%dO`qO^7KPz7UHLiQ zlIzOo^;rffQcuH*W?6m@laz={+7Qazl6O#JAPHi#;0nJD+twxc(*{xjocB3&YKy3Y z<@SDt6!VGRa!no8qCQ%Fik89_BnOC{Ye>og%Kp~2L&<#iOy| zA6eYW#DYPh9U@S5q$lQ~H8%6oLeR?Y@On1?JlT==vGg$no_20T+5bQ~mYn8O`=V^0 zDm}JhZpKX7{qh>J)Kb4$JI+v{48dcuLIY?XPr*q-Xq~Tq(9^*84knOsy^gF3l{ksN_si@lS4_ zFZ@p=b#YRM2zHA(=mGZTG-=+eA`8egvLlwrwh*7)+eox0IyDK~xc!mwnf95XY*dP3 z1wGh4L=kPD>Yo}N6NrcQan@=gx@#uZp-o{_xVZEHHZk>6SPw1PP0Fm_qfKp}r{7 zMJOEj8Ijzd+Kz=3(E1|_p5ylC;q-}ieilv@&yTTG-(=i8$j(8*7}epw`4YYn7SNls z(l-Z&r>VpoJvJ<2%y)bfh0goj@uWw!&a{T2MJB8|>J zlfnF|QG2$#{8hZ}q^GPYOnhq#?5&;mv(bZzvnd;dh>KB8way=0ih%7j5t^$E8ES3T+2K z#|89R=zD@zdt5ov0sZjhCiO{UT$h7v>;(R$npuVS2ybrO=->*-XWN+l3-*f=+cxmd z1Y!Ddc@=B?RDX2gfd+Ug;Z2IR4^f9!Rz|;6TDgX=P%`RGwQklgJGN8dsM8U9Z)1%i zUp4Q9>ZvyPhq*Dx$S$M6>j>K+Y6sUY<_bz(-^|q2$z)eeWGn?XQGXZ)?ab$!!G}E3 zR45S@i07h|=*OtgE=XVl#7kp%+7oT>I|;r7Jb@}5m_WF%}SMusfOK=eOlL(g|HA8MO`-B<&o75C>f@n&dxZwZ@^nJEf$w#HcQ$59ppcz_saU{g?DjI!w|M)v=yo^zU{tX zWUHKgFXX7~-cmQ1(l1U=8+RGe@B7MjYA$5=*ALLsTFhJnEULbSsx8SL-_z$r6Wyta zRZ>fM@IT0u6hSgZRnY{INiEs|8FFMC$)+pNco)O}2PwXYOsg(dD%ib&KY9TxNN05m zIeAjkEn~%yoY8btObutjgMsbgZMG%RTvDggGbg^hW=<5vOnFk0rdn#N8!3a|>ZXTp zs)z)}nJZZC;8S7pS8t`|wyUbNW%B2&f;}Z~V|{#YMQIO@AKTE1)M9*?oh;>FsMT=R z{a9|ZUdjT(nSZDJNy3xbSSBD-bR&<_%Ju9M;tg&tc92yzI$YJz656C^<0w4=Xn|r*+y<^_-H_gfd?!C`DfqN%# zkK;H3$i3c4Ku#vl9|hz-uM9{zdH$Gp*1O+(0M8%yp70*@PNDWb?}Oe$-ovP!@IK_d zKdF7#Tl78v*uCiE2fRno$D>IfCjoiPTLNS$dGmdMoc7KDawdV4J@cxu_~bsM`OVHvRqJ+rFSh;8jX2lxSF5NjX5!phEv!Y6QW?A}&gzCL ziwi|kS~R8pRYQ^FzWdPWwV>^vzTtbdPINl*!^l_F=P#bdzkY!Vm%7_3kG4}d!V(I@ 
zESOmw@@L7tI>Cb=pW|h{9L^cf#heQ9^vkua z3mxz3Mzi}u=f=K+XY1^VKb{GA4o7$jMFh<40$V+E4;&ELp6OZdm_4KF^Z>19Jezph zx&MkEZK_T==(M)W{taJkm){8Sf;S$E*2)c4Yy0~adc&e++qJEh-&tPF#`fxVJI=i> z=vA>>tu{N&s9IHQ8BbM%m9V<9*=YDG&Q#giq8&S--)ab6Q3XhFs|#XH&7nAgBjhp| zR>>-wI}2m;T^hWm&e!nA<>3;_oOe+1%)aGWeYQ*oJq|vu}wmSN%asBA)MwDB3qug$ut@cb0 z>+_D)&-e1cq}_8G4ltS7dHRaq^=nbNR*#xN=h1Sw*)A*pW=(mImTMia4D^))Ah_JB zg;5ze^3{zRn}$oP7`yzI6?uW2Qrl{VSn`ajbyj^f59k1Sk>3T)cD)+t@G#(UmQ>#H zVyn}QZM-GUgi)=&u~<;|=-V=&xEKXdtyR@SiXAWL_!YNSuVe8VL2OoI8)J&IgZl!_ zqr0k5!Q^@!L)Ry9ghx>rmSgg7)|>~)7fl!Sthr$A%#E)gSf`tEJy(%bn^G*WYwVhP zhO7qGYR~9{UIBBsYJgIzm^;rt*Yazs+$9!eXk|@nf@BinQaK9Bji%CAUP7awU2b+d zKt1?>X)&YGmZ`HJjn$0uLqAgQ!*CWYL7RFI^~p&;0@xCc&_!Wb6Hvc=Cp-cY`PLn_&~AUrBLkus%MRZ#cC@I^BJ zQy;uGCSu5)T(s~k3K1kmWKsa%N|+0x##zs-XDMzxn~VcyaUgK=BA7Fp!uQ|B^wRdFF{%XE_GbrWv0)#B`7{sO8iSVvJ~Iy; zF>z;B?i}JB2>-dBslM$U?x5CyBEU>?kqg0X?V0O^o*lXCQ{MbL7T`ti9td%Wmd~5& zfu7?nct-}KXjsRLj@5Je8JSPUo5Np;xPcnU2!4ut6h=pvo8NrfH1>ZBnBCuHLEdP7 z^WOvZUDk{nma+eB-uMoR7k3W7*4gL;H#<-aVbI#7^j{K@zxwutH~#Cl9{BBN)oCGh8L6yRBVS46gEU+ZCjCs<2)s&TvIKu z%hGGxJ7cN{;Pw`FJIHR=#2|Bi$?N#+r)G8(2+^dFl<@=r3-l|3;QcVQfpF_iE zafIYo3eyu1r*c>e<}$c#)|@j>;@DZ3An=4?7R_t?chw@ES07;U11v-=kU`=i6k*h? 
zxBX}>@DyJdPbaKi_gk$2yOZaE#i#kO42{y|!#G6FyJjil+LkHYIwozo*yq{oVH87Y zE0XpgD~=?Sk4rCXMXGkK+4e6t+s$ZsOunYFgSvIxgcE5CGma*l$Wkk@7BV6O)ge3~ zbC2h$RNH>H83vwTrP2-KQq^zOx}lFH;q|B#j1S$>d~&Ub(yKTUSy+NJ+&MBnk2_@{ z_hQRZOey4f(#fgCFpVo+I(VKV>S!5 z6s4|+QHpn24zV+Ltvz}kgGI5GjEp!Coeuh@i(=(#|*~i+gD8ETfOG9Ig3f7`f=FOm6jQyb(tG6IPtx z^5B~YM4NP6k8-U6Syw)ZV%Gpb7m6G);kV zFinG^jg7oo5_-;f<~is%@U1mQ&(?BW)H4T9V&)jXjFd|n9~!+=6~B3ez+)>|iA@!o z-d$;Xos%2THZ4`q*0BR8IY!}JI?A<=^KnXM?M1Cywcco6$s=SYQptP+f&1E(mI^8K+;1>Jjmg=Qg_EJWBC3(*VZFZ>va z0Sozw&yBN9-y3I#XD{;=yW)sVPW#)XfA8hNWLdA^P2rpqYn(oRF`j=p=tTa`X1hiX zOI&V6y$Ks2)9iTu*8T@@wNKSYKU)PsJ2tmsb9;;(iEAE7Y#_pF!eqC!XBFAENS;n| z&WL&uk@F!uqE=XtVPGQoQJlNc44V<+D$_}&%6EpN+&0jrq(7}v`S_bS5)Cp7A93g2 zgXVT8&#{wKntTHn2l1Q)fnZUt+q(`N$L!X_5xkJy+@1mV|LCVIIQ(ue+jBkhm~qVL zWqP@D#`QC=qvpZdLkYVRrE|<^*l_KQby|)2Bs5S!UGB-XqrMhRMMbu`Wk%CD7j9uu z-?7jpdyb=XcT0&ITS#i7dMvlBNIZ&OzglC_K(T1W`EC$`)i*nl+5jXjw08ztd~s$d z8zh_*+qITo$*%g*l?yM+3dK1%Y~U^UNLZbAsMgtsUGqHMo8vX>5gaVPqZ#l0e5sBR zC{daZN_+VKL#0{Cvrmyuu|r=qF2MML0~R&uZDG8V97*BzHjeN(3aVAc1m?{JDA!r2 z*8=Q03ZtE42d@2?C>m3?Bm!Vo0t8?T5T`XFCGM_Vzs#{vs2PwA2fkA)*!-k+Rd_@N zEJ(T|&iQM+6vAdPpmebt$tX8*gnY!cEHj%NMW>i?XJnbU^y!ZSJCEZ}qk4-k*Hkn3 z6n_IxCd#V>YH{fW z+%r1wp_Je_`6DV*1U!X{0k!l^#3JHRE@kU<;9hxN3el@TNxF6fewW4f%2A*CVCZ{b4E zYQlzwg(1>wQj$4IsMCc+bG;0_>O8#U0_0$B)#^|6rqG+UVXGb1s+I@OylQ*dzNqt<{<2gtDx{wsY@sNg)u@M5N#5N?Pi|{joPLPnJ z(y*{xL1-Q{bU8>WFE!)*797W*)f%Z@4)zfl9Fk<5)l>!s{42mqJgpGG3sQNRACZ@` zW?Y}iHKRgQWc0=95$8cx?-Xy{&w|cF#g&&KI916@jS(u<=k32Nw-avIit`=+rsfHz zJz7z1k`5>!{|*jec?d}Blmtv^pJgE+=OwC$x`b3q5S=@R#^{40@@|%l1P}Rwk(UY? 
zigen-jj!;{i6xVBT}9ZKL?TMgvk^V|fhEIW-kPPI0h}G*_MMB2Dbgu0VZTV04E9SN zqPY++h-2$}&o}A*NB#vd48z#G5Stfc^KxunsgqUtW7>q{;d{eBM4{!@ZCWOhx3~?Z ze%q?TUV6)V%WjBG^`;wTBAa&@-M?k_9j^pt29Y63kEp)q>{(y5-!{`{pycm(CgaWE znT$Lm>YdN@8Q_9dMvH7U)6cCMFdJOS2jG0$goTD+)iDGH34hB34w(Hz3*`Ld4#%VK zV$=o9W@@*Hv05||_Ba!?1D0l&#{%KxaR&3l2T-hm%Rha*Z{CKPeayIR-8R>;CL39W zXw?_2>&{iZzAyl%Kfe6R>&w-PuU)(L%JNm15Z2O}*lfZMoT?$dv9cLO$S(+O$N8IU zunSsEM0rx0_-_6jEV_Eo?mk1|B8oC=?3_J#0aKy7bgmWDYpw8^rSx&#C&N|$Kjt-~ z=XLd^S6;jP{0mnwGxb-|wfaF6aFhNT>*7wo!&*8W^*LPr7LH^hkPQ-qH}{F(MRT8& zhEonFi6x|CCHbXm@Gve`xdtb>28ctNTyofvMI?m~u{>h@uzh`=>ws}FUbiemz`!`j z*sR6o3Kl0fKz};E?-tGx|AOz|LDK=gpZMPKoj$++f8KrfT}+Uj4^AZ=4WHHwH*glj zehE3UVij&#N)<;5vUi%@I1j>5Q4qAkxQM~6G&?#m6&GRZwKo|=0~sNcpsA4lC2)TCmyjkVuuw{NOUIWD#!+ZiW>HU25YJ z;T~DcZwcNyqoTKLNq1zNJ{=@f3yj4h9`-bWELXc?pbOc3F3Krk$@p>oWv<#)M;EtaiU5_E90hWM!Z@k zBI`i|qmo5pmR=V{zY^mkV(lRo6Kll|{uoEdK$igu*Ev_@TMbZoX*gkRC1fUPLMY*` z2Exyv&);Y7Tqw|iqX@b7V!DXL8t)0#Xiy;+V{W)%F)}{Ge4o)W`v`U5jMy#1;MjD7 z7%x({_HITzVrMtY%?-$;Albg2>mxhU%R+%?qkJ#xWp4o<(LTk-8)Rp)$mVo_+rgbF zTCQx<%Ysnc#)GhGDxJ+|zJJMLv#n2$g9c%lz3P(Oz!;zOH=S5{lWDpK=RDA3I=*?9xG zl|&PZT_xDlnaVfu6c%2xBvTD0R_RpNHz00XO@zM0mkTx3|2)UU*Jum&i>!^4L0sa? 
z;OfI|;FgyEg(fnbvRFNa4#EFRIO&2q7ee9;0gtmd%i;+_i-KhJAp-s`3nBTB4DO3ymfY2F?M0g@ zrh3PO3>OQE^=UpT_)j$s2znr#nH7d_dU%MGJ7oCq{LLrh_E{R#v)UEdtuhp->sV65k8tpf&oA0sa;@59=9SH!xA5cPQsPyH~9FQ7>E z3Wv`_Z22pwU{Kc9OFdc)=lk~7)3;eW+jjtGE*X+h#}pw)ZFLS16ZtOW#szBO3LB10 z(J|KxLo4I&qi0zAS$`|S&L6#RC0m&aTHe4!i?czaA<{F#Y`OsRh88jN=axWlo2Q2;}iptC&EH#+*5AqhN8w2AH?<@6Q+GB71>)z@z1tMi#U|bIcn(K+7RA6=q?tI6B}`04Ea)93 z3}t|zG~UC%Nuu#R^otYPw`DQRnY7$QyL(PHC`r0Pa2y8F@PUCp{D26$!Ad}lfx z<@aUD{bRi8^q@mna<(EQ^>#VYR3qKM1w^UQ!gf13(-MYgZCP!*Z*$unGb%V+>&TK& z+`eNu5VhDHgYZeuq0;uU@7TC@8BW2O;S}0w8Q5}?+=4BKfU{Dtl9M|jd(`eU7M|_S?yx1PZ7nGRR-NA zF}}!nso7AzDHyZVJfo@2LtlN2Fjtw z7@$=zAryh-0GhGQE`f4SZ(=n8L4pb~Cx|l0&J;n|Jrt}2m?m6tnG-KLqxeYT)MK-c z*3pAqzVk10+5?yWTJTzk?TgyPs<3xQx8IFq%Yjwpuu6+qQLVIg+sBvRBrwsZZW@SxLj+xh;qX}oT9PB=zX*eIxm3Gs)G zty3Pvn`!(7Y!3V+-;-?>+td9LMI+3&jQRr3xeXKNsofIlE^PqZEqX{OAAzao6&B#w za&zMmd^G~0IO8FWjKJ=%c+=RjSnALArh79DZWv{XWY(Mc3ihXe$@-j`w){>s)0=@q zJiB3ghu8z&Gq+)36Jx2DV;RljKDR6$hDe^lnCI~gfYq=T5SPXr4)tdERf#<5p#9Um z5+Fw)MbIy#$5AbkcMi2%kKr4QP86Q*Ze#mg=QPa4+1IeWdi#aWjiw4ZZQKN`@X_sd zA|rPG{H1m`P*E8hbg>BpTRt|~p1ZCG4=j~0t#$&1?L7_b1&PX=VY9QUn=K_0YyUT} zlp_0gGL%rK_vxlEJ>Fol{_mHf>>ZNT?#;^U5$=4$hoVbwMNH&%bq;g<%!2N6{3 zY{VIVt9$AB*hSKjdNdqrYqL_?3?b>aqQv{w1`*!kHI-kF0<|4G4dq|oIdwUxc|Ja( z8P1~oP*}d%#0L_5N%`e&GBWm+JEaD)2ifW=c9t9p}WrmL{Ac|?+D!j|uMD@SB$u3#g^bKF4S zt7J_ovse6jpuCItZsjT}ekc)%#Y1o&IyZno^)HET!TE2pMy-h4TCfT{#ChVU9{7z0 zw{)@}dCe#ASdESU4U2!v;@`306lBaJ@r!>&*nxUfPqkg!@k6Yf7|;g9KL4y_!$1l3 ztXXtsVWOXaW<8E;ZVWA%vkn6s2y6TY^gm{SQp@reE}=+0(gXLv55BLF@CHLl`FQ3k zHjHj;Yu)!k=SCuNzV&Px1fzF4IKnb6BE&gHche-XOnQ%#8#N^?5#t%{`|WC$!OO z>Jij1f_n%UUqIQ>_!SOAkj+8dGdYOyJT`}$n%2+Yf~GY;P}*b7-Z0croE?1dA!tJ# zn-eCo+@o1z1c`I9bxgg1&f*+s3H#34ida*_DcoUNwJ0O4N*br9`cR z9SpUMeAs~$CBq*`%cf*K6(4%Fx#hQ(gJ$R_f?@wv^mH&$YgcxHsJSl4eK3Fr)rW`` zb{rVhtPp!nc68c1Uilh!Zid08s{7^kCO*KyZXs_I7qxPepZsa5Su4wqbZqLZMeh&G zOot)Ew9=BjpKm}cEsaF4bW-V!rQtqL?H9N@#`m8w3t=WrvmfCl#~M^*29caie+ zJFC&!7=!&DS{=+_zrfX?QDN9*FoZjf-+{g%lZju6wp^kd4q>vFP(hdvA6WD)Mg`!< 
zGK~Ndm+fXe^Fs$`k1)icV2#kuhouj+Li=Nr1Q8Av5Q`oI$C!-C;xu8;q5%D!z$wl_ z(KXwfZS2>VEw%FfnAcJNg$+4jM$ccZkuqtqHO#e<<#l;o-49)_>gIoTmR@Mew@Fr-$f1SOywlU zuG=6p7^(@cW`tMq!87oUZR&IkfsYU2r2ab#K}E_yLz?*>x#pV#+)yonXHFhCk(6)t z>Gq}ahvdLRXyXr1A#KR6ChIseZM-6D@XC>|a>_3Fh<;cezf-Wh3^u1A4zlY4j-8`V!b(7HxPi|{&D{co0i1(9JJE1c z!q~3jq*EBl+QPc|}Y>p(kd=^SX#kp_|-`ci(&EMDr+VNl^ z##xxzZFuOH&_)s-_<#<%Ce38Yw$BIoym=E}jy{g#Z0t6@ttxjIR0`ZE5HzYQ!4^L2 z@wW6nRSs{09Tj=!UvTMI8GuNU&Py9S>@K48*iZbGS-i>OXIT6q3yEz=P)*`#jBiPtMtpZM#>IFM z$*9E&*?NL#V)vZ>5S9+R5k7^2(KDtkC5mRdQ*-6H6LWKOhvy!edt~muxtYS;9AWvw RY+k@3BX8c==knh&H+FW#AU{{Y2 zIN>xUht@hN&74DrlE4kTpcVM|*l|0cyvsX0_&^U`PT!C?yueJdA5+r-F_Eega6vJ0 zuy=8Q=Hdq~F^=qL{r^5ayQ9&q}$NfKJ;Iqi1+jz)oVK3BHX<} zba<1T_jOc}V|wfyy9)|WDo;n;tMeW0YLB;cOH$p_zHaM|4s=(C-2c?mect})QBsl5 zEggNa!aMtCeV&ckJ=MJT7Z1)JkCQwvs^MIT;X7l1^R^iqACjCGS==)rnndO-PcmVGDp9j$ z7=cj59L9nj$y6xQmDz+f5wlhmRg$tJz#L`5I%$TQCdzDxd?BI8Gm%$p6&w7Q&Kl_^ zv}#$S*7#E)C)3KbmXcRf6O54+5+=Fn)+;bIlV$7~2O}ZU!YL~YnODYN@vmpe2@|DM z1s4ntyi~DoT>N$8p&E5r2ykiw^qDHfSb>9kWV|eyiFg2Qbf9A)uwZKxLImR}AvoA_ zfxZP>EMThzHY{92Fwy1GsmLeQ6nZx3+m$4TEp)yT7^3-ecQh?BF^^Nb_XUzl7J0otHC3(s3f!?too89W=c#yZNtNa4 OkTvQAR|VN|^!p#B0R|5M literal 0 HcmV?d00001 diff --git a/dmc2gym/__pycache__/__init__.cpython-37.pyc b/dmc2gym/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..71f9894fc4a67a074d69395582f5cab5f073ab06 GIT binary patch literal 1001 zcmY*Y&2AGh5Vkj)O*Wfs(h5ois0v6Na%duuxFCd}Exl0=R6^>-TE%YSX8$yHlD2X$ zz?C=P07o8xm$A5Up|8M+@uua+UVrl=&-mN%c&F285gf0-Y5u)V$PZ^+j}VpTxYT_F zPB=};k&|9ZGw;ZwgtvH;hac!sjng+|5MAOvaxkE_31T9Z7H~4|qz zJMm`?NOj%B+^^z2Q!_rVo4TZ?V*=AKO%s}yiFoj-W4gTY(WgYy&w+`*IMU67^DfWE z?15T++lzqXL*td8|p;OR%#4F6>}I1b}Un& zY)fVnw({7vE_9NzW55`76jebpHcLBqx` zloFt2k-r2ez}B!7)<<^gx+&nyhG;WYim?I@Yi9i{nTbIU8fZY%fonZ*oe2?w^_37D zI?i_e16>EMgTS>AtRvX?no272iJn5+IRkf+==JgaGcrpoLw@v@E-{Sukfr08i4!KL2NxilIkmsZ2# z(r(yqX;PQQeN7sE>7LPWP&cK8y2bS}>b5MQUh>QMUP0ZFWz@@DUqHPg7f@f|dKLAm 
zTtt15>x-x_$y2DG;`)*lx3t>w$B3gx@aI1a+jcvx}qyr%|o)bib?+iylY?$$n+`0mErzS{|Q{m^X7kO+ z22w;8nuv+!!2}RhA2+x&$#zY3qAPigT3C`Rkl=z^bvvu>R?Uq^BnqaBAT-`~o4rvw ze@8X&s>sUyS`I_LcX> z^^5qI{nq8qe*a?Fj7O^3^MZaS8jMuSUmNZxj^_no5PRNJv?gAtH9mht>1(rYPu4|O z+tHy50eO{y0c*M0{}gG zuX^6yQL~qQDS4h8v^-Ba7*~;%3Nw?g6iu&asY^%^H94j!oD#RSP|Y9ew2t&?(Yo+< zzuEC`v3AGbX7i;LchGh-{ksA6iQIVLZUMeh`jXz81kOy;c-8FlVtnp# zJYEzh8SJS^FwU#H0!kRX&JQ{||v*R01u!IZ(Em_vczcp+yn?i!6v z`@s=TSy4+1DSv3zb)@J^$WBIsw1EmsmarfvIlb8o<)B}L1q+W6sHDWOKp>Di8~_1e zIM5Hl1{no>HMiQQH9gF!Xi!D^tL2kxxq9gAH&9a3J?H&>RcuVHcu48~$ z>k0rEu%_+tz`$Azz}H5WrcwUU3Il6x4o8t+O z6#5xp{2iK%ks!vOJ33JT$4AJGam>7SBACx%^sBSw5SSfHQQISMrcJ+2ornf=IF~q9 zqnIs?)l81noMfRs2%^mUef&@S;t_hwaZDnmffG@g2a5@pRZV zkf0fgo-T@6z>iR0K(EhAhlGinG^{?k*ha8*)D9Q+(Gchs_AR>+7jzAF?xQeSgqtJ3NC>aOdgVVUB^$1*6!uR z4*JS(%4GTNcQ@a>_J;S?=Cu!QZCrcj&7_*OY}|b3<|Z7DPoAh|reTr@601!Il|;t? zkD2WyOwvP&Xy0n$S+ORT^%Y@jD`F{k0Y$^*XnDlnkoDMdjX|Aud<6Y=)+DykT+9oXxPb8@1B0fgR~LjrM84g7(=C9OMu?-=UevgF2$JBSrrSuYE6&{vbUE{2%k$E=hR;uchR}G&E-=*7BF3oPUS4en?6^nY9WkJIY#R z{FV}9|2`l`Kzk^n^Mrof(xC2%SU(`vFb{>$_Wl-IU3i49iGW5(8<=Vd8pz@JD53|J-xoEm#KXuf!H z_M3YGmujVK-V0MRUOG61kjP{{t4fRl*?3guHDh9e)qZ`OH;>eI9QBvf+l4ETWEp+M z#OPopyw9;_lQx=>j_^91rKjC}^)@sTwpsFY8y+JybhbX1o3tjy6#PE5Gkwuz;=%*{ zq4?R^Y3ZTZt4>IZ+K%>6SMkJ-Pm@mXiL*GG?4XzWMN#V$Tr|F}O)021i+#cJ#8Ni; z|M8P`zxVT^9$~j1O`S{%3nTpFJ<^g5c`j#Kb&l7nta7`USXZ_1r6>2rh}>^dIyfUs zgrEt|a99gwy!%(u?A`8djl9lxwN35&srOk-nRa~sok0^Pj*iP4D&mXA>_&*L^rRoP z5@$Gwf;bq2W9Q{zu;;b=abi;FP!o;HU=$BWu?$*q>bqt~9j=Y5**@j40s~qcW+=K! 
zU@!Mn8ZGcik3+~(v^{FKdp`TT#EkFCK5yrw6z%X>@KA$>wM$+ubrSPa5?!{auTVz( zhfrk@ws3KU<@o9Xwb-MfM5R_vLzYY2mWDiyGJFdHAjCL|ZUq{Pg@DKl8a4$(UXYl> zt)^;}_(xt)AZzAnBAK5eMK_RX9NE~S3YTeXXHi}fuDBp7!h!pIPQN5f{H=&(v7%Mc zcSShdYeiHw9racHCG?%0v5Fk?KXS0@3BHc}>u+ggATX?KcEl`7^{9WPe&mms_K1rC zQ>F-%ItNuL6<`xH2_QI50{}W4+(#MrfUh{WI*$MbeAu1_~_g`|w zuVAJr&6d037vYMjwRS48!cl*?pW>2$*r;TvT?+>43L5yFq`rZ-#2##Qal6mAyNd4b s_}YdKyL@6{_s#e{*Sr#2R{a}$r+0^k;B18d0IY-qP_M(_5(X*;HM+pcN7wvW~s zl!ie&n9`Kn6*E?C*U-9yxnaFs$C$_c2UdHYmw14-@s`EQyz;=}6~X!o`%Zh2+aFl1 z>LcuJwQTKnQYo~rMo~wo)fUs98i|fVXCaBQO&JZsc-T|vrtF9gqe=Lc@pyP{<59l_ z;EdZ3tUcDVdX{bN*?Z3RmE5{x{p#60H)pxK&+gfK-kzV^V=MP2j9IyZo=?4x&wgZO ztZ(Nwcewk|-Yc0mXZjZ8rM{B~eP_c~gUtN{%e=mym+}BqCGI_Ra2kL0kTnu6vGxJ6 z_7{NM>YL>;eP!CGrB=*RZ0u!h>;qq#unF5^S&&=%EGzdb8@9ZMEvk5GIRkV!duT7= zaJgNza=QyL9#YSN`ZLhfk0|#XtU8}NXNZFawKRi+v6gM*%(I=5Xe~$|OlmtB=~8!) zM%gPbPdhJN*Y!7)5;BX^S#yLi|tHCoh-yIAvT!98ma+cu@$z+ z>TH=+02^%ga!7_2W@)&VZeP7INITI$y?*t?d`Tt;V|2kQjM50J6=S`xV zI#DKIuR_v>l6+4(H0In&w3!|4$GvrP z`~tv%EnS=Cw+4(_0-x^l)CX0KWF4`56O?6)v1K!1uWevX@4skYzkFK zCWw`SSR<*u$QXwwqb)IzE1;DZX@aYrbWOD@0O2SWhyj{yX}3Fwvd0BLSco|U@za>B zOo(GH4t)uayb4?g&*3jX?(D%&;z!dfx2HFS0`4F?K5C%*<8O) zq5!_upU)co1@1qj6&CYAeqlxt%Xt~QReBD_Pff>{lBMZtrzh5Uc>)1p))LJC)tRbA zkap{z9iN*Z7|pDGK1XKRD1Xl6pJ?5#y;lc@g5KBJh4D&WpU_$tlh1&yZrGq(1pd+? 
zek*%w{8ZNLe`owO*(O5geD+K}zt28r>IS4#8$U~UBP*99JYt1=*@gb)yq?eVdhYRg zbH zzyv6Y`Gwv654}eYfc84+05RT6(pZVzXWk=UlS%WFy(UkEY9?vc97b7Zy%{At&2Bt^ ztNQUU>IwBB@_4z2;7`sjbS+CWr0uT6QIz(!VlL9%^T^_1k;a>si`BU3#);rpqQei5$?J&9U>-divf~GxGoSAGwp}>W1g++O4z~bG74j^tHoBK+K5MZIob$G zYBSD|q>$IRc0^J1UMvi zsGw*syl?F5HfiYtQx8E3YX?GYS64}I(*d_PNY}KDbnA{H5|`K>VfH5YLy8fJ_F;|C z=E0(GlOBt?2dH0=+NR5u?{0U*h|VQ%66+2)ZZ$|@gRXoD82K`x>ryDgSXh`xL`$3J=nEl(~kI2OHO41I3??hsxX$beM*{a6%JE^4B=u@zK!9QE!$|cOYVR2P}MM; z(uQFUDrI(IyUWKoI3zh_f~$Sdi!(xjLKbz%v)_a^4B-O;1P+z#Ezl_smnKjKo~}0CZxPYqjYq6 zhrpWz$d7e(koIB(st^t#e-2FBIG_n-h`fm6YLqHOjZQk+F-3*0&bV>QD^yj^;-P?- zijgE^qhd~NXnU=9Qhfnwk7C2`K&GguwTy+4#Z_QY_e}Y=gx+zRn$9{@ClzsrHgyj# zD9p^wg6<8Z%s!Gfpm&z;2v*-W@4k6Q5oa(M%bg1-0I4uoB7*?^8S`$|!Yzcx3LB*| zSDE_W#6L16`4a*rkC=?|Q+hS!ty@UNFT!ZMVqz*6gDh%SOvp6-ld_o)Q9EzpiZP9- zf5v*sR6%gS#`LeocF)h6`td1;g)UW4^cZue1c%enZ3o`QxC8x z_kjV|)GDXq~fH$@RSP9AcQC`*QU?>*Ljru=^MlCtRxp zI$X&)#XQ7VROU|Cg^9YwEvj8Vu3{bVSaonEMzS?h(gAEgLz$?GE2hHH3PDm5k*fsW zC9naYaoI|GLf)mmOM!}Fqp2F*0UTS7i1I!_D<~X86505|3H|u;8#2JT6c;YO;-pizEQM zaZI|z(S#Dk34k(QO^iN~#ZNIn*>7q&hMC5U<(Q>(S@zaKb4l$`-lQM3^cX9m{6Y6# ze7aepTUgGfHv)S^pLHltIiu&$qb%o+zK@=REC=iA)Au5b;-2A{uSL-I@3GqM9AQu(pG*D4jtKat>$EMOgrA7m+)7CVtRPwJ59zrv%a zP_fvO(HGS|y9>v}L}?X{OT4d6ul@5H6&KYkVTfWh#AcfV>es^XV_Z~<5!t~E*(G31 zj;vM(_=<#&HrbBMXke27eUB`hpUg^rN`RP~VVLYg()SQ~4d6vQiV{%}98`5hTe}@N zK_e*FT>1;VU?K3#jMga>uRB9&Y6r49`91;qTGw7S-+)3sq@iKG%qB<@5x-|VZWh!@b=O_vZ=Ps5Y&YyH79o#4c_&TLA T^LYm~r}?W9MAaEA2f_aVqt9F4 literal 0 HcmV?d00001 diff --git a/dmc2gym/natural_imgsource.py b/dmc2gym/natural_imgsource.py new file mode 100644 index 0000000..42ef62f --- /dev/null +++ b/dmc2gym/natural_imgsource.py @@ -0,0 +1,183 @@ + +# Copyright (c) Facebook, Inc. and its affiliates. +# All rights reserved. +# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. 
+ +import numpy as np +import cv2 +import skvideo.io +import random +import tqdm + +class BackgroundMatting(object): + """ + Produce a mask by masking the given color. This is a simple strategy + but effective for many games. + """ + def __init__(self, color): + """ + Args: + color: a (r, g, b) tuple or single value for grayscale + """ + self._color = color + + def get_mask(self, img): + return img == self._color + + +class ImageSource(object): + """ + Source of natural images to be added to a simulated environment. + """ + def get_image(self): + """ + Returns: + an RGB image of [h, w, 3] with a fixed shape. + """ + pass + + def reset(self): + """ Called when an episode ends. """ + pass + + +class FixedColorSource(ImageSource): + def __init__(self, shape, color): + """ + Args: + shape: [h, w] + color: a 3-tuple + """ + self.arr = np.zeros((shape[0], shape[1], 3)) + self.arr[:, :] = color + + def get_image(self): + return self.arr + + +class RandomColorSource(ImageSource): + def __init__(self, shape): + """ + Args: + shape: [h, w] + """ + self.shape = shape + self.arr = None + self.reset() + + def reset(self): + self._color = np.random.randint(0, 256, size=(3,)) + self.arr = np.zeros((self.shape[0], self.shape[1], 3)) + self.arr[:, :] = self._color + + def get_image(self): + return self.arr + + +class NoiseSource(ImageSource): + def __init__(self, shape, strength=255): + """ + Args: + shape: [h, w] + strength (int): the strength of noise, in range [0, 255] + """ + self.shape = shape + self.strength = strength + + def get_image(self): + return np.random.randn(self.shape[0], self.shape[1], 3) * self.strength + + +class RandomImageSource(ImageSource): + def __init__(self, shape, filelist, total_frames=None, grayscale=False): + """ + Args: + shape: [h, w] + filelist: a list of image files + """ + self.grayscale = grayscale + self.total_frames = total_frames + self.shape = shape + self.filelist = filelist + self.build_arr() + self.current_idx = 0 + self.reset() + + def 
build_arr(self): + self.total_frames = self.total_frames if self.total_frames else len(self.filelist) + self.arr = np.zeros((self.total_frames, self.shape[0], self.shape[1]) + ((3,) if not self.grayscale else (1,))) + for i in range(self.total_frames): + # if i % len(self.filelist) == 0: random.shuffle(self.filelist) + fname = self.filelist[i % len(self.filelist)] + if self.grayscale: im = cv2.imread(fname, cv2.IMREAD_GRAYSCALE)[..., None] + else: im = cv2.imread(fname, cv2.IMREAD_COLOR) + self.arr[i] = cv2.resize(im, (self.shape[1], self.shape[0])) ## THIS IS NOT A BUG! cv2 uses (width, height) + + def reset(self): + self._loc = np.random.randint(0, self.total_frames) + + def get_image(self): + return self.arr[self._loc] + + +class RandomVideoSource(ImageSource): + def __init__(self, shape, filelist, total_frames=None, grayscale=False): + """ + Args: + shape: [h, w] + filelist: a list of video files + """ + self.grayscale = grayscale + self.total_frames = total_frames + self.shape = shape + self.filelist = filelist + self.build_arr() + self.current_idx = 0 + self.reset() + + def build_arr(self): + if not self.total_frames: + self.total_frames = 0 + self.arr = None + random.shuffle(self.filelist) + for fname in tqdm.tqdm(self.filelist, desc="Loading videos for natural", position=0): + if self.grayscale: frames = skvideo.io.vread(fname, outputdict={"-pix_fmt": "gray"}) + else: frames = skvideo.io.vread(fname) + local_arr = np.zeros((frames.shape[0], self.shape[0], self.shape[1]) + ((3,) if not self.grayscale else (1,))) + for i in tqdm.tqdm(range(frames.shape[0]), desc="video frames", position=1): + local_arr[i] = cv2.resize(frames[i], (self.shape[1], self.shape[0])) ## THIS IS NOT A BUG! 
cv2 uses (width, height) + if self.arr is None: + self.arr = local_arr + else: + self.arr = np.concatenate([self.arr, local_arr], 0) + self.total_frames += local_arr.shape[0] + else: + self.arr = np.zeros((self.total_frames, self.shape[0], self.shape[1]) + ((3,) if not self.grayscale else (1,))) + total_frame_i = 0 + file_i = 0 + with tqdm.tqdm(total=self.total_frames, desc="Loading videos for natural") as pbar: + while total_frame_i < self.total_frames: + if file_i % len(self.filelist) == 0: random.shuffle(self.filelist) + file_i += 1 + fname = self.filelist[file_i % len(self.filelist)] + if self.grayscale: frames = skvideo.io.vread(fname, outputdict={"-pix_fmt": "gray"}) + else: frames = skvideo.io.vread(fname) + for frame_i in range(frames.shape[0]): + if total_frame_i >= self.total_frames: break + if self.grayscale: + self.arr[total_frame_i] = cv2.resize(frames[frame_i], (self.shape[1], self.shape[0]))[..., None] ## THIS IS NOT A BUG! cv2 uses (width, height) + else: + self.arr[total_frame_i] = cv2.resize(frames[frame_i], (self.shape[1], self.shape[0])) + pbar.update(1) + total_frame_i += 1 + + + def reset(self): + self._loc = np.random.randint(0, self.total_frames) + + def get_image(self): + img = self.arr[self._loc % self.total_frames] + self._loc += 1 + return img diff --git a/dmc2gym/wrappers.py b/dmc2gym/wrappers.py new file mode 100644 index 0000000..077f2eb --- /dev/null +++ b/dmc2gym/wrappers.py @@ -0,0 +1,198 @@ +from gym import core, spaces +import glob +import os +import local_dm_control_suite as suite +from dm_env import specs +import numpy as np +import skimage.io + +from dmc2gym import natural_imgsource + + +def _spec_to_box(spec): + def extract_min_max(s): + assert s.dtype == np.float64 or s.dtype == np.float32 + dim = np.int(np.prod(s.shape)) + if type(s) == specs.Array: + bound = np.inf * np.ones(dim, dtype=np.float32) + return -bound, bound + elif type(s) == specs.BoundedArray: + zeros = np.zeros(dim, dtype=np.float32) + return s.minimum + 
zeros, s.maximum + zeros + + mins, maxs = [], [] + for s in spec: + mn, mx = extract_min_max(s) + mins.append(mn) + maxs.append(mx) + low = np.concatenate(mins, axis=0) + high = np.concatenate(maxs, axis=0) + assert low.shape == high.shape + return spaces.Box(low, high, dtype=np.float32) + + +def _flatten_obs(obs): + obs_pieces = [] + for v in obs.values(): + flat = np.array([v]) if np.isscalar(v) else v.ravel() + obs_pieces.append(flat) + return np.concatenate(obs_pieces, axis=0) + + +class DMCWrapper(core.Env): + def __init__( + self, + domain_name, + task_name, + resource_files, + img_source, + total_frames, + task_kwargs=None, + visualize_reward={}, + from_pixels=False, + height=84, + width=84, + camera_id=0, + frame_skip=1, + environment_kwargs=None + ): + assert 'random' in task_kwargs, 'please specify a seed, for deterministic behaviour' + self._from_pixels = from_pixels + self._height = height + self._width = width + self._camera_id = camera_id + self._frame_skip = frame_skip + self._img_source = img_source + + # create task + self._env = suite.load( + domain_name=domain_name, + task_name=task_name, + task_kwargs=task_kwargs, + visualize_reward=visualize_reward, + environment_kwargs=environment_kwargs + ) + + # true and normalized action spaces + self._true_action_space = _spec_to_box([self._env.action_spec()]) + self._norm_action_space = spaces.Box( + low=-1.0, + high=1.0, + shape=self._true_action_space.shape, + dtype=np.float32 + ) + + # create observation space + if from_pixels: + self._observation_space = spaces.Box( + low=0, high=255, shape=[3, height, width], dtype=np.uint8 + ) + else: + self._observation_space = _spec_to_box( + self._env.observation_spec().values() + ) + + self._internal_state_space = spaces.Box( + low=-np.inf, + high=np.inf, + shape=self._env.physics.get_state().shape, + dtype=np.float32 + ) + + # background + if img_source is not None: + shape2d = (height, width) + if img_source == "color": + self._bg_source = 
natural_imgsource.RandomColorSource(shape2d) + elif img_source == "noise": + self._bg_source = natural_imgsource.NoiseSource(shape2d) + else: + files = glob.glob(os.path.expanduser(resource_files)) + assert len(files), "Pattern {} does not match any files".format( + resource_files + ) + if img_source == "images": + self._bg_source = natural_imgsource.RandomImageSource(shape2d, files, grayscale=True, total_frames=total_frames) + elif img_source == "video": + self._bg_source = natural_imgsource.RandomVideoSource(shape2d, files, grayscale=True, total_frames=total_frames) + else: + raise Exception("img_source %s not defined." % img_source) + + # set seed + self.seed(seed=task_kwargs.get('random', 1)) + + def __getattr__(self, name): + return getattr(self._env, name) + + def _get_obs(self, time_step): + if self._from_pixels: + obs = self.render( + height=self._height, + width=self._width, + camera_id=self._camera_id + ) + if self._img_source is not None: + mask = np.logical_and((obs[:, :, 2] > obs[:, :, 1]), (obs[:, :, 2] > obs[:, :, 0])) # hardcoded for dmc + bg = self._bg_source.get_image() + obs[mask] = bg[mask] + obs = obs.transpose(2, 0, 1).copy() + else: + obs = _flatten_obs(time_step.observation) + return obs + + def _convert_action(self, action): + action = action.astype(np.float64) + true_delta = self._true_action_space.high - self._true_action_space.low + norm_delta = self._norm_action_space.high - self._norm_action_space.low + action = (action - self._norm_action_space.low) / norm_delta + action = action * true_delta + self._true_action_space.low + action = action.astype(np.float32) + return action + + @property + def observation_space(self): + return self._observation_space + + @property + def internal_state_space(self): + return self._internal_state_space + + @property + def action_space(self): + return self._norm_action_space + + def seed(self, seed): + self._true_action_space.seed(seed) + self._norm_action_space.seed(seed) + 
self._observation_space.seed(seed) + + def step(self, action): + assert self._norm_action_space.contains(action) + action = self._convert_action(action) + assert self._true_action_space.contains(action) + reward = 0 + extra = {'internal_state': self._env.physics.get_state().copy()} + + for _ in range(self._frame_skip): + time_step = self._env.step(action) + reward += time_step.reward or 0 + done = time_step.last() + if done: + break + obs = self._get_obs(time_step) + extra['discount'] = time_step.discount + return obs, reward, done, extra + + def reset(self): + time_step = self._env.reset() + obs = self._get_obs(time_step) + return obs + + def render(self, mode='rgb_array', height=None, width=None, camera_id=0): + assert mode == 'rgb_array', 'only support rgb_array mode, given %s' % mode + height = height or self._height + width = width or self._width + camera_id = camera_id or self._camera_id + return self._env.physics.render( + height=height, width=width, camera_id=camera_id + ) diff --git a/environment.yml b/environment.yml index 04b1c73..a0dc7c4 100644 --- a/environment.yml +++ b/environment.yml @@ -1,4 +1,4 @@ -name: tf1 +name: dbai channels: - conda-forge - defaults @@ -260,4 +260,9 @@ dependencies: - nvidia-cuda-runtime-cu11==11.7.99 - nvidia-cudnn-cu11==8.5.0.96 - torch==1.13.1 -prefix: /home/vedant/anaconda3/envs/tf1 + - git+https://github.com/deepmind/dm_control.git + - imageio + - scikit-image + - scikit-video + - opencv-python +prefix: /home/vedant/anaconda3/envs/dbai diff --git a/local_dm_control_suite/.gitignore b/local_dm_control_suite/.gitignore new file mode 100644 index 0000000..09bf5d6 --- /dev/null +++ b/local_dm_control_suite/.gitignore @@ -0,0 +1,2 @@ +*.pyc +__pycache__/ \ No newline at end of file diff --git a/local_dm_control_suite/README.md b/local_dm_control_suite/README.md new file mode 100755 index 0000000..135ab42 --- /dev/null +++ b/local_dm_control_suite/README.md @@ -0,0 +1,56 @@ +# DeepMind Control Suite. 
+ +This submodule contains the domains and tasks described in the +[DeepMind Control Suite tech report](https://arxiv.org/abs/1801.00690). + +## Quickstart + +```python +from dm_control import suite +import numpy as np + +# Load one task: +env = suite.load(domain_name="cartpole", task_name="swingup") + +# Iterate over a task set: +for domain_name, task_name in suite.BENCHMARKING: + env = suite.load(domain_name, task_name) + +# Step through an episode and print out reward, discount and observation. +action_spec = env.action_spec() +time_step = env.reset() +while not time_step.last(): + action = np.random.uniform(action_spec.minimum, + action_spec.maximum, + size=action_spec.shape) + time_step = env.step(action) + print(time_step.reward, time_step.discount, time_step.observation) +``` + +## Illustration video + +Below is a video montage of solved Control Suite tasks, with reward +visualisation enabled. + +[![Video montage](https://img.youtube.com/vi/rAai4QzcYbs/0.jpg)](https://www.youtube.com/watch?v=rAai4QzcYbs) + + +### Quadruped domain [April 2019] + +Roughly based on the 'ant' model introduced by [Schulman et al. 2015](https://arxiv.org/abs/1506.02438). Main modifications to the body are: + +- 4 DoFs per leg, 1 constraining tendon. +- 3 actuators per leg: 'yaw', 'lift', 'extend'. +- Filtered position actuators with timescale of 100ms. +- Sensors include an IMU, force/torque sensors, and rangefinders. + +Four tasks: + +- `walk` and `run`: self-right the body then move forward at a desired speed. +- `escape`: escape a bowl-shaped random terrain (uses rangefinders). +- `fetch`, go to a moving ball and bring it to a target. + +All behaviors in the video below were trained with [Abdolmaleki et al's +MPO](https://arxiv.org/abs/1806.06920). 
+ +[![Video montage](https://img.youtube.com/vi/RhRLjbb7pBE/0.jpg)](https://www.youtube.com/watch?v=RhRLjbb7pBE) diff --git a/local_dm_control_suite/__init__.py b/local_dm_control_suite/__init__.py new file mode 100755 index 0000000..c4d7cb9 --- /dev/null +++ b/local_dm_control_suite/__init__.py @@ -0,0 +1,151 @@ +# Copyright 2017 The dm_control Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""A collection of MuJoCo-based Reinforcement Learning environments.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections +import inspect +import itertools + +from dm_control.rl import control + +from local_dm_control_suite import acrobot +from local_dm_control_suite import ball_in_cup +from local_dm_control_suite import cartpole +from local_dm_control_suite import cheetah +from local_dm_control_suite import finger +from local_dm_control_suite import fish +from local_dm_control_suite import hopper +from local_dm_control_suite import humanoid +from local_dm_control_suite import humanoid_CMU +from local_dm_control_suite import lqr +from local_dm_control_suite import manipulator +from local_dm_control_suite import pendulum +from local_dm_control_suite import point_mass +from local_dm_control_suite import quadruped +from local_dm_control_suite import reacher +from local_dm_control_suite import stacker +from 
local_dm_control_suite import swimmer +from local_dm_control_suite import walker + +# Find all domains imported. +_DOMAINS = {name: module for name, module in locals().items() + if inspect.ismodule(module) and hasattr(module, 'SUITE')} + + +def _get_tasks(tag): + """Returns a sequence of (domain name, task name) pairs for the given tag.""" + result = [] + + for domain_name in sorted(_DOMAINS.keys()): + domain = _DOMAINS[domain_name] + + if tag is None: + tasks_in_domain = domain.SUITE + else: + tasks_in_domain = domain.SUITE.tagged(tag) + + for task_name in tasks_in_domain.keys(): + result.append((domain_name, task_name)) + + return tuple(result) + + +def _get_tasks_by_domain(tasks): + """Returns a dict mapping from task name to a tuple of domain names.""" + result = collections.defaultdict(list) + + for domain_name, task_name in tasks: + result[domain_name].append(task_name) + + return {k: tuple(v) for k, v in result.items()} + + +# A sequence containing all (domain name, task name) pairs. +ALL_TASKS = _get_tasks(tag=None) + +# Subsets of ALL_TASKS, generated via the tag mechanism. +BENCHMARKING = _get_tasks('benchmarking') +EASY = _get_tasks('easy') +HARD = _get_tasks('hard') +EXTRA = tuple(sorted(set(ALL_TASKS) - set(BENCHMARKING))) + +# A mapping from each domain name to a sequence of its task names. +TASKS_BY_DOMAIN = _get_tasks_by_domain(ALL_TASKS) + + +def load(domain_name, task_name, task_kwargs=None, environment_kwargs=None, + visualize_reward=False): + """Returns an environment from a domain name, task name and optional settings. + + ```python + env = suite.load('cartpole', 'balance') + ``` + + Args: + domain_name: A string containing the name of a domain. + task_name: A string containing the name of a task. + task_kwargs: Optional `dict` of keyword arguments for the task. + environment_kwargs: Optional `dict` specifying keyword arguments for the + environment. + visualize_reward: Optional `bool`. 
If `True`, object colours in rendered + frames are set to indicate the reward at each step. Default `False`. + + Returns: + The requested environment. + """ + return build_environment(domain_name, task_name, task_kwargs, + environment_kwargs, visualize_reward) + + +def build_environment(domain_name, task_name, task_kwargs=None, + environment_kwargs=None, visualize_reward=False): + """Returns an environment from the suite given a domain name and a task name. + + Args: + domain_name: A string containing the name of a domain. + task_name: A string containing the name of a task. + task_kwargs: Optional `dict` specifying keyword arguments for the task. + environment_kwargs: Optional `dict` specifying keyword arguments for the + environment. + visualize_reward: Optional `bool`. If `True`, object colours in rendered + frames are set to indicate the reward at each step. Default `False`. + + Raises: + ValueError: If the domain or task doesn't exist. + + Returns: + An instance of the requested environment. 
+ """ + if domain_name not in _DOMAINS: + raise ValueError('Domain {!r} does not exist.'.format(domain_name)) + + domain = _DOMAINS[domain_name] + + if task_name not in domain.SUITE: + raise ValueError('Level {!r} does not exist in domain {!r}.'.format( + task_name, domain_name)) + + task_kwargs = task_kwargs or {} + if environment_kwargs is not None: + task_kwargs = task_kwargs.copy() + task_kwargs['environment_kwargs'] = environment_kwargs + env = domain.SUITE[task_name](**task_kwargs) + env.task.visualize_reward = visualize_reward + return env diff --git a/local_dm_control_suite/__pycache__/__init__.cpython-37.pyc b/local_dm_control_suite/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ab9857a3de34cb08e1822efa645a38c58a00a38b GIT binary patch literal 4123 zcmdT{+j0}h8J?3yvV24~#s(G!Sg^&i@>VL9Y$%6{z_5@7Q-!^OoeDK>Yg*ReImkUd z20P=MHI=*MCNE%emj}qp%x$jn3VW6B?~!d43$gBh(|qHFUxzu<4o#TfmlUDO?FFZUz07&~|NAR^2K} zJFG<`?uZ&w4eQaUJF0LEc+4GBcm#Od9ap#xJmF3#JPJJNPAWVGJmpR)JPv%uJ)`gh zaKmjVJPADQPAfbGJmbzNd$U8DfVkhZ3PqIHI_mVF+S;~Fq84u!aB04*);dhceiCvup~tyoJwUsQgKz<4b%Rv3^pbvGwbXi9#Ns6I z(K~jGy?al-L){E_XwoPK1pO>zGNEd%&tpFevk17}Pw1c#OHM*E14C*@dTzYag(Z#L z@U=rjShAY7nP=h^VLqX_Vlgyr}kA%A^$NHCkU4R?H$^ z*k$LxG;tYw{kXi;OCr9s%Y7EhCCO9Cg|~8d3I8zZu+Z}(Zy*9M%>v1nJn%h`p4aN{ z{a9c2gO2PZQUA_YbbJfM)@)r|0XVs#ty%{+?4q)z9q4afNKNWT#yjZYiuUp$?uwhv zYX@cyFR)z`t=xQN9$4`hl}A>tz0(h@Hy`{1&4HPXXUxJ(|E2) zJBMb2JI4Mu&$!G)oI1=&`A){;4k!Ix^pl7Mu|o{Gzo>aDPoK0Ot*#Z;Hs4Fd94_SA7`25?n?b)~zo^Kp4|TK)6CH&u zc$$T>7%lhpNAdX;)D1CUBdFbpENloKOop=t)$uGoKM=B8ri)N|#S-)dm8Rk|Ip z)ZA~MvXo@+M7S~8dsigUDJvbBfai({#PA`0r>&EtnS-OPp-j(%D9vviYkqDV>c^U{ zn9$V44UB4<;v$Md-!Ams_gJJ9?+LAEfY{pp;wj6#&AkDOTgytf(>twG{cTo?(Niy% zLRP%XAT+vUVTC~|#dX@X0vm|ZrUem`+{7XpMU%)EM98RyBsTM*?H^!FS_9FHIwaOG z1W6OOTR>5aZ9+|Z5fj^Bt7~zV?zvWv3I9Qd+yp&Tl2Iv{P60@`43b}N=GGxOZJkA6 zz=^2hVRu}LiVX{8$Os%3`%cm)SI+N#$bXJccdzbAW13=H?~{N7O>p%o^~ zUtBo$!3(#X1-NGhTU%(BvuJW#Y^AqRI)i@Er`uRKPlM0jFyW#WU|?OOXQ(x_fko 
zk1nb1q4;+k9?LT#2xcq{_PHlGPU;UIbTdi9jh6GM>ujuxjBng>lFe7}^%Rzq3?2)< zOu+4M!TrJdT>%Ed?+H#aBy$X6+?PqN*jw)5FzHaVf(&@S<*bw%?`%9^Vahj@%m*q| zN^Tv35#&cP-XB&ex&ngHnawN+eQ$_W&C0256r)OfCG(2L(5L}Ji>ddS@V+@&q-rmr z@G8=1Gc{e@M(xBfs;HdAQi)0q)7Y+xS*iPoH55Y*^&fOH7epGwkG%5AME}?5H_)$R z1ia1Zf-=mYs-*NtFi`RwwE2 z6&Bq>{H~mE8XJF$2DOap5|rx}CKTd5D;K{(M}}AB1ln{>pD=1hUAOf*WjX3M1**n1 z&DGThCHbUIhX^>bVG7nOWAd(9S*P6QHr&(@Ggo0d1 zG5g{CS5V*;f5$0>&vk$G_vK@C4#n2$~$c~*LksZf+tO5o0+VHLcgE?@y?4DhY_!xKh zXtjgUNdkP5uet6X^8<3vf5?s3oOJU!r+n2UXC_jD00F{XHfy@c?yjn@zN)^vw$`<9 zeeoB~|8~x@{!TyY%Y*PKZuJ8iZuu6sxs%wb<2yFVE@antjqE}8e9y>@q~SL(-b|Wl z%Ws*!R?<#8e#gk|q?`8qo{>9{`+ncZ-DE9Y_t%ZwgS_Ey7`c&b`saAx-xBMm=g%zv z0$=0n&n^F=xNv%jZ=Bg;i#K><kOZG8jntyzB>nKmft;d3gnZBik(n1FJ@7}_Hl1E_@@HD7Q5U6Ra#jSdg z$w_hHQ0Q{*06PlGeOxC+9W<6B*U=XJ1Cd2XX(%6I>k-+4t_E%btg*|lA;2ECcAi<^ zTTfl>a(fQ&zHpv;b7%6S*m~+{Z`_!>b8qBucW%Y@-1(F9y$$<$v$tyd)c`mMlQ4?} z)1i7ad@yi|rkaFOiJ~d7(L61>I!;B9#A&Pt&7$??(TR#9Rcwr8o(3>qP#U^apmn#L zPByX4qPLwrj%A*uBGZG0+(Ms3BwEFmSPle_0EI(Uv?kTe9&Kf5cezGiwA3+7GMxC}q?eH062fxIhiLR9>WptRXL< zS+#<^OubjAxk}9fX{E;NmBvB2=!baaRmI;xUxa&c5tcs1t=>Z;EZ+vTIo$SL4kUn9 zJnr!Zo(KPNzvofzFO&eZ)e}WBL^)Y$QU2p-^m; zOA~iBoGLzvY;1rodJ>KecF6*BYYg;`u`BKzVpk*Qs%4P*tJ>H{xr}GT774rYWPkt% z6J5Y0;bIJtvQWtHD9~l_WRk1G$tFcJ zi2<_1!pm}*BF0Eje+?f6IuFV@Ue&v^82JM!P61~*eS6cMovW2!ys^-pz)9q~04F7! 
zxpnHCLHASl%o;gHS9|vUs;&xY3<(H{CCW<~$Rs07awMcMJ4}GffhWnHMGq+<3bn{^ z%b%i0{tV53@LnI#2hvT*zFVlqNeh_>+4U=l5 z6$Ctwf?&D4{5fspU22y2F%prjp@HAg>O{1lSf}mbucWpJU5ksd5?zYW_Ah+1_MJ6C z8O6Ql9p5uDIOlmA%+um+!#o|{HO$lFeLUOWA;tL}0BhY)=Inzz8LLTUe-9a9pU`AD zkxeJY`is<6nSQvl$O44v`c@Q3bNDr|G}7kWdghGXISA;5jT(%~3OD>MpztsQyM;+S zGtOmkRv{YB-LKNx{;=C+xb7ff-8K?qB|YC}ci8?xo+tb05K%ljq2lBCNB|dL$ec|T zpi#ZdZp0ZXl2GSzz{*l+Sj|=@-P`PoNy)S~nE>@-ijeJR({yq&EZ-P@WwP@wup;&s zd%X#}R1-0DYH^4!l@Q$Ubc6y&o0^Da;+XBPPH~fg$alcA#>yfQsF$!^e7cw~oN5${ zhQE>%c7uyiI8F4R+&KVEnSE#A6|E{`6%927GaGjjPGi_N2tZ_pw-$L5jNA!=C_zaC zh%P}$3GW-+%7CoXv}c=@7l!p*5z!Z2^rS#0mn*7}h{jIg?6KRks6`d&&h0VkqH&XQ z;i2=frCYj9U^wj%r2z^apwO88br%()31|3U3(7ha+bb0d3w1p%O;!?Nbi~9YRwzCz zgzO&W(}a`XKocT4$d6el1q_N$b6B?YA{YbAM+x>o=wl%=_Q!W8@l7^~VOC@KRisVI z0tryd79OYV?N{UgbP_mUk~RFcs&?Qv%{EuG_O~wbNdoYT-U|8{PNl%j;-}htE*R z&TYfRh}Dk~!NkajTexm+qIPz-X5}zr`GFGhaapavCWeYv$kO-$)Qo~gvd01iI}`&= z8xky#HBHY6;0wY|2>$SjDtcc?I8pHXI6stpc^liSF-)~v;bP?#7B$uC237kjJDQMU zbjYxrHmcUXJ-b|+<&`%V3`VG5bk&soV-FT-oPojSD5B<$e1|(3pv}D( zE&+K5o_Yu0Ny>7jc}Wsg_y_qk;|fPNq~kcNGI7x;k3MBCR-i#i^j{#eCIWDcgvlDn zYbJh8-AX}>#B8#9AVVHcUo%B7APq~BJI3p!8SHhtYa^u3*JfCGYXJ+wF`2=@f|jDF zCL$l>RQTex^I%n+gU3Rq(mY4SS}qG#R23`7pu#va(Tlu{lgl&$2@e5enjaRskB9t4 zDKGV0CQ0Mo?oy?vc(qQ_TeULF;ekzYNA8oisCkc?_o*RzlN2uU6Kc%5+mMR2iVP!> zBsC$@`gO3G3{9tsuW>6Hp$xs=@Aum+cu ziB%HTinV3OP$uOe?$W1@oG96$F@v6lNm_<${PV&6?FVuttd_3OY--&hXa47$f6`~zs3=bmRT-Dhq6eAiib%yD1?H@W64P(lOhK{KM+Z- z!tOAS6(mm~s%67|5f|Xqk;@CEKHgLm&_)c^D4|?b9r6*}}tyWmE8og?f>)ws~-{ z+d25VArKc8`R4J)QIYYD6V9SsZ741k7vav<2L96`j?$22;RJ+G4im*UrZ*a#m8(6j zs`W!y7FPEBhz@Ll7?xN7EoTpT9v@|qIK~l5hez@(CS+*1lSiZUEBpXDrm`-KuZ(kB z*<&E~NAuhnTZ6X}njsz+M~Cx(In`mgF;2}xJ~X8 zu(HTnR@qcZ#=|tp5>+l%mL{FCW!DKs! 
zOuhAxn5P+T9I5!|r(oqQh+)r~P17?SbKVqh<9AHL>p{SOj7Rz)+z3o$y2Z@EW)`!N zbq;fwi?_>a0Ie?WL2dMCa$NsqqK^ECv4qkgO8qX65TIQ8-9qSyoQ4%pQm(}&mY_jC zr6Qw!`5~CG(T6wt*q%1;A?hC(=jKFe!;o94H8#iAx&f2i(Z;kNPk5}bH@QI@KoRJx zLh}mXan&ws6;qn0r8%NxP1BZBx_OjwRyr(FQQ43P-|%!$$kHL&vP(zuv@5<3JH-!( z+~h@t`aCTYsv@iwr`VL_Y{ObJ*UZt{1hV#BwHq)92nucu#ts+-I4;dAymN%t0!MEXfS1as1TFMh?k8XM-ph(?WRSfFohr%Kc-1E zvz)0=(n>)?V?z3Gg4#flsx&{sEJ;<$ux5ehyg6FBM)XGKX{%OxVMu-q!?GDtsTrok z&xYZPVU$k3)WeV!aTr#Icpqy{?W^5Nd89-AmteFTkw2DK)3Q7HLqSYK(lXQe^QF<% zVf@OQ&LLJuF4ew7+y`~|#}oH4ZPf03R|WE)57m^%DtA%Ij>#!BXmbi2ddn^dGtaHl z{jnvUQ`Vq=AY-nOD>_h+%8zu8L2fg5N?wE9ymYS&6K{l6jn$8xbL&Ozw}z@KZ){vx zmyI!c5t_3Hvm2wc19TE?oK%tG)Lp2GPSq+us*J~Xp}P-{;v=6A5{V|&ZZ>_4O%$_- zn)CfhNqgvT<_Jy9%45Ck7ZW#sF`3ip{q-+2zW&!W^EjyK06Ogf0gV4#3r2z>_GznG zRnqz}Pf(XK9k$|!_*6PCD8px>E_E{0Ev`x@M}03{IUI1Iy;CkHIqF-KB0j^3{pgrF zmcNHZf|&u&xe}2=yF%A!gkhYbwIRN4W9cCj_wY!HEW>lmnze4OTb{XSj^5frcIfWV z{{Nls_?rB@O{=^MLid9!^f-9Vx-!P7n%axD<}L7aXRt@_UKfBU_lrZx#R&{8z*ipA z?rWDGaL`4(^iDXMd7{ox5@+HfO;J#x50st>#sz0PxJ`&hFhKkUWEzRm#s*WVq`T1q z_57)ge^2&TiR_uP_!~XAJ^vfJH;1Qyr$-;2Cgae?6_tN;w0?6yUk)N6xi2`{Btr)m zl$5AmH@f)C8RnNHT9QBFc)Q(nTg;*8MuAr@ineHjAdTaEE@EwAXN#?X1x0g45*>=v z&J68Zl`pk>>!tr7Irazi&+yvQ9t!l()1LagLs~Xm1Dj>d{zX47E)Ro8XzFk1xYf6~&7H)aIDN;av5T?Wcg@(t*z0>{TuW+`dcSVQ z^`tTJ`@R`BlBIt0xy60HbZ+q_;UBin+*8`Tc!Z_~ z=!Dg`fsw=QzRTCS%e`~E?}-|(@%lN%Q|ArtBZ3BB;!WIriW>K&GU|R4b65GAncEVr z!#2Nl@ryf%`F7#GMv&oO9m{(E;G0dxwoO2ag;KCdIGqa4cq~PvlM|*hgP78x7K&v< zwwr&J?PM&BbROzVs{2fY(U^%uOhl?N0|p6WOaMx-DZ>*ME0!ymg*7F#d0!!hluUDN zQWP>3r=!GtYHWAe>#=~jOmYG2N3?upx?G#=(gPJv@YN!{Q6{BO(=6rm(e7tE&)Ao{ zuh=k7MAyWtwTustXmuaf8Ixhkv&nt7%?4>cnVxjZ2i-mMe4j!ez)xV1MuMEevTB^= z31wj++->ThzOxvx-y0p zrKT84_%~U0poBaM4YFw#D{+qr9hso+y&MADRDtW}4PhfvT9#kXzDnIB<07G@sTeTp zIEJeTmk|z%@8=oZ>wfQ-KZ)FurzSqZfgR z=At_l!w#CdhfZ5_Tib`up?l`2J2ZOh&h2;hsXe#nR^V~!YlhbD?7uJ5SjS-!&j>-5 ziepCPiPJpGl^Iek6O0B3@L7(k%wFd9^C^KACYwwEcp+ivJ(9i2LJCfck&snR7*GbJ zq$!aUNCj{zof=%_3vwk{FH1$q`2^L3Y4qvBPMGU#0s|4Ka>DL%F%0uWcL+Mhp7lEQ 
zq6zJ(XjeGtw5XTtC|XxmDjGrc^i}|^$_-9|6vv_D9jEY=NQRO${2!KtfLq`GW^0^H z#MY7EVXC)K1W0Z0n1rt0YC)}aNhvS+`b4?a~>5yitedY@c$JGN}Y1a2$1nPAFp zjeb&yzeQ7qbndKWJSw5NnTYg-w%&qr?`-4K!#$yMnG(qckFy+f=G!D+`|xrAWz^A_ zvQ3IJn2t|W9I3&j5OzC`tifc_Dy1)=;P8x`EgIGQ>t3(pDdS}zt3HP1ZzxM>hRiTcv3CbdYI9Ku=OIEQ9u?>-LD$-I}O6rPfte^^2{p{xl zX5%ace_+y2egT7AX_!jHh$PPJ4G@rt(Bq6(mZw6ZHYNbd@EBlW^-?3MDmL)rGNxgq>mSvFU>wQ znE(Hr$i*bO=uT%H!{R#_kbFU+>@ydF<&+FU8fX`GXOH$N2}F{MEl$NF(oS;>DR;(d z`vQPRVUh!9983mf)(74G`lY-DjA}H zehvtT;}rrXawwz`zH1DS8?d&IW;Doh>h^{`TP;hC%E2J>-J%7`8Ru?RpknR1{Qi+v zG;-w3J}|3YpRs_OW-Z|U8Ge*IBzDJ-HI9sLT>|+G)qZCGrQ9QHoP^Zr&3^RsPY@b9 zQiSXv%MuZ$n{1S6_UF@Xr!GmR3y)qD%|8&?o=Ta?qE1B`>cZ0}s91vhLaJ(plGhRZ z31)nSRw3B5*X@qo>nzizkmC3i1Vu9laJ0x18n=VsYivc;n??}uED8d7i=6SNGQ*ns z#Avo836vjD_YrlJDoLvqb)*`}x#9`!U(%?31EY`86n*WlcusSHAU1cm5ye_nVL z+7;Kxt8beRl33{|n@k@H{5H9-Te6Gs_g%H$494+joS^AyT}j)n{FFKa#3dTEz!0rk zaKS3tajB3^5r2sRiI#^)Z)~~2oj10+lcH?31x{Iq<~(-l%u&!ES*I@UHQd2F?%uXe zYxCON+knW;Yd^H+?gkLTjQRtM6H2w&^M&j!{Ki?U!XC#c9R3ab-(M5Hp(OP>jbde6 z;rL7ExRS#)$^kEG=0IEcpmQKc2O$#nK)%*f6=s8>=pNa`MEd( literal 0 HcmV?d00001 diff --git a/local_dm_control_suite/__pycache__/cartpole.cpython-37.pyc b/local_dm_control_suite/__pycache__/cartpole.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2edbac75aaaab748be51e836542680eda6fa4586 GIT binary patch literal 7950 zcmcgxOKcq3b**n#|M(F}QKaTqBWcEJSZ>W2MuI5H=*v+Q&rpuYv7G{pVzFLzvs(4( zc~wn`Jc$HPMa!@IJ87Iw~kRn^_3B*PP6 z(qz4Q^}hGrx}S4z-JYAPOZYwbrZ+xRx`w)DX`-$Nx}~GP5>!INGDMpZRKuE86ZL9P4;xlP)N80Wt){5ggSl|tnius3 z>I>F_s5gVf@Qigv)b(J=TE^OQ!Af}6I*Yb>Yt@@SIyaWAHTE^O$j&^It#xnh=sa5* z%ib!}nch*^GFy2jKb5W5Ug$Z?Rz=V2FZ7&aYoh0k7kbv&dC~JG^^B#-F0ZlIMf+Pb z?QgI*L4nTRVy5Ub+1u>GGs$}Up2RM)OV1>B$uoKvo~qUbw7kRKMa#QhqjwQ4mzcgK zwJr}A?>Ic|$AM?EICT7|eN6ADC1(rHZW0HB)U*AtAM+GVRp#&ei62L(&-b|>rFLf! 
zxhYyU(NqZsz1WRWGu${zc^sgs?K+7^oo*b4VrHE>v6IIW8p^%L4rf%=y_9=2R_9LC z^^Qq=jv*hCO9lPtuHd?ZE15@;N+ap1oXS0=qu@C5aO0tu4tSK9j+qYnI90KkA9c-i z&ojfAd4c);k3TY#6bI;@K&*2eZ0 z=&MD8&?cTGKa=@8 zFF0f8*nm~Uo9Tq-c<8WvF0o1%zVL_LOecx34<=Zl0hG!C$~PZI?8jous|x)phJ40Lzhlj zpvnmyGKTNZGHD_rvlhsF?5BHX`ZzX)32q;gk!;EQ3~v7q!c)V}x$*x#HY}89v9vDl zqV1S$^M@{3X8IYps4gt@lt&|l$%02Zth54qoZFeoNb3`iG-0LnQ3X~yw=-H{rS;*T z|FJyUgFiK64zI_(f?Fk{{*R($OzpB@cy6iwu+3?tL`Tju7mPT~v&(3kxOj54bO~-% z;qX)G2tI5qAIc+%DUTGsI)blCl_PKrt;$F_(wK^x`eZNFiC0WHCp}f}N=)u$-O(!!qG$tr+ISlFryPGYY2B+~`UGQAVTF}IBTh{-8$%@;segexJV zD=Bq>VCr&RS%JN&%d7Z98`{s~dQ(0xiy5S2lOGw!o4AtqP?-n@DO#E;VI0X1}8FLu(>B$Mqj^_Y*Lo~s@lNhH6 zEG8;`0>u+P8A&~W)*i@Ohw`pV*jOFDJ!|A3Wn!NaKan7}0X_qtd0a>aBvL1dWSPdC z)XB6@F$Y*(OT3_Sim%xm&4+g6i~fk%Lwu5y;o0J(?GqzTkgj5&uuhApPMm=ps9-qR3UUV{=|~yVY1AG*;mO9Fq z0vdhzC$lF@QrONbx#UjIWhVW=Pfb{@81fxm1)arF`8{lyDNlwiBU2&-J9}8ceZ=pg zdCCC(moy47>#JYi;%+>E+Ovts{T-$x8VXYC;abUu)5A7f)vRjUB-gf`)on6k16X4_ zYufgw11BhY4BKY0Yuh%mpXuVtn#d8RYV<5_8AN!tFbx^NtHEN6ZaaMWAnJzKbJJgYT)fG ziZEvQqgzwFyR(gGZpXx%DB=`hN$6d*>vhxIq8DiT30@Y=ffM*ckC`6epvO%&h!cdU zv6zt}Nc2(){yTnm0GCM?3wFs#O~=ebRV;U+yN!mH(#=o)@4qZ7H_-O$f9U^3{n!6} zBiC2ALYJ5S%uz?V1YrI5x8bP=;z++N-0H-xr5|R|Dc_ZXg?Hb4Iy_skh=UmT>;M~^nzbr z7xI^QkVW!+E&zvMp<_ngV{>PAi7O<8n=#BQ$c1O}kahxzm^xX{8Kg+WiPHxM=4C)T zXAq>VoN}01$_i|@^voz?w5(>Aa*^rDpbvjY*#WW}q$Rf9(sGw_GO<8uEZY{C4R&K{ z8NQ3T|A;H0?1D5`kt_0Wv5?7jxuh$+CHhrdG?4Def1()4vfnx_Z@id%u=HXLAjX{*#O2m&lo;Uj1A zQ)FRCna~gQL%-)hJ-xo4An{nJ^sM&NSfM(Uhs=GF>&BerDroMMuNIn4Bm4+p9@6Y0 zshS9p0Rp3BXA%OC7M88Fj#JP9VcUgNJ~ESoFhrL1;3dYTpC{CyTG2$%g+VHu6ATh5 zVE40?pWpk<@YxN11FzdMa-9Eu0p0)QMh+6n$6t1T`YZDnd~ZCdZ8mri)MW5ZR+|qicIPInK6ibLg##gnm?gJ$okUMAJ_%20a5NyzsQ9B4Qf!G z?@_~4)PI1-f5w$ilT@oh)YpKbFDYyC1^f-qmtz0o>L(O{grD{jD4p=h7)jQVEHDZ% z?R5zEkqW$`km!$u7ntG_1ak3i;_>~$N9CyNa)AU|X7O4`pi-}$RZFv)RZB?O(z1pd z2V_g&<&v!NfCChJ>@ITJIRdaj+$=vb6I43Bt)_6`-y9SRufM>@+gcD}era50lTq zVZJ6IQinasnMo1LbLFrC4XzZum7>=e(^n%zGOY3(5zV@EXfVXx7*)j@Mlq_2QB_*w 
zsD>S@Sf^U7gIvH-9rPHmHC0>{qJ&W(Qri?>Ux+nqW(E28t{;hzBdY;|F#;DZxXWIC2A=I8oiD~^M&ftF*k{KC7LF+mgqVsl(pfD9s)4w>oG!)^QFt<8HI z0=;F+HtHX3*moXm-QV2Edk}u}nB}Qo!g-k*`cbBZo)fidGZIe;WNTgoF@9tVX-cex z9EeXEpic;uyL)L?brGPF&{%WAl9V4Stoi)0l(o}B)38Yva^LzN*fKFuNU}y@DXu1h z(?xknh>s%5RT8A-Qjlh5Pc({L3o%OwQ5j+prs^RLBoZOY>{p&wx_TJQ1VF%Z{f-aM z49h8OEi@$M12>I%8y11sg!qf&#<%i1?VDQ-VJSo*6r?^q>VIKO8G%{S=ji7*y3Y za9mU7&oo!6Ms->AV)#7GdPi(bu|sD-@i~?x4s(?X;xpetVb!M}s5nV!HaBf(^C161 zv-|HLO9et|Z+NW~>8-lgI) z6_i8~fla2-e8HU=Ii9}Y>h(Ol`hb|4{1pmKA;Z^Hs`*_=FtxxSs+wF?>pFc>nuqH+ RT~0HvBwGZ=?w+H5_jkVUoO#meG#z+8`@7}` z9mn|>{@6So7>}T+e?rBb$l-+3jN~+;1Z@{=H*&4*f$c?}wf)SGeCV%bwY(nHZCgDH z@^q-+4zaNwoiKhxhsJJBROz`t07N8{LDJJ$@Hj?uypzKD0dG{;@OI zU-q6(h0yUdJS_7#Er##mqRxPrUVN&`Y@r29^LZ&XGzC08PgPnLV0Y&-Ei@Z1ibO-p z5j55E#jH$9uJ6Lu0(;ZP~w#FDG`=z|3J-aIZCPobzC zsI+rMIJqX;nb9$YgVW{crO*pms4x!oVxEa`y>41eLOm5>UUHFzzkl&OR2t4Y3FCsp zSe_<2EXQH2l+bFpY$etGhhOH|z?A`PW|~sOoY@6T?eX#P=;Zisj_g90t^WP#!L-c9 z!MWgZp%1iBT1a;I}pTiCA@x0bZ0sWXK?fcvB1ZNf|Rtl{1gnpgJ^o`eB)fW+Fb!gUy}=OMd#m6oeQeHnZI&Z z-k5T_a#FIQf2H3LK+j#?-Jpk1$wd+BSe*^gQ4&s7=BBCBT(B(7Q$47e`sdTPDovE> zjb)iLz=~C0h2v6MK5cXY;Gj+t0xRKzZ`|Gb1h2Vr zX5f_>?Eb*v^nqjX{$v~PX(`hssvry1TnLVQL_e&z7)HCgl&S>ooN6gVmKNtClep2q zHGUE2f*X&=IyOxu3ROz9QEWlnx0X$r`)xHKYyngvRhl3Jl4c39T!@9bscUI0Ny}Gl42nONYGI9CK zV~9W2?Mla5Y&ftbkucVqeraQth?8k3=Ba|vHhhnKc>fQmEHjPpsC^;~IjIC%Ulb|uJ(u5sWsUcGUMk~PLp%s_bK_xD zi0&|al?%=0vYdcOV3blT#aX%(Y<=26O@0qg`T*B$rt0PpepbE_()f!x zOgDr9iyMq38H7ck+AX(!0u!G?QK&G--6L(%C;M)fv}r(UWcks?@Bh2^4HJ+7SnN7T zK+G!F5GTP~bmn4e{|k8i%DZ+}K*fi^lq=ub9@v<5{N42q;EkpSu*!su>n+6kry2~WhZz0gOk}}JL&8fMMu)1)bEnV99*~4VBb*x^+T*a#tNA! 
zA7S-ds7zFGlS7vlq`!2#zrrlz;r{*~zH-4saoX+T3g-zwvN{jR{-m-)Pzd k535x33;g0y2qpjloCHZ%Z?s9<^IVSxZr}%`-{=Sb0sgiTA^-pY literal 0 HcmV?d00001 diff --git a/local_dm_control_suite/__pycache__/finger.cpython-37.pyc b/local_dm_control_suite/__pycache__/finger.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..38d1d13de3f10f5aba211374607a185c943e76e3 GIT binary patch literal 7798 zcmds6%WoXVneW#;I5T{RqV=*ZxBS#N$|#0+Hw&%S5+%_N9hs6vW;bqR(P~Zg44dkn zo>bS!WQV$}l;q?fhXA<*SrS-av6mo05WvTra>yZnK;ME~A|yEn$SJ??>*<~$MF&a{ z>|uxOs_LrhdVG)H_kC5@W@j53ez$(6#OGepwBOK6@v~64iYIwr*EFFyn$U$2>XG3X zI+ab7O~>T2g|g*XT&{!_r-JrsSdD5w3 zIW9ND`Dnpe;BqBA<}6~oR(L#Ga+Xjx>pUmsoD*W+IVl#LQ{tF&S}Zze#Bt|&e_`*1 zea(4MEQ#kHYtC8!#l4rriGAIFPEmJ@ z@uE2USQBUc+SV(NOy?EUyd-SY*nV^CRn)wOnsef1)Vw^dd0o6NUKOuB)}1#*WnF8( zk1_R~xMuVb*;xNyP0}(r z`O~F~n{nh{eBcXjpe`yuQNDCFz$MxD#tW#SE8~&&9qpm1%w16E6aAsJ zYYblqw1Llk^|nu5NU z?ARL+$1xd=Y*u0A;I;HbFiLo9Z6n|22~6Ft%ah3DDN2~<7JUxltSqm8Q^?RN^oT8O{fofl*K`dt`R{!vai!5BfpMaa1e z)Z8++EHb%$RI4(@y+B z63e9H)cIY-L#N55d$Ce+)UIU4V3<`f@LR|RUgV3+5}xw1M*b<4$+{$d*gGt@6&~qp zSo|0i)7deFI^1$#v&13Z>>LW%C?IhQ96GJBkvLy$kznqD4|opL4#7XYZxZs`TQTfl zC!6Cpx99^~a2^B7HYIN&Y3uTHR6eL<^7E*>i>FxWiDIP(hdZ!ja)o%w7f_g9G6@y& zr>wJ1Yen5D+E69EJiVdw+ZWQdJ&Y5Ul44sb7~0Y&IeOrOU6ZYLG81i*MC1vPEvpf5 z!f-$1X0sMPcZ=!BgeSBW%~;aYrDEUHT@LKJlu8?)93_>z+FoT}>ltI|EbTk7s+csn zkZZ<-eQ-Bm`=vh&y)I<--i}TCFZx43hcey{A}@q)28r^*c2&NP6=c;-ymjv%VzVS$ zd^E4Kg?sU6Abe4hAVA*<2~HIxINa?38A|38{FRJ`VuJN1ORbf@JlA8R@Zdu|?~F zWtKunZUcEid+-tW%zan}h$Q^Rttvq%FWN}oonAB<1;`&u)}UJgOuhs=8Oa{>@Pa+? 
z`#Aswi5(1*K=`zof5ddmoOW%jgkb;^)6c9yEF<|vY9`ptD!KPP17!2v1fj-fsN@{2 z!!1eh5Z{hDHm9d079Z@i)^25W*CnImx>>^|cYG95x#hZFA9>-prRKUK?z*ldNoLhy zKFGJ}^&co<-teL?)9b`EaEW|VHt-*zkdTiDr_`uh_^aTripS#iE}F_8BP!mKcaR+L zIGW$qdWe&S^?1gyxD2o3aUH%#P1NC2RK<*FAg_H}v*3BaT(snA43f5&?XkCTcLVP8 zT@o@vezpt=Z1<3WGZs6YwJysKKBAn2nVy6kR;4M(#v^0P$m0wI&J>54>F-wuFh5=h zCQ&Rl@ISWilIMSy_^v$2NT<=T@mq!`e~C-BoyQoL>|4X!zqw%h@IMg5diLGHC>ri` z@*bT#9BZyCPx+GpFCdm14`gE&iV_bKrgS7y7O~c5jT!({Co;5W8@jDJqvWie#Y2 zl}9*QQB_qVzU|d_5kG@#V*NxQG(zD4so*^ zha_+C=aZ~?OA255;s#D5rT~Sznz;xOEiyB{mrUUbhr9C>R-)dQ4YCXG=C zS?DiaV$vMySkk3XZl((XlcJr2GRkOb_i>(uo$KkTV&@*v!jlPkSqR5PhlIZu#-3tO z%;wzn+pBBtzpdQ7b$xZ?qiJ3qUV4KgVjktcXw{51Mmk@vHlLxbuR-Mg6+I-Opu9rK zRZ2*AB*}wZqvS&*+3Z-!x_%fIibldy2omEkN!}woOen(Pxkjs2ud_sUQD6T4zW93% z!Oc3-4NX|ugd|q+MzQgAhWlWLFH<@F2;F|%3fJ46<^vEqGXS^RHNX?i2{{`YL zNcI7uwc$&5rBmL=^^Qo(&4b4F+XF9b<#CIAu_3YjOpv z&h+g}PaUi44c}sC4`49y`HnM{vf5}6z)nZG2WSj~NeGtJw&5^r#!}{{rtia+ko%XJ z$tXe+3^KDDCs~~xL*T4j$F%ZaDESg4Y-X56gc3MqV-^vXI;}A?SxTK+9-wa*g_I6H z>8K7z8Z7MYc3DabC?Qzkvo*$`pW==EI!#~F&*+Q%cSc_@T1I-3`TVT5iI)CX*(RMr zWhlPIHmk~I7=E@{)k&1WHtPc5W$Mi4X@e!nyO`Lj+^r>#7GC<7sp%#q6StTBVfKBw z)0DlEbgXs=^ckstj{0N{iB{UE+)go>p2+vWQ_8?X^?k(uf`zd^vU30-)|7&GyaX6_@x72<(hoNfAwkJ}Io z;<6!Y-5Cw&vSCHaSY}m13{SPK{J5LoHfu%Rwp+}e>0=*zqNAi$zrvFXo(Z3kg|BK< zbp{J?%tdqiyu6BxBEjBB;gA(h1UV*>z*rK0SPO7=TVCr(;#f92sp!Zszv(z}^bL@# z(J+MaxM(bg>G17PXQYBKVX9>dQ;m5w2@pTUFC{#?c z%KBGUH&*f+3KQqie4`>?p^;ysgf1~Mi~6!F#R8rqG4o-PkXmH*%XzH*4%H`Q9W8@B mR->NhWB(O?tYN`P^(I`!z> zs`jI3Z?I7$Bxi+$KrCh?Hn2c!Sg?XEYgVZh2(cK6U%-m*oT}<>kL^GrB2hDS`_#R+ zZk>D1x!-s0JwH2JS8!eVeJuX8q9}i-hvKS0IEy=4Q58ifZAGX;^Hg7JYbwb)WWBAk zY(O^J2FoR{)GndD?3MjWyTWx9uj<#@HI}Ph-JfaCuv~-OXg64{d$ayrdyeH9$n))a zmP_73`zg_AFN)dr(_&7{iv{tNSQJl-rCz1I4af7lN@A*4zt3}P925RGj+a;UKr`aZu|r$&6qkr8#3&MjMt zX#WL#w)exO-@f&?{TwDMI_DHqw5keWb#2dUCw59GQtj z>pFr39y4P*dcQ@%AfaIBH??%3m`d#WG}QrI4$1Od-;JAj z%-q>zD*4E=43*QF_28x}!@zffIDN{xaAjlb+Lg=J)}?pWH@DWWHcRp-y2-=TchQ;j 
zwcbYv_M)^h$UkO~q7$FUL-a;vn9bs4JVXr?iqTN(YE?DVIaNM~=g-BOM<21~<}KGR zOv#gJYVrs^&$DY*s9=X1)SnF`Pf(?aV%ik)S*krr#TTd;hhjEuBf{%%pdp*H zJ~ij(QQN^iz9`Pl;*KbS9Hp%SbTy&2b)g9z&}Iljl<->;Wl_OzSyV+0zZFpzGx)8F z1|n#dXONuDU2jINKlE%u8S|DK@0hj_E!9zQYSLIq2yJ`;fBy6BS|LSf{A4Eqivnn?j?CNJ8zF+3J@9Lgv}$)A~2-wLHCwnK5J zg=j8ur&LRlrk<7p+jm522s^e@(> z2u-qyUrgDO4R$ zGo9lXtbCw5SJ9==_)wwo!(ARy{IMR0b@qUo94pi(&H5p@WlG~#(~wQlh&44nfC_2F zdi%m(sqI3eM8`RwM`e0~#c}vBzMA)$`UZk>xapg-? zyh6npDkgZz5^)6s*d+*znP3R5YGi-h(m~7kGIA**jC}|*XFgQANK%Dyzt%Qb25#Q3 z0zE6D%IH}WbwbY%6$97_UpC}%43ey!%V_t;7HI4SVFXsyykzvM}}Bbr&P#Og@7r}ecgB|v^l>5`uO<5Gaww>>vu+(EEJu^n_cLT=D)%PN4nk!hW_AAE9=p^#wo*QjHtUY9{m(v z$Oke}89P0U5jl?FNq3NH@kq8AIJe9j(?gsx!5AB1Kro0mJOsXJ^Q+_W>|qQdiCrXJ z-kla#V*yk4utD%14F`^7gHs^6n7(BJOgVz76VSe9Ssf2~7{X@~8|Ts1#hqbiHCf25 z(<%lTTc1LM5;L~`O$fBWvqY5^bgcFDU8H1t>QMztyVNiDE3~{RWPYrjQEopMS1&5x zU%jjIdo}DP%6srz)^ntvQ9gL}T}4#5rrw_s)iX-mh-b;ayK^I?!|-km-mNDWGSjhh znL3b`GrJ(a#xLO(KT=yLUF2UGU*}9itlZkC>7<54q{**B+^6kX?-xI(>(75Igf&S7 z2Rjm=aleh`e3r76C=!BH3kGS~1N?Zs)CfZ9Pe4XeZpKK9Y%Oq+6!?TkNYrz#oi++zHuW8IBv#qBG!@VviD- z;dbQ6n`5)H&bez&O;IVkDPQ}66Gg2I$tZnIs|B?;OX*ApyQ?e{iqe@YQaI8P7qPjS z^q1elO9f`Ly>uusg@*(f={)U9rj$!i8S$PS0NvofWVx`{fxcs75f0HxU}G&opzTR! 
zDB9Bt{ejK~Ss5F|d>GQ2+CIgV{B}_wTPtse!$3sXw}Ri?05c)A(pnsP4nnQtFyB!OH&qah{5p+6I4{?zI7`KODkzlXbt>8@(%Bq0I*#WR z*g2Xsu+2I8!`A;Sg!0c% zA{5%vd=i^n8DtxTQ>K&i%KsOem@f#YYY)A+d5~8!bRr{jWE9bt5z0XXqS*Nf?t)5Y z$u9qaM}CL?p9hcR9D~#(=NMv?8lZ|et41lyRDFG1KO-7b^|Oy*pgFAU`DASq$0@vR zL0*w1okxaTy1(A<@6Gg!}6Y|MxJ}zoLUdx#% zRd0`_J9)1^to2^u>o1dDNx@d~E&9qgsGvkpzD>n=4>KVu`AurLj3TXahD>_}W+;vU z0)}Zyl2@qvRjPd~AN>?9e4?dbqn`26ib{XUGRMY$OGuR2ekwvzcS}r8MovzpjCnlo7la=2w1un0L_sz*bg$xnU!vj~$Tw(d-Hog~vp8sTVzuAk&WZnsx`@98HJKk<{4>y`{4Oaw_HMBtm>g;G zd{}%iC&$@pO;UaH1T!yf`np3pJ_5y(1*J{}u_Z@XyFNK-mE`zzc2d!jUUueq=<$cL zl9Jo(z;|KltzqnXk*rZ0=Ncv6;;_Nfr>%?YZ=Sn;d5aG$t;?6*xwOTHby-3?vt{>s zj-V}Fl$JKHU)oyF&i8bjhh@9d=V{zk($t`CoC)xZCq~L7GErJRlWlq5Sin<6PBxIk k;!mq)zj=06)eRhRYEyq@wW?Q3eDGPXs*Cl-Ba6@d8#{AdRR910 literal 0 HcmV?d00001 diff --git a/local_dm_control_suite/__pycache__/hopper.cpython-37.pyc b/local_dm_control_suite/__pycache__/hopper.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..67f3c5e34aca899bd062be311d3a7f749c4f033f GIT binary patch literal 4591 zcmcInOLG*-5$@-_^cF%OVA&=dHalTFu)BU@ygUWS1_`6twWGrktyWjfsE2;cR@DfY z9Zulb6MJ%mFFr^|UwrdF@u^RC?1{hNlfSH<83|a2Pj0Deva71AD!@Yoal$aZrH9GoK)8Wh)=a}-hHo&FS&5lizGaeZL$-Ze%MN76ceLy#uJ2;Jl2p>F zU)6opq?XqGx|VB6Bc1VQv|NWg>(6Srk<6v@{=AlFl7)29U)1s}XDa9nvPhw46J3>b(95G7QZ>pmCGrHK zvl~iIgHfKQ`pp>;X4oqpav?F$pt;z%e5wlmG!#sBzgqd*y?+h=bp6c=j??{AU*BGT zW8o>H6HYE0!fo7g5sfkqOlBUM%IH}g3))%3^~YQdL?*q^Q-glOz43N&*7ejL_tKp4 z#QXij2cA^WsO#Zyn9Jfwd3nbRrQ}MshK)T9QtMfoG;LACUPVLjkOlhvqH=S4d%eB= zS3}S-bo1}4m-q6NU*6{|%+zJYrQ#yEbL%qxlROHOfTh70r$7#3#W9}@(ds`hR=Ql3 zO9wblP@dunsYw7BmRLqx%s(2cTgp>!tCH@xoR%Ge_|I z*VYSXWcAO-#tTb1J$Gb}oQ^dzRE61pu}0RPt#3@==L~Nj;WzdZwHGQ4);^b>${`YD z1NcLCPhIkM!z9ciLNZp;>*tcU?l!HWO3I>i<$LoKNfkG@=O&=!R@Wh_T#MwL3`uj`gVK$ zQPUNxm?lor++}_=H+TYf>&l|qAMfR;P(C@bqFACgT&?3zAj(-ZhCOS}nl;lg=S|_^ z`TvCdkNzk20M+Z{fr0>Y+AuM`0%-WG`tr@2c9Kaq;zdc>%CKh5;rtkdRyp{V zMg*_ch^%^CBL+>h9v3Grw3mx`NKBh}I6G%rBk*5|SF1Ro&krKnRf6**&h~jC2sO~% 
z_wgfsfTn0jp2=L0E=^OAQQy_U)vIq~WK31VxrqXL?WoaTk_{#X$<5dRm$QrWizfUU@v(D(d<o3b|R$3BzCB96Q8C>Fu<^ggfNpN+3UbVafDIIkGoM ziP0x6v^ix#8|RtaQU@Ch8yE^%Xk@&Efkz0t*Cr|;%F%ZC1^jFyLr8}SssdzSFV2W@ zBTinrQ@ZF*tI_aq-2`1<)sp9di0A}Yy<2&n@G!gN#T~CL23%7CF}WA(9EFJ!o|Koo z9Bw3@#*!mNl@rG6dE@NGC+Vy;9w>9xRd1_b^3ElXgNRTxh2BmE7@oJv8Lh{&8Gn2oUBvoR2byS=56d zN>F>iYmc1s=U6YmT{2XoVh$Hd1GOegMY=}86#6A6C121PK!>;v@YG7d2z8c z54@|aH#ll4)KwzK+B%U0y&Q&-Wr461P!*C7RL~cB7i2HR&#+2~*fQZ&dTKD<49Qkw zM;Q}{|AOWo0d@_K32sRV*RbYIK(;o9?EST-;7tPPHoZz~5O{|u*G9-%J(O|G`XdadbCa#-V(!Dh~qY>gb6&1PYVx#1^MOL*niju-a(2?BT*pC?4O@N)zxxB-QO zM=aJ7b2xHg$62Qf0oOmT7ZoIF9twnZWj7yWOyUC!wQQUf3;SU*;6aE_h_a)KoSEh^ z6TiZ%Ix>nnauXB88;JZ3>i#t~ly=@w%)TMSHk%DGgI`Lg2}cY?T<5Q1F4%70+`JRq zU*EWQzb$@>IXb5y0p8zw6l_0QU%ykhog@!cS)V&8=ovNUG){_|uBpL$x@axtWC{YC zd=S0s0u8cj3D#HGOHu)ASStvn2L39hg=)NJHBgajZ&*6!4dq)+a}#CdCv?tFsrfNA zRL~3JGePks)~Wdd4b&@%FXA}TnEWW9C&8_=G@aoOxV0-%mY$!Rt<>t8C7KuX;#3cG z22`M86=Fj{Q7s5xaRZItIQj+@1b8ub)X@@2iQVdVe=Z90=kil@_1IhbQ?fNsaUx6Z zTRb*cc2n$>c6D{~5|?S~nReLiazgc$v#?Q<>rWwZf%>SPD;!!%!*R@fUyu0@L+q)UMU1$yeK|3do@%(W*0J@?Q<(BJolLyo)@SOvu+cOmT<{9b z0>+D8(JwVivajTsez{qeu<2F&YO^ZgGT>UXCgF-V;ZHUvC0zBU{ORVjglm9jnllnE zc(ctpHqo4Clg(pns=2_Xo5$Hq^8}yXKY5@uPqA4x_e5!)=BM^wVe>AR-3OrH*%bvkRxZF8#&2N$&s@(a-fX%I?Y~@{np0{dzGDeqOdc(w0Z8a-aLn% zv&=$|#jBgIq33m0SXJuh`qMXhz7vElvlc_&af8Mo(YdZBQ_gx6dcBz2uHOwsj4qS8 z+iv8B0pQ85aD&)x_kvc8o)vTz{oZES3IUZ`VGxVZ18A%}5vRdc==*YIRX70)eYej= zgn{Du-r$xWGVWQQ-nwZ;G0^H*xFO~;w-sAq z+j63a$5EqS+ZfWSvEzGnU6?p6sR-^cTken)udS{wt*!o15i=N)&A%^S+6aAqX`3@A zh%d!Fin*{CKfHv0Uf6OxoB8&TTz1rRV}5CLN_RJz@9;R?+{T^k^d^@GRSk`zi8-`M zZJh_L4c`%4xO|JKK~G`$O3GZu2PmS(+GFK&WlxWFQkwzP<*B}B479H0DtlUNY!(Lk zz-Vg&B`z}K3vHnNR{LB96$}0285N0St1;0icA~9@Af|j&*OQ7LZgV^8a?X-Uw1d>+KfjD7v~%G%xAH|@3MTT82JOLyu8aRSrCahf~J$8+s1$XF*zO5NdU z2H`R0nV6*&1zbL0-v=1g&?tIMom5TLKr3Fu_gAHJY%mpc){fA*uvr|@xy1AwosIrN zp3e6jZ!0A;C@hZQT(Lk6AywB>1W8xwrZ`3Yr>P;K5U-#~s`mYBH-Bue-dS2<dG+ z(%2a^dGZJtSw&dXoTFwWz9}h;N4UjR3_MSXH_-b(qr|a1CGPfuZ$XHPefQqVx21zF 
zdch|zB*S<9|72ME2J#@@#67>c983a#d#u>^Mx-!ux^bRJfp_$frTPGGbOQ}nnkt;O z#?+?HG^WET8q8n?{1#Y|mGE0+CM)B,$ZCf|+IvzeE-ZK<@^~$>U-Zx~T~->*NdN<7 z6DD&Lmp!RSM{+e2$gT)Cx%7M%St_f4(b>TPa29bWq7bb-Xx#lM6dr591Qz6kMCaB(qgU-aQ98{#eOP&aZHu13A}Sa1%9>nBDl?C!qg z_Q%t$?67UqR(5uhm&4?2hRI9jeod_2$*>|WY;(j0ZoK;v%#yj3*F$tXH^71;m{uYl zM4=$L66C_ZBA(f~-LHUWHungg6mv-RA(zXGcJniN_6D7rEzl13`*#a71)*I}b_mjP^L0@pY!Vb>L)m}=ZZ z4E#5S);2>5%blR(@n|Q9)qI9ke!>V3I)q+{2N7e;I$%MDT07A8)L}0Inv)0?I|w6p zK4AKy^7HrhjGcFx0qE_$LN;q)3<}4UlgdClq3j{t*)I%=`^5o5^8-~?9=>;9dDytG z#HD?6pdX|m=JztO;0P{xyA2bYBSvJofyFx^DV%Uyu)MIIsX!bO7s$GLydB$!ILV6H z(r_V&ON@3H#`W^p6i7KqkZnX8PM3@CVs%n~rJ+{v zx1gR-`y*?ao4r!6CZ=tZ=dkUhVpDq1^9Zil_QRgz4M$40&BB&#iw3qA?^5#vYTl#f zN7P)Q<~B8B<1HaFz0$op0OZXO_iCmw{*$9EjE){k%jt<|*CSrO`kB&3p23VK<)$HF zc={(M{C$a;@aIKVW)=LFK2r>MdpO#f3|{-w*Q_iTN+Z9OR(1;g63gk0C>kF?IBkd; zTI4vfK2RQOo5lcH&Qk~*qdZV|F2K7l2cWs*A^k);7(k*Z%x`hBv=6fM;z6TQvG80& zmhirWEaZq;Zu7o%&1!q06F017GFEP6Z986%TL6&Mh8L_fiKXw3yUhcNO}gGLg&)Js zEF_wcOG-6+-G+4=7Ee5KBRE{z4H;C+Z9_0D-8}78rJ;{PUlH88?HxOY)b z${HVxSaSh<8eIU^jd!pzLXs)txfb&8n9z2tfIqSx3S_XtgE_%8S~}X# z>BWcwr9%LR-eSD%^t`y9-W-mE@~oA50g2RD0Pr!UEAqr1EJ&JeK`&j%mulnbKNiqNoTZ~JjAh5jB7Bd~{Vf1%bQRa|h0qG<* zr>;1cIa^MgjSDF!+-YrCyz54&pu9k7x@m$9*?UIQhTbSsv^7~nSb>Eu1D)i2Dh|-m zv{t}T`9SqUglQ-M-z~jZl&1Dor%g2#*m52Z-DCh!7C*&SDO+ZVDXG%i!^^^>N(NbU zUKMs9Yo#24=bzHTu_62!KEJ@5CcmZu?o4W?I<59kk9hR_0Y|t+9J@|ikZ&a(9l(DQ zm&98sj|RvwE)&0u0~j%;T~>k{_$tURU5*qbY)H5y;ev!s2^S??mT*bJqn!q1{LKE9 zk#9|Pk4%PC@gcKG8$m(hdfvwlbEigPW^OJqv(PW~Akwvv8%ZXQhD=EfakiaJu2+(3 zEA&VSk(NbC^|oNJ5^NDwHbJSR_ysjKHPSPU_D#JN`K=>d(_y@Fo{KaonWB1G&Zzwf zoe>pG6mYDlB2Cq5XtTfmi4g~%SvX=J6&pvP(UF`ndys~Eu=Yg&8e=^s7YV3otdr(vgoLKENUx*oHO+MA+y3(L9Fy@s(X&!k}T-mJd*N(0^Q1?u^%A{ShyQL@K zhEup*nbij6dE5x+PviXZa8LBDDr^RVG}}Lu36hgJx?#5KbP$*|65S2jhh(1+eihfl zUce&MhT+FxrT_jndj5GeDI#g%=u0cRUtX0409KQ7#iSIu9UnDm;nLTX^scspLa5wC zOi*tbp$;RJZ9yn#o}lDRY@s=%EaZ@^C6t+lhWkD{jjZiTeL__6O97q7VRu?w#!AVA zy}EX7WzoK|wEXdnwWJ_Dei}>3;-lenClG5`nnqL~(@6bun5@j6{||}11J?im literal 0 HcmV?d00001 diff 
--git a/local_dm_control_suite/__pycache__/humanoid_CMU.cpython-37.pyc b/local_dm_control_suite/__pycache__/humanoid_CMU.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0207880d2a9a9129c058a47ac8d618a0f20630e0 GIT binary patch literal 6209 zcmb7INpBp-74B{J;VOz+Ewz<6@;K56uz|o93`?@eNQ@;)5vkajEUo5L&9KQ{M%6uB zCNl>^3Vf20Lx3ET19=IM9CFLQNd7?If}9BCoI?&qzE?FpGo&rR9(3s8)Uk{9ip%ZO_HXEj-EznlOl620` zHF9{L_wzxaQILHFzZjGnB}o_ka!_ehBwYesZB!*)_NRjB#{!B32n3Z%D^ju?3 z(m8*=af(ef7T9#-G@EHGvf0KNHrF`IXAjODYK`-3o}GHBH7@Y;2Nw@DzQA%U*D~1x zJN;6Bp*P+*(Z9&f$o@hnN-0^}=BocHLnYZJ`UKE0!?g%eToK`n% zCg@p3S3c-&Ma>9Qp&5mVh1a?57KDo~Q*J?9CbOh*&nzXofh2wn=iJUv79wncTz! zWT7X<3++p7U?wKh`*46)#=zYd4? zN1|FwNH&QZZ`TDWqh_S#All(h+~J(1d8J(HO}v0Rz882&I-4;^ePF&*E2M?ToBOfX zjMJHxhyn-Fbd>NVD`_ROO_-+0W1+ON9PW4`3IiS{>6~--(dzn>M-QC!m50k~>&uU8 zIkAXg;xvt&=i{-?HY}c?c8+scn*TVTe7Y|Y;3N!dQ>hDx$ELy-MTUO-E}&Y+`K6lbab95tkI0jPud zIZvLhI%|)YmzSid#07eF5zVAf#2eImiJHsQjLcIBz!cr^BlLfr{VV8g;u`(PT6<{rHgY zz}V%CF>>8aP75ez=*gA)wunge9tFVr8l4Y@<#1I~UiZ*7X3eR~O&In#VR+=^FrV;X-@~{p|YtTX8e%ocQ|m~OZH#GQ@SwJ@Q@SV3-LwGn$So& z=3yKOWlznhv;QqQ%~t;GTX33R&4Yv^9o>D0sXHR?|Yk)XrjN&RfU&HPBBocAt>~N8I6h|i}|0TZ49O+7? 
z+Sd<1#ncoGx!QBQ5bvVvH9S_gBFePfuNQ-XBH;^FK^&15 z0Ia&7>H>Fb69GE4T2YkLN@LHE=8+U^2r1?!~sJUG$ zNJL93$`0gWX-Pdm3u!6SQ4Fm17*8$aFB|EU8j(bl&TM=Q5BVO@2WgJSV}{jb5~>-C z$Q&%ajK4+wtlk@;&Q1BBx9EkW+u*IpP~W*;&86)cTWUT?a8MQC3MA$iwFIf3*(ggH?n*W`q>&$1<` zzIj$Jmu>v+AQ!wTDI0lzmbl)u@7OIra+A8fLc!jP?H$+eavKB+mFSwS3KM#6dpkU& z^rYkO6Gy|zY!nZ$P%0m~ox1%9xG#3Sn1Bx-Aw6z-EtrUn8U*<$Pi$lqUXO|&e;gks zr=!5J>-yWWN}+^%xR6S*Z`zMKDjm9JbL2-{VCH9GH|Xrw)rk5NY$poV2%Dab1al2i z8toXs!rS;VM(!?^(?lhc$O^~~`L6wJa*At&VyjUIjv{>F!;`TjeL9-Y?IuXRBo;$9 zZ!_Luc$oxUOJR5_BxZ2=i|*dE1l zx^JO6V#c;LFrMduZuz7@)(DJ~sK-p$qSgC+4K*URJ zVH{;6N=Ff?;EQM}<3_A1ri$C5q7*R7lVM;Av1vgWo>I*`DMopkGu&&YBq}P>3LqEX zO7nrLpU{V64Eh-!KgShQ0h3yjM8(86(w9RNE zo&bAtg>;4v3DD2Qjr6Gil}DCf4vKn5%7qTdPWgS2X19%zb z7p!^#H2kp03fqSGGuA9&Y=M>9$iN19`F);EvFTTa{I1Z?^{oZ1Usyoyos_pK2i4=> z9O{Fj{HjQ^W_wdBV23l(65S(r`}nt!ZoyhB4o}G&YQ3I2hK0ZX5Ya zJvF_sbwuDo_25R-4Oxs+F`zvV^`HNs-+ykUd1Ut-edt6SuFBLMU{KVgBUB|ZYUnnw9+;|rjLj#*Kem* z7>S@(R^Tmhp_qlmv@oo3)2i$O*w9&DIu!@5?`H>K>8WvC9J^16AyCSoA%0HFSrk7` z+JBiWSxx#!42mfi)C@|&s>6%iz>tnm2Jp=5RpTsxsESJh(c($BP?Obk0@Y72nV|Aq zJfvoo8VWvwpiTUmnq4#ypNdjV4yJ~Yif}N)L>aZ{+@^yoCMTnmY;j&tzD6m zQ$pD4(m4laA(aZkK0)Rset`zZCZkhBL3&T8#vOIxt5dVZZ$4lYfq1BIghYukt@_t)tOo}Ov+|L<)he=7C%&p^GCB7 l9KVakG9=g;MRk`Nmj`-=rCTQcClUI(6=s?|k3yyVR$1b2S6MyFZcqr|%fXzffWPbCCH2PkPNX3~qD`ZgML!`&P#? 
zDQ_chcWjk+kas$c%9o;2r-b%WRPL1VE=QHV+i_J}C93vootnzKQN7>jG*rHd{9I>F z5BD8o^n<@XYL- zwKZHA^aeo0hsnfZeMAWfp747{*ENQ6X{%frWE z8YVIF^8*pa((4W5u0+i`s>=Q0PSQ<~t8|lCiX=kTdElo3HFlGJUyZC$BUTDTiu&r) ze&op{iKtKtqzI^YN%-*=WvnpC?J(|b{rR8K@24MkZQ37x^d`o*jwfA0B8|+*%*=Xb zOLNESSvaui{|z|iULc1ePMO>eSU=%G#CVW)ML3|1Ge2d1%)WnTjinMt+CqslpYgCO zS<*vu8pyO=t1-4Bw$f{OGZuRf5*`NWHMSBnkl`|mhy4eEpaHsta+{wLwf=E;fsn zVSljO))no0s{XzN3vA*Tsr2J+ppIIQOrLgz$>AUh*ybfgJ&$fo)9P|Tp4R4N_GCK{ z=%eX{=|EOZL|k_orjkgT^wjn@KktQ65c~b$8Y5;1Vwn~*?rpMx9|{nhv6h|Jy}ti2 z@YK4(#U%)Cvh-LKu`3G0FlYVgJalrjpUXej61L58H z^a}o?r0Yi>?|TKedg(Bf!IkKdXb*Pt<*h*Kg*}kT)93m+F`I?Nm=mXv9^yvYpW;SZ zSSpZ`rshd*zbJzrozZWMnGx4SLKno3Ly^RNkbpf2l^p(uHk)u1Ig;mmMzZFA~b<`jS@PfPEI11S1{1K>b-Xjg|vaha2(Sr z{=}Oo|I)O&^^0ir{xMo;?U!Y&E>0tvC6zc!wTBPYvc&6D`v&#>N+*8b zyLuCykDr*Kwo41sk5pR;kK&FVGQcW_P-@41W~JvSw{K_8fw|{MM{$h9%@_9XSbOEY ziY)DvvvOA9)(dOTm6goOsMXr5W~ISeXzZ1wyHm~FtlG1 zgp5Zy?*BKc9piKYIE}-XmCz9P_rh*SvQ@xwF$SoUN{x+E;h2JX-+vnRhkZtEtVh`D zLk@#Mm_mC1C|Md{BCbG+u`#-?LVTeDAlSjv0`I;8+yX-50@I;Lf-TID2G9evY*z%n z!cCvW!4tMQi`SP4^VgF&U^2moD1f^1)zD%#e<-2QWeALmcJ*10%e~SGr0ODVX*ro5zvBSYe>kHs@>pp#ndzNOt$eVwed=Tn<}UOMtTpaJCbCz>O`7RYMBa$ zU>=k;EGsc?)$$swlR}NreU`h}@>=jXhy;ml zUSFNUbE`Im*;ipl-q6}nYesRN##aW#LXPy)hese&LCrmMQ`U=da^7rMWwT-8ZMtUD zRP9|dap+eq9t9!$C~$Kh1`k07w~Abe+eNNSkis1RMF~Jr$twFcFZ0R)>}9k#024QZ z-TA`abJ12(y{lQ7dLNjkk-6w!ljY3buVf~#pM_z?8)uCJ>zPS&sP?&{-BRt=q4o;E zXWls7_ zVpu>Pq;L!7#MoAWykD9&M#&Rs{Rhb~=9gjSjmHcJ13s>AX%S&;16JFl%XsCT1j0%B zmka+%%!Y9Y`Rv1lCI$l7phay!Ps08F0Oxwup2kgwb~LWUex}=@jTG`$wExdGgMIxk zj6WbMkY_AZw1XqJ*apezsu&<>YQ@MW3F~f$5f?#xjVbEUS+Es29Z8?aG^(f@{(Nub zs&)^#3N<1FuK`m+z8x|Wb9<1aqe}bgx7at?H}jI}o;%bxuTHK?kS!$6AjvBQ!CG!! 
zrhD?GsH^&d^>Q0FeO|jid@!yk`yhSH(I7=$DXzCw&GjBbo-a(+(YKnX^!1{NyB`G+QaCgY(aE}?8w zSz9A2b!6$l0*;KRGoV^CG?l-eK=9$YH`-`%^v&3uJ z9IqF^ROgL{miQUkPVhNCufV9D;XFS!#hZMg7(36KMQ#B-Ykcv*ME|8s_pcW7p2!xm zdA|I@R*+XG`kXP;Oqt#VH%}A5Hu=e7Hrh`ktMOCWB6ei&EoDnHdpuq2aaz~p<2rYL zF>9!G8hh@eKlz@4nU(>=XZYC{HIea&r&Pphrf>;q0$*XXjtT6G| zkG>Sjf5BD?Y|tCg@b_+d1kAtkHZbn6J03H|97VW7$1)xeRp{a(1Xg_5kj4GcU_VR| zs!+@SwjT|HRUr}qy{aP=#~1;2<>bDJSOL1n$xvCl3SUQ;RwTj>K0MO*H1^Xqyf+!e z1Y!}PfRj3U9XR$Y*RLny@~qx39qBj5y(6?v9tw%LondBpy{#IOa+( zPzL7THvnuJ^V-ObT&)}0@6G5Ex1S)`fu|+ovLa#YNsy6Zm5_${he+vFJpDsVq%p#-=u^zR9;fL8_Jc) zt`yftR9({~Rfk+qUA#$;yj~Deb-zth9aeFI)YTv3NoiBYsV0nE{8^`<-Ik%}nr72# zLf6&IWy^&rT(d~~Et#k6d6>l1zO4GWmYS(clZ=0WPU90iN^J&4$3&3W;%3L@@Q4vC zb+`kJEs3|$XY_Gl{j(Gh65+-X1XQ6HpYxDP2&462Xby3CW5;JUK-wiWOFw!~B9KiK zq_Jb}TbZ$MA3$$lMRWAd*!d>ejA4t6jkv-y04~9#ZMPh80Tbk9h_4@Q!R|rVyrde* z41$B#J3`OArdmDf`41SY!&260sW@JH+PvN>88Btdqp;RcF=)f{C~R1?R6LI- zUC$F7^UW-$E|Q`=ihzGdi_y7V)3H>|p}U|K`lUZWa`-NA4@*BXdax*X>3OwN3d-b^ zJclLFsi=CFSDzz5&uhGnTdVR0pToPx=lKHOb>8HQcsG6ovUlb{-xK0pEHb*d!qznG zeKz!Ap~L9~WNy*r0?aly;CpShw_g0~Pxk-wZ~ywE^+SOu8xzoz65}h=$P~?4)5tTW zC2g217W^i#(+l`Vnw|W=1P9!X!83joj=(HMOfMn_QnM7r8Q8D0bl8VCf{RXnaJ9vL zg>}D9HAOIj>Xh1^^?sNNj3R`N7+w*`yQW-|uN2gn1|{yQAh#b~WNRCM`tF0KD!tF#~6;x9p93;~a4^&m0NCsa4H}V*9Y%(U4 zk{Dem22F{{`a|3d!snCjMNk2aNrK#{dMB6{&NhwflGzk34=4f#oa%hhyJOnTVGS_Jk1tcI@NYDRaM7?v=O#-WJ5pD5K`#xKF0f}yXCL3w z4$RD6Hl(vdE&+_^*QhT-kN{ij53COjS(7lPpILAN&==A|2ChB2bPr|%yk#W_6BA51 z&^}pW680>8Luji|?RsmD)HS}=Ad>S)`&aZ;h{qZ!o%E5AViOw`K9bzwzVvhF5$;p$ zP%R-tzC>4~#nUU|cYs<&GYaJcXL-`g9a?pn?w`_ga%9V`dxw0M!;_%wkIBA1L6siF{`Sx+gM-*e-#>k<3|5G= zBEbbz^MSIi|RGxB!>NHlX1iShCSYFa_3wk+U7?-1~O79*eUyW#@w*bC3QnK(2 zP>yV!ra3xsh<3W)glr^;6PD4|1PBDCfwzti!ytk+TA307wkqsK7Qj=%S4Fa6^n7pv zd=;r8EF7jqBpKGaAjhe__4)E&-TLRz^N;bxr__rQghn_PhDqGTOAuomJ=8oMYPwF9 z4psom=|cya_BnJY>Z{~{KA34^Vu|Be1v-4N5eRs7q$BB5V&R37&eUcTxl4V7 zvf(_aZ=As(4WlF;-?U+@;|z3^e9qfgy?5vK`pTNOad+*@PNujW-kw?)B*a&LZP^&G zLyvI!OgF;cc@Yq)dXx~eiET=rQ9>{e5fuqkh1}I@fuKZuPRZ|4@5ZK8_Q? 
z?a(2(lKKFxAsp_-({?|39HiRInWF)pQS$pV;|cG^>dlq!u5EZ5x9_ZaYq#&*-cVoW z^7@9qwH0u(>(ab*|GT$0R&`+Ch6%5}H3~-4sBd|nP>y^LWvb>5jZzew;qwXR=kABv wHvWhtFeR_Yu^IsW2Bg^4IV!0?@=~bYFC`>%o;S?ex0l@i0&Fk0{r~^~ literal 0 HcmV?d00001 diff --git a/local_dm_control_suite/__pycache__/manipulator.cpython-37.pyc b/local_dm_control_suite/__pycache__/manipulator.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..97a06d39df9f1c1d03e2d3d57f07a21491b9c739 GIT binary patch literal 9099 zcmc&)TW=i6b?)m-&xON_D2kM{Qm@u)Yi#XkR|eu(Q6y0ky_O?Uib%)K07kPpRWqDX z&-AFeMe^N3oI3TLQ@SxVRn+ji_ZKny?w2&}-zYQsIUug%i9XOZjcIj_>C6c9 zu2DC1qD|0d-ITNi+NxWUwgbCvqdphpy7_uumgNJdTc{T#?F7Ycsa}$F0d%=umUJQ?F%NtZxR*QX_220c@sk@QqB+nuY=N!kvM)#q8IejLvNo35W=GxbG0OKg_Sv14qW z9cK&d1Y2x5^^^P*TVf~KDR!E@!OpaDPxbm~UfFwNU#p*CXPNs1h3?tUb4{Pt<*$eL>cpdqv%OR+V*cy`t`m>`SulbMI^H0(<9~ z#@^xi&f8DT`r9a}v5P3V$V;8iqvQ+BUel`Y4o}_>{GKn|yMC|T9|V3J3YUdlzul|7 zAc$_%b>{*GCnES@Am1yEBvzXm7cI`wL>xs>BE&VInuH9Q;dFD)% z7gYInEw=Y^iM_Ai(!S&F=HmRGlUS^n}z|)B)_lg+M|tcY4~yp^#&_F zH)5?*ZW<7JdARa`#{LHtaMg8R&q(-CDVC}2|eeGx&1CdjntE#JB0-G6ZJ{#sgO?Pik; z-fM7CqQ+%NnL!D7m-phd0CDzxR6`BaBq8)rEilb3udS`DuT`BiKZv+T;?1>t_)MG@ zNf}4YFaUkq>UJ8d}Et)fWMV~Xomr>;kq#BJhL=VvHn9+Q zQz}#JM-Sa6?RZP-8f`F_cvbsa)e=?ALwpfQI@25k!HyShg5Qt*&45E{P{XiWHPV?8 zui|!>c-hCk1my+oZac2#)BOFdov7W2(&?rMyWW8S6?nAx#ulC?ST6pQq$1l9yO3u2I!k1H^U8E>l8#Xo8Ie z#YRl&Td4RnE|MYsf8yeoU&TdbKHN{$_Vr11|Ech?q^2nKvH2QK-ui4#{-2tSS90<| zA%E(8EM|Pflm@S1;|j46&-iD8x{fEJ&BC?14m4~qy)MB5kfTN50B@UB2o5~c>p50n zMU>=OiIwqo*c7bowA^jOo0-*9w$*2>O?b=?+$Q&N-1Eq70^HI0{^^)+3}m+w zIng>e7dl3wFKK`@=AMPePK(vN93N5+Pw=v z98uMM><0rL)o|+CJ-^FYYB4|dQ|l2rq(-lw<{n3}-*}js=#!f0lj`eL6Ud7P&9o#3 z_o!!-7E}fuk6Me_&2Z3TIF`pPJ|v#YW6-GR!=;REwU^qBnL!wj9x^FW1<52c1^bqeO!OBnrOSwuV_b54P+SDY`>bFoE(ML67I6Io+xOTlk0-~Rd zht?wJwh1EE5Z?yg#^bxiFQp(%r%XXfSuvHqxDXRw#P0)ze3Rl#PUh?A_9wmcf^Jl|ApTRczjx_ zZ>Rb&H6A^|v$I+)rbW0CJqQ}W4U&$9xJ}J$UkHDP+=6J!@AFA6Zj*l4+m8l5u-ukZ zk*Y13iX#?qGSi9D#9RR1(Z@u|QI?lwk4u1UVZ=Yim!*UWT+QhVIGPvqiZPrYNno;Z 
zgO)=-dXqfH`a#$q90l$tzE$;A*eJEAAZ)$YSISU~crOYCseDyeDse;`>^K}Ww*C+! zD^1XcQ=@svnz0g)0*sa5JTe3h)C4-@ZeVXNk#B80H-Ouq1lX}^YUZk-*#m%E{&qXU z$t~w;5tg1{c>x=kdy?v* ziZx0&C0mp{qU3*ojUA&2ki zj&?WM13G^;IeD|7|#2Y!Pe zb{-D=i7_t722*GAr;Md4>Sa zpU4Jt$H$>`tFxnGXT+Ouv%}$LZ*uax$IcUn{|z6Un5U#Y9*q1)`rv4;%1OHH-s`K= z|ANcmYJqjWyU`nT`#Uw&qV_;~Q)@8Qd}A!33;4`Rh$Cks*-gd5r+!8qXW8Sz0@?bbo>;nn9vXb;npyOyu zLXcMF1F#4O3|PNw$Ttp_MDLg#3vn%a+q($X?&dx=wr?az#a3cnRVC2)_5kloBG{|K(nM;l}O_WwpdIlwo$%wc%4mZCb-4}H53kp5FAUh$xP024KN%QTVw*|AB7C@*y zk9cCQ4ciH$lj?mKA-g5~s4vHWBS0u$4kfA7*xpW!;dTq9`2EW#BjQ@CDS!Wd?Ztl( zj-~@XHN5TWTPi4(YixyWggt)~pk<@|zU@1h7YF|Do(F(cU%=hcxet25{je@qk7q ziz$AEnpiyq{lrtsQ!pu=Bm1WyhL_2+dR(FC>(uCXD3Sd5U7{2t`n-ov$}^0-jsvDB z$4V``9E49WJ;|^rEf4x&&?uJ!V*SVR(pHb+aQL6v6)Dw>(zAW!mV8Fje3pv27KSuU%@JJx4nGV8-_(M3jXwD&+cm{B~ zNC9CJDQXt=MWbfS7>mXQS?{2fLXRr&NNwiy)9`wVI0qJtIgB@B3>Vc|@G`H*rvmK= zdT!E8NL=Lm>?1^gy^Juyz7Ff8%bksYfy}{*qR!r@fU*f%>*Pd?l036yy)EnWD0gI@ zEI52vp_9OTI!6o$(quQY=^PJ_T`HT?1f$m`X-{B>F+SKg&+X$$S^+X*ke@|a=)XGqbQgN-QTA$wBRhqE_mq}oK=v7ipA5uK`IHfMq&6&hat zY-Wz{hmLUb;2se=HfF>qwsxI3bKvMpRV&Ltcebddv9=k*xojEqsjT|YI>zShj9drY zW`IHjcmPEZ6w~%#>WE?#>M@3AURqP;ru!`f#%imO+b+%SA1L`pB(Ixuk+!K4!y7bX z(tXW<$u@>FYSsr0D9ltAf`*MKi7BCgv>?1MeoV>VQX;uE{=E1nl%-Rdr_kWwfp@^V z(Xm4ulm{;jBmD-7l2jJTH=HTQbV`mTYpdtw7b!l|98l01V@Z%iJQNf<5r2)OUOc$) z6gCRdse^)=2-Mx&!mIB_MV^KsOOVb?^cuwNAQArV z5p0*YcH$;=zD3CjB^20CE$Se-bHG+bNwsvYs7TgTa>gRUQIpvf;!)s2?CHI{2Rg;{Nw3|aHvsu&KELPW7^;P-t@^Z_-wfhgn zH&+bfpY)@;Y7oA{o&5^~H(Z08+=|TDaxIf&8?x=%T6Q2iuA}8zRC8-sUy7FEx?9(C z^{5dy-KLfsQ7dk{Z7nw;cifJaThVg7;;v}94SCgF)p9LbbFc7@yUv&0tK!P(HQd+5 z)id+VbZ>~2)0=08dyB8|)sKeznYeX&o3EXjVx8A`ZD{i=eEp;O!E}G{%-~n~wT}kB zChDU*A8hvyX0G!an7JX^qt7w(1+VQH-J8?3mm=YlXc9A?#(tReKEVS<*UVS_gDj0E zN_b&BPNl+BgNMgq7N!a0m9Y#H5ABf(1LVRFdSkzjGkMa2H};yKF{3_fIj!m*c!fnw>9`B^4Z)|+<5r5Aed;;3s& zvLkOv;d4)G$d|VE_O|!;{$|KkEa~At4>ym}SZp2(?k8$fiA)LUJ$<~1e^DCvk;h}N zbcB~pLM1lmUm2g|*A9g$lzXtrD=hgb9o52MSn>+SymKIu;3)RxYZxDpH|U~pb#QCX 
z{2Bs0Vk+x{@vZURR`v{D`iuGAnOWmIq4C~Q&ZsuCXU@>#*31aane|ueTND1Xr*|v= zRi?4*Ext4vGv#NmS&xvDurH1CmI`CxMPVGO?owWVd32J6L6)x$Wg2^Mn^$@y60FPH+sSb#(eeJdEw9qTx>ziDuMr4`SzaHPnjJD>;crPHkk?E5Ht-|U z3GH;umf0{JQ{KT`fO~$CC%?j--NPUZ*Fo`S(52YWAUf0SU`k7nPY1htc2_|{& za9{bk^Dg!&U!6=u!^;j_rnkK68xLSf8F16<<$-$_c6|Q{)-Rpl)IKXt(XCEF+2xsIc7zZST}R%9GRZLH&Kqpaogf{bbWQmqnxE%ef&W^m6DKHi zKEkHz1h=XaT;6oy5cABrfmatWP)rr(-fY9QQ@`srGU#HWq$F~5uj=0Cz`<>HW-qi2b^M>b92zXxJ@`zR@0NKks zp7&6Qf5fvi4tcKHY^z4NUBVW>$5eyeH zto@BKMB(Jl$ENFO8CmwRfn2NehR(GnZ{b=022|!!-gV?nD4G6pi_NoWun!6tFqIax z#K22L;0J!>CxKYRQa_;FqKlq(d1nN$GL$(pK3F5GpheVKDjoLp-#ZD?*pI>~1u6@Z zOlhTv(LrUvpx0_KTw5Tf2U=pRpu-1jcU+Lb1`}WfA(_tx$s``1^okw5uQhM(0Z1{J ztKY!4N<|ElN*v@4-A#cL!V#X=wTgtJO(2h^=LHd}62fH>6`NT5 zBkm$38`HIt=6jWr3ph}45Ej{&7%0(BQKUw8+o16bc#Ke(MoZj$XsEhs5K5=b84COy zqrdN=JZqQ%juI7roU0nlSJk@I94Y+Zh>3BSq4JmLy(HQUGx!R7Q#h>v2uK`a;XtUj zLL}_Z_s8J|8;5Y0_SbpL%aZhfZCwtOqE=OLB7cID_Ped3y45F1h{6!(Rx-}5ak$_B z5ZyS9MXT}WG=%E1_Y!2CitaHUj7i?5~h+IhzG9iyOL6y$hUCf{tlLGA) z@XDwi3m_y#R1a7J%WT|Nr1q0yo%La7sB< zYAH`uCHEE^P;WS(NV{-KVu8GU&i1@fRiLhuw}UjIghW@6lwkB+t7WEc6Dd7N)6$;Zw|0gv#-%v90Z&nCj+Bt`p$d7tM4KHMN!qU8pTt`SjTMG4tnV|JR3FYs#lupYySmsb^4P4 literal 0 HcmV?d00001 diff --git a/local_dm_control_suite/__pycache__/point_mass.cpython-37.pyc b/local_dm_control_suite/__pycache__/point_mass.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f2dc9c399ad8c77098fcf9a15af2923410b77c85 GIT binary patch literal 4527 zcmcInOLH5?5#ATxAV^UZMOu%nQ%-Co6c{I|#F169X^Tp+B2g}ruB7&|T<#3Oh4ul@ z4oDLPo}?FZ*}2IU zoE5%weruvRtE|NqUu({7zIwjKme`Gn!B<#`mAWQdW;b5zuXJafzD=}Q`%Si@+TZ)V z#%{6I*BV>pmHwSqrgH}^x7iw6)_9|T7cKW#X-{jfkCwkmf>=I?yfn305_v(~xqwx) zww^6|hiMWHC3k~pkO+yUDhrN-G)Q8|t%1Nuw>yk|iI!b7m7`%l@e@cDKZ&JCLWtI( zmvZ{(CsCwEHiQ>rtzg7OiVt<_i-q&Iir^=nVClsQwD3ul>Cc4lB}%%4N^0ji(v*i)DqlEF${?qekb}FHI&xRK0%K~58N!Nx2 zxJ)~v)_^SHl0`b-MPb_%RjilQ1^1Y%=E%xhdwbjadw|K%F%Y zdF&rWo_L83WES_*Q=jZYzY+>TTiNIn2(XPVjaS-R?aY)W)5n1M8{^Cx8-x3Sc4kPc 
zUmBZZt7|ZGtOfem__Og=hb^tq`%_zz;b;~6Qd?Vk>C4X3wvjE)4$|%60Mo3TH)M4g zMBEL7D3I-PR{84aG!6VTTkMJ?a$!_AKY@HtH3h-v*s&->>#VUI9|t0dBOc3kNvxtx z+@cT5{PKhQ5-91VS!GbnZIM=&=OG9r;wGw}p`=lC;)btdKKjv< zK&gO83XioupxDPE>zA^zuB~ZUz#KiA2ir0CC6*wh*eWW?1b`Fp4m*hLUYHzuAv`+~ zEQmeHJ6SpZn$?s>G^A~2rPzx&%Pi(eFRS%9!ETUHw6RhicCQ<+MZ;W~xcOXL&^aYn z(u}4)S}ow)x!UQ_m0s-gYnz0p<;!>4R%XP5 ztQ=yCA>2GpL?kE{v~}?T9@o{sc~vu)7I=z&Md%padG`)fzg;+J)pf~UuA9|eib2DW zJM_ZhOT~3r;=8WWL$t4Ip|Getj^O;5mL?3t*NkdO0q6<>m1(IH^}K?>#`jtm z37A>0YmTL4pzw7SSgf!rFj;0bR>!mQJwlyRg>9S4Wk$cK@eFTF88_+@2_K&6j zItk>FokmH5N1kRnyR%q8h@F;Hp;jMjt|k#e$J#5SZ;pZAH#)L4r5n?G2$=80fegG5 znGDGU_$LjyPg2d6i+$uB&$KVrP= z4MW+^H3yO@rtbpkOYUr=MH(rk!vPoSU$Gl~vJ;vL+Qe`t&cTh*^wr^nGIjI*6w!-qPE-MSSDT7#p(G zFP~S&#zen|EXs^C^Se*RrU>VHS!1i{WvO14tjf}PZETFKiSY^ue`%eW-~Q&08d~&z zz2Bgi+cO?%W2<-v&7^nPlnVsE^VS%v1AG;Luj+jY@a=)FC}aS2k`skd$@(%!+Mb+# z7mDI2U*Llv131IGY?dvCC2dTX;hI~Vf7d*r_BWg84r8AQyGArc_vU(!CfjdB0-=t2I^|NKdc^*yN z;*gO473L`%A(;i@iw7SqmOBZ%Lx1_ULBN_NQG8!>1D8}`kKCk zztP=UeCqsg*_m^YHK=@vl9DFm920Xhgp6MR){9DOiNcZiC~Vamnv z>;wrP3a}k)B}uX4B+jbG93(lArw9iPKf(6Yog&Q|F9hR)vnL?w;xi~9K1VeT1lLOh{r4^_5MWDTD~mm+hJ|4e;JWf5}8KcFZ&O+=>EY_3%+ z%2nI<)#SvMxOy}ouLfEG>=hBb070&y6fcUS5gc>1*7by`(*p+);G`g-Tj^C+wQZS=R5mZW8drb zI3u={W~IGvcJ{aPJDKS*rfyH_`k)wGW>Gh_|G9&k#htABC})n3$$TlDg$4GEKckvI X)%mF_7=lsNtd{EfGP3h>ZF%8;%u}jv literal 0 HcmV?d00001 diff --git a/local_dm_control_suite/__pycache__/quadruped.cpython-37.pyc b/local_dm_control_suite/__pycache__/quadruped.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..82930a344e51fac587e20f856c1b1e4d9754652a GIT binary patch literal 15727 zcmc&*d5j!cd9SPQo}RCHa=+@HZujL^ zH6BmXBL(aYB+(M0AW-fZLX?vvkdQ=*5+sO1Ab=7?t`ZVNNk|;!50pRh2TGLuzVB7l z^o+;L22kjke)a0TSFhgt-gkdrKRq&%*YLUUsX$yhrD^|2H}S_p6({o?yl@cc~9jYF;>|l#w+)ViOPLqTjhSCR~`_P66J&khgTn6)4V+*B~mq0JSg_w(r@aOeVf`}Cm!PV zsZH$x*eYccSgM!VOQ0h+R+alPR;x*gO@J=y34PHnMM4bU=! 
zrgVF;*{ya_%eR|X8XeK-)P3A#s@+Z?yDe0$E3WU+&}z5c=6UkeiT8Svhe~hBm4d3d z8|{`8bh|CONqK?v=)nwTZM${v_9RwWfA{2Xo&JaZhmPJ((H$*Z)4p-E{v>}sd-S$} z`>+&rJ*Ot|8mHyef^dZHuejB#>Xy6tRyOJjK{&cTTs35t+*Zr!-3X(~a#ZQWEuH04 zRg2%PX`{HN@jZdBp9i%B+Nv(}HGRbjjHW3JVZNfTqyl5r8d!tWnz51|Xu`UhljjEM zRp8TVTBO%#d`4te(Kpc6j6tTE5!oZ!!1OagJRTQ zZf$8Ou2Qc^vn+^xEC)IIfo4x#|SojbNs~t$grc-RrjB zd935My|=#mLAml$n5wn9UD?mRd&S0o`Hi9FH~L;^(Li8bS1c3mwOZ63cwstlW!($H zOyEfg?CX~>sdPJauLivJBwk(YNiPVN9meyiQJKSuhI#3^!r_(65o*X`{m;45_he|I z8d~&xn67nY+YQ3B8(?c!!nAYg__=d)vPh2-b0t;NYO`U62GClw zL05rhYBbFA1|8a!+{@DpE!PiAgrvUXVrJfEo~TDX9HucpEzoK=tF?F{1sHlN%rSy< zLyjJH7+eJfBD@jsDn@W}c+|F`S3ADf@w?Iw4O#m&O}-6F`}SK8E_B=8!RwxIJHf%g z^YOHE^67*4Z*{9~%MooS;t7twi1`n$AAD~a+{3-3O|`Ye_Hh*TP-NqSm}VVaQ_`Up46|e25x}JupQ+gG!(UdSFaKZh;cbzyu|lL8_Ss zEm}b)Fb0rquNW(-fzc~Bv=t-BHgiO)HDjO!d13yXF)%)2d_V_ATm2ow7=6iYU9|(( zzgnKsNS35RS#rpf*6OH&x(Ggxj2x z)2EJKJU8#mpFKbAoI88|?0i@tYFP&tSKx^cdj?U4jKK%of8*k=4UUQ;}^`$Pd_)6mINyKG>x6`*2g+m!J6y7 ze3Cjcz38T6-TMp0!Cf+Q1eSDG&&=kUKZc9E}uio|I_JgA3mMg#m=U~k;kzV z-sqH}HhmHe5$xqO>S5lQy*Q&lEl< zwqglOq@ZV}L|SBUO^d9^;hGW9CveRQ=nlB%L`jU|nipeY9M^)F5ZiDqib=5@*AddH zaV?Q%jq50B*0_$5c8%+}xL-Vg>%{vsORH=HPqi6=`qL3#v;9VUvE`DEX)iT`1=|&( zL3eJ;u6Zuh3D377Zds*RS7yOuNtvyZ21p;e(!S~I+bBp1478gDXa?Grt~H=(IcA+o zOK4b!J~NdHQ$AE<$tw!S9qF~Y5dX|*g&Vlxj!61B!E(>@p{+WklK5fKS%5<0sXOp` z%lBHf(4uFcS>zn2+H!s0an`wjdjID@?rugEp;2c_#PAlI;ep#0;EGbSO?0t^G8|@KY*>!d)vq8J?f?X0|{K9JjF-A zkS`x$1;c+ED)Ez38Ck)@VfI;Xd0NV@42@1NOf4;V5(;9Y^SF?gslTAs$Y(ia@lwL# zP#calx>JXokM0`6=OxF>X9+kCO=;uOevvl7;!rn8UD0>x6HsP`4tetv39RX`>7Y;0 z6lD02Q8Cb(FesXqz)`P}GGg?f9_CG=8yx7#u2`%FcDKgl5kODt+WiCWjfQVS58=jo zs|%$sYAw?V@vzY7_oq^_hPQ|tauI@mRdb(h0}8r$}ukK%+$=&HR$N69p8RlB|AyX*_LBJC@F``u9OI{kIH5$q(GXV+Pt?4D9I4yRrehL-AH{&|Vk1J~(T6!=hh1k*AtnxCEdQ zT*mgKivsm4xP(@7EAo~CeV@Wt(Y4;+AH&eAcdJ;LY*b;xVBuSmbxFPx&qf&9>2_ck zbrmINo3QnXCBtvfS#0}Zw%rV#p~Z!TcJrB;dFRa8nbXr@dI7UnuMQ!CbqzoWt!p^G z&~&S`8DdFcPVw_CNqubdYJZ2eG=@S06IY@bW(g?1zw2%rOdw8Vo{;)6+-xKONM{4u 
z9T7i~1rtAbb#xb`c7spv(pJoY3Eu(a8u9G@V+kU3z2uO6qLB5KMm%%)q%DON+I6JB`L#t0JjQ&lxdPF7xPQK z|5OYZsFf{vz@SD{y@OcnwMEZX+}$RFpZr+18$>+R^UG6t*~Y8n07aMsa?P<392VQn zx#^iX=hTJS6Vu^HbT@xt_W6s`F)Ni-ztLG30`4LviScZbmjE!)5@OSOGsKF+gFb_& zmCj(0_1m9}QEYh7*3jCEN4J6&AjQ0hAVt!<|6mNKv*#}!Xf>|h0i5zwb{!z{0|XTi z8iV5WJ7zD0lhMub6DOw6P0wC9KRrJ^dlwKCpzqyn=O+68eA9*t@rKct?2|BpR3erm zPoc8DPdKfJd@Q8^k(wmP4Uk#Q4q!fA1%aWS8-Ne5nM^*fk9Qwo$&mA7Gm@0+cojb1 z`gB}5I*cj`G7}LnW{-mhojN;n@(xfisz{n5gn8Xri0LaYYcXWP0_rhthNGRX1MUVN zs>j~g%@|V{TJx7NZA4+Dg;;ujTMWUWQ3)hT_aG6#kh~Wa(mev`9;;>yMWum6I>-ju zETDtAovH`!(rq*OK;+a(G-nWxNQ^gNy-`>6O)>Nk~+Ovax4aVj>cUTO8ZzeiWZ z7Ko4bOB=VG>@Q;|t#tgnh;eXbBYghv$|mQAB};p9Kh?~XSyW|LaEcND=^Z^2l3`R6ow(pT|>V@ zUu9P@?A+L$@Vakdiir4&WMV$B0fUn1lJVbHNKMD+^Qw?a9R7B~_hUk) zO_k*@5G;szzYDOzPH)u+rD&?n2(^8YGBn! zF<=Eek__pW4a6=<=~{11lPox*U4rCzUYpfkK&`U_zA8{3yj9ngw<_{Hk)tCGKei<@ zZzg$yh$60#8X}7ol!A=N-PGT!Un|jUkiB`gqHEeI1kr5QUhv2Tpi~WDb{6!rej06oX^|#al|AMZeLN7@YBDVHPsxLoF8Lla=A4IF?n z)*)M(yKfNtq7DS{+@Nef7g^f2+v;}eu{nN&9W&|)#2sJ56U)gHvu?xpqBSGXL2)GZ zbse_P*04*M#|NUV!-vw(Q z;+yW`y>8|ldmN730+#UufTg1l?7#6g|9%u&(SlBG!5A}Oj@=6(K4zv7p-_MFvly{~ z-I{1Pfv_K?f8w`B}=<6*n;RNia6kZ*Xa$4N!Yfq(Xp4G*<) zVk*_P;WI(nws!-#+FHi?{D$xji0%>?8A$8#O;0@P$G%=n*YbQyyKp>9XxE zxy$RS%tp5`9tzDyr^fb}2WRjf0{%?-Q&yP1LOjC{Gk&Aq?lutQZbSEJbi&c=ZVPS< zH*i!#WZ8vg6`FDe`+hzGbF%Tv>pQ`0JAyPV$so!pK)Z6srj&r+fxvz;T?${W7+FwgY#r+d{?&dHXkt zFG>f1>|$hCM`xRtkYp<)cv>bY5-b3X_(VYmHhEk35234)rwG>dTFViN*<6nMoeB zn4HS?1+XN!1bL;V#8#_~S_6wDRyCwG#-?5}9e}j7(4dGb2FASygxB+sv(pLU)lj^s z+o1-mvyEU!8z2x$uG%(`48g&}_Jy9J_Wibpu%agc|I6V1z2&kRQJ&?2bL4-%oB(e> zAepR(TrPDy=_9j)_gaNVCZN4S;(AN=<>4vzL(8ydurA`f&?>=EF^nEf=5rSVge`*x zypbU9eZs4`i>+WvtsDy8(>V7T5M0UGkW?aHKs#d`X{8OEy5q!tg5%Uh&&B#7VYgBe z{|2?U5_K@<7Z7h8A*?9JQ*2_xmVY20oC1aT9r= zK|08Agf*)otO_&x%X1#oFFHZCgyXQyP@+lfYDK@~*p=tv%)MU2N5L|yWP`yo zyOW`N=Y$EI4GgC534Hm{091aH#JS#oD3Ms5VQEgdM4&`QWGA4kBa)br^~kLA4)#bv z#v#%B0xgqXsu}%BUf`wy$?E9zH;LaSyhNm^kOL4nVFs`dNa#|X>}L|W3L|a;Cnm`+ 
zmhRu5>?Eew4SP`PBsqEOv?>kxobvOyzz6yTT;vz2_!1R=L9J;PAM!7$fz=oC^U1%W zf;jx}VZkL0^p3>BX1L^O+e%`*NFXWUVU07&ijW$Ho;CWT33${v$`HuOPtvP?mx|w` zg2GvnC`%F&$dHP!QStj!F!50l;|yDC&!FKjy)=&sS!<%ZsY_K!f#awQ;;&K>u&j)pRimuH)tM|e(-n%j~7+Jk9xWD-T&tY@l zgH1ay){rL;t2L`^U|!>1fec>Cro}E_xBAck*|gxRrQl)k)e`YlK^jK>2gyhO2Orh| zmvR0>pKl#y7guBfe(dNv1p`H39{w%LsYm2xT*ATvM1D&+9obz9N1Zd%XHTD*KXrEc z+)3x`$@N~?9%1RY5A%hjHQRg2H=F9#B~(qT+G_NWK5|V4k|GoZEOKM(@C9>LYjsg_uCGNh9?3Ax7Se;q+D!?ym6D6j zoFb@%W-pMEe3tU>sMsi zq=aPff&no|v&pqN0~1Dbm2d`t7rp{-8_U$pYnQ37B1$DO zb;zZ!&?U=6D!+-k{5ln6q$w%;QL3#A*vO$zxG2AkA{@<1nFi+wN$G)A&>&$6gG|WX)Ji(YL$;WM)F6Em^61V{JWrun6d6SJ zvJ~k%0y}dzDZRjdwqf4^5<;PIWNJ8AH2^JpmHZ^nT=ATguQ=BEIOCpnx6Y@`@qGc!bM-Wong&$3%!JQ}cy~s%7{No=S`%OX|G5Z~E_|EM2Gcjdr zG5g)FHgq@cOSF}6-=Dz^@RZ{cWW&&JK@|3qaN%|ik|Lif9tBE*o=TbYNXCQZlcI>n zP7RJmqCR+pz@cFQN6%(4IvtHhY#XD~8%AajMah9~GLz7AL7`cqMA%AM=IiEjp4M$CjD0X3TSo<-Zyd$%Ag-aJB!P(;6sG?Z+M@Ie zM(1_O@Ch2tk*cqv2HPk(zpzNZaB?Gz4abt0U))mQNM@w56yZgwE<^$ZKi7jZk&H8? z&J;P(7(NSI~;_c`Z9xdHR|VG6kf)8wl+#*F^hI`Vv< z#1E14uhaWUM_}=@9pZ;tr>vP%r%nO#e}EeVy1K6iB&GenrKqVz_&~p{#z2SzeNEj}q9^cmO{|N**Z7mWrh8*!l-ql9sD}K{Ph> z)MC(R`I5|3r4STpTC0U09jRZ#rPzTMDd>K+(OYH>n)Br3f6!yoRGg#Yc`8m(!3xH| zQSD!;cs~`7p$H4}ZoTe_c?wa4skw`1=cnZ{>M-#m0RA13oTb4RsklVNX)2h(evWE{ zhoME!voXzLpMgJwCJDC;vqw}M_ie=D{R#>MPAL>&WYskaOHd2{nhEKhX%oSo_=_5k fHZ?uJFRf3&r$%4=I*n@+^}L-eN$O&t4Q_;KM- literal 0 HcmV?d00001 diff --git a/local_dm_control_suite/__pycache__/reacher.cpython-37.pyc b/local_dm_control_suite/__pycache__/reacher.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..72e81f39f2b8247606d70ca8df8cec51da470f87 GIT binary patch literal 4304 zcmc&%OK%*<5uVq+`4UA@6lKYi7wiS(W$Zkm2$m>`Q3z0!BGN!GKrkFm_bz8Ok4;Z6 zWiD1H(eTN-2FZzg^bZK&zmUH$SD%FZgPihJ_w15l66Ba6r?;lNr@N~9tFLMv&d)au zT+jX{*{v&v@jX3EF9*T{+Xc05PPmAK+nFkX$T zNzJRNzFJ&Q8eT)m^|+a|yq1z1kmtNPB{$>wWWifdaw}d;mb@h;&q2Q8T~Tr+UiPl? 
zrL&cZ;ay|%-YQ%0uCqmNjV-Y&r-pZfUprf8%k1jJ;;XE}DqWkcu&bx$8`Ha~$0p`% z)(1~d$-VYgRP@yowxe8(eojz>>2Hwqs13I2#>gMS(XG* z+IdIrYMaGkaF}QDP;x&?2APoPs5 zB8wq9he6J1FwBxfz1b2$ioK!{7dZx+G#4A|r<&le0>SckE6}3%*T2y9?fu?&x4-(w z@4tI{KP2U*mkr?&?tBT2G|o(BPE2X^t*!;jtkKpBE{7t`-N2Q@LCoE9rzq{Y@`$@h z#(3=h;pvW>OBmC0p%><|D3os2b%Q+TGVhF9LOZDQWfHe-QO9OQ6Mf8A3l!DOy}hmd zy>AV%h#}Sd^EVquS;9AtISW#`A$cyj@EDLF}`{FMZ_a!$|Ut=_Z4dVx`BW zUfYL8zSiR-GSot2SOUN{in&9chDS*tUPAT4;X!^9l1J#O;3l*y@BA79ykbi0jq%oa zZA)7^WBBSD>$Nks2Dc;QwIwV4>ewDTU5i;`BQnREX~I+4u839iiEA{l z%+Ck>mq1r9FKUBw8%GBD%kzU11SN45%^lpi;u3q#oHOgDWBz~L`|1C3ub_}oCM)CH zHF2Ff)~F%Gh#P2%d4KQe=FZMV*NK}LxaczR0ZqR}&24ICxK{2doFhc&FEMb*Kf*;{ zhDcHa7W&YFbodHfapSztIqzFDpiyOX ze~Nx0jAI^3>_^BfQ4PGiUBn;gGfLfFoE<{BFv|ps(g0!KDXJRtyYI*sMO9C3+eIY} z63z;T1u`faJx*XBWJHKg&g1T7pD*bbeVO@sL1hrdxM8iBqqWj!oy*hSbHt1iG-3A5 zGX(pYJuxN}>gMR-MPu`*mvRO+%w>><+&$#-73Zn)c4;#4rPjvHhoT#Vywi3HD;*To z81~1Yr8E|5)ynHM>2oyip}O_8 z(M27>oYRKqC>h8;tpndRRtL_jtihW2t$hu8_Udr@oQlfPPc}iNIt?G}gOLshSgJ?) zTRXY4J7XXlJ5TSNqIGPJ4d0L;`LXfF>f2)g{2LP`2Bktfc#b&OP7ysp4DJO(1Fq${ zBGVm|(R0vgHeFnspvb#Qa?A5olzrFTbPu|57RZA;MD!dC4q!l2#*yC=ekQ!V-5}OxVq!G%P^OA-J(6y*gVj7)lgHGD60V zPu!0g?*_wIwmWJ95KZ}I7m%MTgHmUGdc*g_80j6b zJcr=Vu;z2zxq?}*e&KAe0 zy#P1DbYpaX50#TbE$mzIH)fS=O<%1|kQHEMU`~cPnVsG925Yl-$ zsO!SN-!JDC=9e?+WVgK}KBf15jHamQ8ch={)g7Wi-5+U4;aOK69Ay?tEs;T8m0|pT z1~BEif~XDC2xv+~o8GksQDNs%TGSP5gR~1f%yRJ=ZK`tYG99@NHNP~e?dapDq*_#h zv=^%ih($wCzW6!4dJcj6c>E*od;yJNEtw1Uin(B}nQPW)eF}zu-(v=gDKk8xHGYbQ zisK2&yfGMmVh}grNKImX$d-~FCEJjRF`Uu8X)XX)WQREy$2t?2N2$+MB>)_3nFQbp zm7qICeJU1>S;i{r#~j6PBv0Cvq7`N_MI4-+7p-T4alzSRRNmqt^wli=7}7Lsl;3P9 za)Gvqv5Q42mM5;|A`F*K_ZdWaXQ63AVslvPBX zRVM}m$#$)Ya@VfL>yPlN!YgVSf}i*mHB^8DF~dZ(NPQs5?Jqs%$U{Q40#wO{l zOKpJ_G&7j%=4kn%VR}N_+-*0+C$ujaA~vWY3f8Nm% zu`c0P|MRzzsL?X&yKD8rTk5;1Gn7%B>!4n`FsQq!e%DT~s7#^*xV0bjdYn<7&5O$3 z7u)+=#Z~|D)^9hz*xC2@x1VnLJKImU_r*HB{vkE$i_1mJPlYO#t` z)6H(Gx+ziICXzr}%;w-ta#$=9B!HCLUgj1cIpq)p$UTPubqfLnaQ=k3B;TthDOsMJ 
zL#eClXIIs$_rCYN`a!K$*6@4!_bL12s;2!X73Mz&nR|GW>$;{ft*tSg8KFKf+J;Vf z6M3_3s=S4~)wWdL4(+y$_Ci<~6x&5rR}7s&sa;ZeCoB&t?TX5mkgv9@Dqjw3gL=EJ z@)hKl+Dj^5MZVE)sC+G49;~!iRNfBXXs@z*`%OF-*iw6qHP|v+VQ;Wi_9nZ))_P9+ zBCj7`I?>vf*+q8gNNazCUp~IVE}!V^8+?t~%L(g= z+1p1Ndz%;gZylQLw@`D9y@Q%}c%|>6=51zgXw7d;E<6kU$QSNL>UW-V;j(z(2T|)K z$+fA=C4W1K!%@n;U@(kDiYkW%dqEP!5%Tq+2%^;Mj-pPAnsrna2BUu5iIFRI;wTky zh^)2kC!88P@nE1vmZ=dlagk70!S{W^C|ep2LNATukP0?W1?Rn=u73H+e@u?SIGg$G3k!t;F*T8W&lZT)2 zbR?q0_1$zd47ocK6ZYVyJKP<_jEC+IpFDPx6r%6Be#Fq11)bE5yRM%kJWX1YN|GOE z3&Pxl9G3+j#CzP^#$iAAQa(;!TG$@WC1wZG+>XcSR2>SQrU#xn(`HSOBxG4|pLuFp zQKgC!CV%FO1R^z&mA3Fnz7=Y#LdFb7zy~}^rHLiVQje!YUr@P_F9LFp^To@;{f&)> zn;Xr7EJhv{!4p}gJy{cEuhJHa`F~kkj zNNWdLx!ECE(XWWd#-sG(0WwU-iEDjmM`khOh5id|YNuvqWm-nMVonR0IlLZdQ!};t z(1NVcHPS++GwW}R%>0${3mvC!Pp-{ReYQk5^^@oBevt0Cw_GKc)=LA^HFfcAdYS4a z8{JVD9(eIKgtX^xha4h?KE#8jAsh3(r@??^Gn8e@d0{XJ(q>T>pY0qZK_`(*T@eqw zQ>o`0w-p`;G6-3D80`fjj?|$pdmlYr-~8<9V{h}(lZP9d51%!Q;wHL@Ta>&@16KIy z0Pi^ryO-n(E)HjtTC_oRj(PH%G_0t0@7?-73dsr*&9d}{Zs~^Z;Lp?XVhjc1i7O^3hC-a$WTq3&;)lq1)v?aEvEzHx>%GW^U$*9|glv=%=Wj zxtb1XHvMRABo}2}wAoDS!?yZHreD&4y5_Nk#|G}|co*p1!_zNfJ^JK3a~tZ%a7pek zPSjd}q=|by38EW-(xmC``QeBst)?UG$RBVfE#{}bw01E-8qrV|_LLd|y@E)Z7$fz~ zrYWt2hh14wANOcvB1?Gz1J57L@^(Cm7-!z1#~)!kiGmTMu1_w_#MXMP-$EE96nc~} z2%gRgLv8rds~g_qfIV!PHfM_8#R%~s66_2u(8z~W^D!mo^k}vX^zzY~sM9be%kw4X zTR`h(hYpB-1c9^{wYF_!QZ3U{{m@9Rz%`l~9Okh_{0k0J$GYu)pSL)-q_Mk?=U}~AmSr%Q2nQW?zS)HH2 z+0F~9LffMASm*~)6-X2`h2+-M*PGUCQ(@eDZ zgmmNy#^=_kU(qLT&Nui;n7a7)Q^0{zLKgiBYFEC{ zy2`MQN^MK!VM<31@SD(EJ4Np=a9C{zicwV#XyV?_DZjh530J&z8+;N0fP(6raI7!f zEMYQnK~lGbGy~jM^VHcMX{nwe9C@G}Qk~HQOnK(`7U@CGhsW2LiQHvyV66`XXBH6L z%4{-k#f5KuuI*m`9DE5Q3uzIoSvxfPfB%iHe#$lZd2*t+3H2m)aJ3NS!D-89aH(3%Z7 z&#o}m-P%TEg0cHM9Fyj*ZN|R{e%w45nX`6U*z9BOS&szHehSy~4JffIK2O^r4y(R6 z+g=J5#nbuHa+-JBeLBqj?G2ZMEMh&ryM=8H4_bMT)@On7IGsSq*E7>kmmfjF$EtXXF_q)UmCj>0hwa`c)92WVr#VC6-{K%*c6%5 zSLHh)tQ+y=@WU@qR_ImGCfAf&v^+0>_IO_FjtUNv50RO7m*)6jB6SaQyo-_K9vp9` zF9P=r2!bd$nIU{b<MsOy|?g&rvl{5=E{(c)1oKip1X 
zw2rnKtD_BH&+2SxO;i0%)>zanFY1c4`c=TttoI7F?EW`ENf^h{-2^rwr(9c9iuAk1^g$m%g-`R2bFh~&Rw>EP} zDIVk8#405e6^RxS7(yY3gxr@Jf>c7RBk9py$?BK?{!jDFU9dmZ`h;5l`mgBsk9T|T ztKYxfm4-L&b=B{`r~sbi9W*cIpmS z_dk2$efsp#`sRi#_(^&I&A*3v>8c?P!I{w+-|TT|149Lu#xM|f(Ls=Bq3Z%fqX{rx zI*RvzA&FgTsrX%r@IlRZ3M)Z)EB0s@g;R2cY%2E+-b{K3Q53&X@zWnu&p)B$PbtaQ z4Wq^NlEOOTgQt7QjXPguHH`N$t=eBAt0S2DqEx^>LIeV}{3N^`9`trnTKHIPL5wjO zP-Q}Mo*M8A6#pDgLTICz6x-s@wDhvMqRjXT(z4l5rMliQt1$6Z{VIAkjLF5^z+d0+ z!qmyc7tD<2CTe(s2nyed2ztzQs(XfHBL^|n=e5%rEL z!z^hu$|Y4Mvqc=n9EufR82Ek#HDm_9JhciS)_|S;?gPu OdUCsb?9ZK_7lDq0g8R|w_|1l2Whj`=uL%C4u0fet~ z;*i`I#SF6YRNw-Uvj@0F%SqfJM&wNHCzKme@*_$}7llBAAZwvtAo<&`1%XTO*Ymp> zn&uxcC?PA>3hvU z5g$9$!MhQ90s6ql0iHIPxb<@lr&?e|gl4vCwV9LR+zxfzOwsAlot2dpR^ghPmC4ol z>G^JcMN5vK_=(>`$km!73-S5aeEa*TOfDmVXJ47>m>F8av=ieG0HM3kLClaN6S%hG z$tZ^CKeroYx(mDcs&of)@-af0f2A)Ig|PI=JEwxnS9r$7&->?)Nq5sw0r+UUC;Vin z)MJ5%(hQ<5!K#YM6Y+QWe6yU#BMLPB3>DHG1W~R7Wh$1^IOG5dM3KbvrQHo<3J|@I z9zWf9h{$H4BI1|Sei_3Re?XivQ^$EsA`ujhJ-|>WL{KQ#lv;mHw~A^&zb=AR5)Ndi z1f}lK%A`QK9R3AuGkqgnBW=smGT>&?I4?Ge7)xRYgG4|{pOWX45aAF6TH=_JFOkUF zjG1(J7|z+rY%vP$RD?s&Oc0omtSG{%uU4B*O+j|^n)*0JRoxL?72zv%qJlJB5PXTh zMba*xUgHWI6|#0((Gp>PLwVurZ+rz=O@58tI5TRL24Nyf)J6--udb~SSD^U~x2+ho z@>=58)Pb((;MwTH(|i2r$)im{@E|Ljey_(F#ZR~z*!cZNn-BBLQxg|q>ZVfA{hJEs zq($A-&T~&d!aDT{(z%nzmLE1ypL`FAWzcC=4JUuAv!ilVxe5!n1P-;DPTe6KspaX) GTmK8MvM<>H literal 0 HcmV?d00001 diff --git a/local_dm_control_suite/__pycache__/swimmer.cpython-37.pyc b/local_dm_control_suite/__pycache__/swimmer.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cfafaa9d1f01ae4c2a8b06d91820d9075643b28f GIT binary patch literal 7547 zcmb7J+i%?1dFO379M0%sSu4x-+RI&gFVkwr>!zCquDffmtz;c|<*lVO!L$KI&p8@V z=8)qWc+Pje^ZTy)Xl~Bc@Voa<89U=?+P~Aw_-BE*fh+yCu4znbYfNWGr1y-rp%ZO_ zHruA8Eznlml5{1iv@7VhBfD2^S7lo@a(cCPP0~)}_Ui4rq-&rX?S`b?Xs$Qko|kkz zTIemd7bV>QeX4y*(sR*L?{xdLq$|;K`xUG+AFcGxw9lYzp}oo$+h_UW=-jc^KF?0E zrDt0ERepYSft^0q`6{ch%8tpF*(=ZVr+WM1mwHy%8QJq1TP;Uh>?}L?OmBOWug|kr 
z@%}oyz%F9;H`r^;!}HR&H1;}sM%Bi!H^m_8kx(H&d6%M&b z(c#iqEUezDf`VsfaHN&{;WxL$#C5%Vj_ddDnr5U!>^}r#< zvapkR$&MGKDbLcD>w4Zz(M_-6>3Olgmp}sPHScEZf#6-pFzxeBxN|t3Hy-pJpcPZ7 zu^1=UOO?pwj3sMo(wilJ+wVx0{DWTPE0)$uE}GkG-s4>^xVNp?+V(=)m*f@OO2agx z<&z!B<@S@EFye90EJC z&UmxG(8LnDe*L>wc9R~zvd>u%XICD{v)c>ALu{vGQV)Lgv~k_Sl`i6z!6@WfuIC0b zo?AF@b9i=o+FyS+3gd^~c<~qXY11fd1;Jv8$NQm3;vU4}KLqf)1iU#ViSi?e#?8VW z_oJ{EX5tKH5UX?}L8#klP=!;D|N8rQOUaCBmZOXFc>ce*KR4z6>iZ|yU%+HdW6DEQ z7iX!-6zA}mwx8s!R`PcB{olj5DRUR__JTy(k^F+7=Lz{L>E*b#nfY@gH-BpUg%0Fc!?(V?vl7ML z3{qOah^@hMlxn*bzx@My-qLrm_Wsq+Zn5 zr+_R@`M2(EY~8Yk)G=#n^~X?^i*@5RgMjaUePvaD>sfR2mh8in6J7DdwpB`{=XdA%)y*CzN==n z+-9}s`jL}6BR4bl>deintp40M*5Pv-<+vJ~%eC{`k;~@M;?}j?#q4$U&8W^6@>*VJ zi$6Dx)pw`L?;5#T&bpL0@)|q+9Gqj`<+6V+x61xk@;R)#Qud!I`{yV9tNHx2Kd+43 z+@YCr=cw|i{aqNQQG=bujux=fbLX`mYX|r91?=|QM~nI5XpWsf*4e8MjW2ZZv3$R9 ztdHi|MSO9JT^ucZuI=d3*E{*C+mI95moRRTUC5V?5fOc%Kl=E)8su{dQbr{3oP6G; z;YZ&p-8)=E?7@qS9UJ32VJtn|*eEL9-H!bS2@83;?Wt!=kiQ+ieKYeS9;BJ~wX(w# z{Lvs3oVC3Byu(BI0Po4suwLqI3yTdGTUQ<@B4U{8@Cw}uC+K8@AWKAHr#wy*@q!%p zu$sa*_)m_YQ7(=1!(8ej&zW#u86)c=g1f7S7bPBVdq=oKH6p1_=*(_41^i|xtP^IrLAL*HvL0z8eKhl z!G}q8g_|;=(M4q^N`ee;M(e%8WML0-sig3+eCm9D?Ou=I_pxyQgD12CEoW=P*JgZr z>$>#J=?5UjW4AC@qc3=t9r`k!Yc3V;9|X~WuM0tEwUdZmkckGSD_k;NrHzsCfh%~x zlp`-Jz={>_=HS74#L1n?(>< zWQkg?d72^+OagpD(tDkqqP9zG)4bw0!JJql4()y*Xz3Qc)FiYBKt-jq8%E$x;Yc>s z6)k;wQNGA9vv3uF`O35;30+y&Ch!M5sg51$kUS$1ccw ze-Bss5pJ5{z}hat?l$y>Vd+btTzy&E;dyEUwStSDigHY{(JsDO)K~Q-L(Vq=7=f#b zOFBgcg6Shb4E6%~Q%M=jEGd)RB(ume!%bSSW+t;)l`Kl8kMPbMR)gzwVL8p2wmQOq zvMr}zt1|Hab4;2`K$E-8q}kQ}y#Zpynd@PhTIA+H!9Y$QWMsX!LW7i13@3MVV| z`**Obq5YoMy6U|L#pF;kTYU%ZsB|K@WI3>K##1ya;wCc zwq7zJPj_xShwKwRefM(2Cn)szQ(Q6*)eJf(7iZeQDK6`>w}alvk96_|u9P;zwYHAz z)?j+uWH=Y(au%~#1bQtoAmmm$vP8RbIKA|WqH1Pfy%qm(I8 zK1RyjDr_|nUH~5!7BxzzwDE#O*|duaofIo97GyzDBS82Eq=MD~j8=hEGTjaO97Z$A z5XD<5k9LZAn#|7mtuJt;Wb-tm0qtAShb!edv|gTWB3klp z6VWc>C4*o`My`#_V<;uWqz|u-#Y9_Xu*pH(;Yw80G7%SO#cZo-6-L}IYy{Fl)Fr`) zo*)LHB9HJm!FzG~W$t`h|WCn5gr9B3WXK~2a!+%DhF5r=@AL1a1Zd}%hd~K|G?ir 
z^dApTiflpJgDGEsjW1J*%{7C}@$k%8WGBW=#6_%4#6_9xNpVSWn8Ryhalix0`hg(b zm2L?!8|-E3y~7VW(EzH^g*cL|EFv^jXtsfE$z5BIB<4FwpDg89!N8WYrpI7reDd?- zZK$q|rc*e+G(j(#R;C@?f3z28eGder;`Z$h6Rp zo^*_KrGz;dsA*3Pq?bC9OUf1~);5R#c_)VD4I&h@WV9W|r6$9eZI{{Iwk-9`mXAD7 z)gZ2U_xj3iuX!9A#*(UUXqu5i4lkyWsFiuv# zgKf$(P~agP@wR8DSfed(B(b!`u!*qHSTe<@llg){hGJkABFaRm@Es%#!63?-R4GEO zNaei1A7Zvap9?8%f)6r_%=ZyUOAyM$STAb6FI_W0Glj*EAkbfsWTm*B;bJML)_9W1 zIgsj+j>~iSa}b$^9E`jXaysN$c#?&otQ3~V5SN(|!y+A-)U#*hCUQdfEM`47Re=D- z0e!EsXYW;6<+_$vCT}+0#(fT}&h$CVxvpik%td9<7}axRVL1I7PTw8=*CtAX%FEIL zjXl^*xxfpualeq|M2SG&4^yNJB`#(mM7F<7rf^adzeM2u!*~1Pn%57TvOq}#1*afj zpDWuS&qo>$;zO>Z49Xx5DU`=yQl$o-Hw4pwS9i?7w65uuaoV`BIRoa>W&q5DgYM7% zihjTPu=_Rn`_~V}9?pl-wiLPm7*Kjt=m!((5OG>VsU2!T-Q7(58GT>~vJip}cLTah z^@ieYg?-GU>TVo$LevVUUZJeM%JNHL(deRD?ovFY8Ib;=s9QKPEFcS6lS|6b{Bv55 zY(%j{m{rQ_mq*gH6@Hu4-{?d#UXURR3W4&8yaPF^3N;QfH8kSKG~*QCe~H(B#FbK$ z=FS@w7Oo;Vl>dz3xiQ|qy!-_Dgm}7c;gT^UA^(_AjzHpALv*eKqdK9Vid>SmByEBw z+hh%IO`J^ZB@a?A_GOGi=$Q31YceFC@x}^Ya6>IWRe!YSt;tBBqHO1pffG% z_XI;xn%zdpTzLZlw$)JOeub2$2`*+F1^I|#f$G6C-2I9+EdN4;9Emca2HfsgU?;}D zBrUQPQd(;upx2XF7Ii2Swg9mzEQO3q7z%^S8b#c)sA&&ZCzA5g9-QqiREz!+LDfvc zu?jh4+%d!}(3Heo1d$=ai^6`83}TkzFCY*Z(pQuhe8H zki%2VCmm@Sr6@7gOQ6Zo5>^vz6)uTDG561yr<4>`@NE1WF3n0x4QuKf&6;>hoIIo3 zXLS1^-N+6L0;~8Ny8S(FD1R%1-QiI*HrllM80FHhlk)rnu8CitcN;5?BbBQ8ww#>8 zh@AmtBAA8jydX3Sa`fV7xV7Es9|=SS4aMBFp(P?!5nYk-+L!*GAgVMCh2+J`%vbVn z4Xr^IMyawRi!+_7nSiAZz=}|-QnihWbhsj;`A|4GX!VkPo~jzgY5(^6H*bFD?v^aW z`giYqa%W4tO(W}DLAT2p#TIE%+5FC(t#wuIHIa?U%DnaTUfD(xG25y$p2g5-g^721u3aSW*uS{o7R&s51M@`qpS9Pc9G?vc( E534!!#sB~S literal 0 HcmV?d00001 diff --git a/local_dm_control_suite/__pycache__/walker.cpython-37.pyc b/local_dm_control_suite/__pycache__/walker.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f4c03f183779313e166ba99afddece1f13744e66 GIT binary patch literal 5283 zcmcIo&2tmU6`xOOEXl?ggJBJ(H=ATv8?Q(usZB!2f?2$~PFO=>Q@fc;)i74K)Z(S$Z!pvXcM&Qnu@kSTdt*OJG5OJ{gtp1 zRo$v;tA*1+r(Opz@1N3S4H0XJEi7&Wk 
z_$hapFS=*>Y4@C1JX|@_-1B^ipLwpi7sUC)clh#=E|$2>?T*RM@^jDiXS%!kM$ZaA zuX--N(Q|>nqk1lVuJKiV@wvt?ifV7|ndz>fp;FNpey%+TcU;2=nYBnG|EmqDC)ok85r(6WuD zN;K#t?F3Y{oy3_;LXg&;p9<=1CsCwk)}HFB?NI0Xk)CNiqhsKt#&GkV$ObY_na{F8 zKNM`dXApN;wl7$ea1pYvzr4-T49Ds+aEP%yXlE?xFh5O2mbQkqM@mwy$5GfcC2^70 zr0}_?=I51-ot@3yoj+^xG`dvv&#PDVlSo`S5ZsTmE15_$A-!8Sui!sS+J5Nq$QujG zO9w$Fu8jBSKgpN7A}f~mz>imOafOa)pwbL^26f)p6LEV#^5sJ?-X=-VV~>lH%2m8T z8hU0t(_U#$&CKNb2%hoMcxsJ|{#u|tH8QJbkIa$PF-BTe;pU%=k@0)ul@9q?!yC#B?rPw`$F)%QmB_&vmg>)7(_vqFP4X-HVC2Ltmf6b`%luK zo#v-HGKoA$)hon5K3HL%g>}jbcFOCU@j)PyI1+J|pY}ezv%P!o&TVgZ>&wlZ-OamA zTdrW31T1Lzl9-P59>S=)Xx&9GBC*^fY`4OE)h&>Om=Tl#`7-^KgC zE8NV2aP#oTSZUWaWZgCsG zHm~q1ekj)pj9?n_3cF%kN^tTnEeY15mO_uh!#!g7_o9M=h_p zL8D-%SnbysP;$F`lG|dy+nFUunbiO{lorh4$0rz0WH6)v7_tL_h!A8?O77bLIUzqv zIv+)V{#JgbU@9H#Wm1SRh%uo6;Mrr{$`d-iu%_A~Cd-!2_?0%7TfC(H>nY^+*KMdJk!UK`E9QHD)xsx|o`dU9L(VEh}7*O*s;TuTiZrj>`#y-2vQ-2T!?Ea3wV%Kdk|I6nC#X)~ zNd1M@LC(sp=X0*5Xkf{62Z&PTjzW|I}dcF=(lF{FBk`66ReGoHHG>~e$~ zBqx;&QdTqGsK{fkvOE0(h}W3_GDDg^dk_zz{*zWQqID1KBoWf0OgxwfW*t(RaEwe@ zbTKnUf}~))jkGaSi6)E1Blci+iFF2?-%et}FU$x5)Yy`yPS*1W8NfdakU&BUn@f1%T63L(k_tuN?w%VC_?T{yCNm zQBq~E^@?6zABt$yDi>^12%?96X)GC9`YlwHmy@COj3eB}G81VMjP1}KAy=nc6NM<> zfvN2v*;erfiq4dUk&0&0Ip$|&zf@o^{PsQ*{UAjiKeoTEn7v;b%KIw8UuOeY2^|}D z3>i=0M`inIL;y6#LjoZsY+9wza=AwF1~*{61Zu_HZm3*>vk=DCp;gA=6NaA1x41*U zhiCcu?B~^Nt*D$5f^3wRQG;u5) z1Vg27$^c;~ggG-xzJb>vN}=T1f)Wk2*PE!D)J)}5nocPZsrv{Qi;;e49BG6o z*MKt=4nP}LHI7Qd88TMXa zph9`gaByLuzv0CbVtZ}EL}qlSsiG*Ey#hg(v_f9w9vmM5ItDIl#;V56m*!K0<7RPa zA89K<=2cwexIHqKwULSU3f|#RJtUhC4Y@tCcFNVNkTTlK_0$+z5$Pzey|ka&Ba7EZ z_OiyU6%BP`6#<*iLn8~rwTVVbU>@7H-$n3l*%J8dmm_LCjNp1_15`u?phH zWD>^&11eMLg23~)>v;wFi|{34`xY7`<=pux6$(5bmtM6(UQL5;lmz^kZnMWEEre@X zr?GQc5(@ZYTc}{1&wD$&8{47B8=xK#eq7bo~yir*s$7jGA79?icY_L%b*2(G8U0${W^D%x&u1%{lpF z;_oL^{gf)YtV=>$d4sCYsQMKu9I=4qwg|%#nhBpvH&$r}-30@bN!qcn(3p1|g$K=d z)#8-pbOv;#z$&CWNlKDRNo0v`ZTd|@lH~LGX+ukf#Z~;=+dp4OvghK{$hnzW>eEJR zkOg5{K-%I=XVFY~QlZ;3-mT5gHtyfvRTq2j_STnMyXqn 
z&`xuE=l<62X7TC3#PvyC+2wmQnC@;W@yab4tAsJ5(n*lZoohw<_X&MRO6g00Whjp} boZ?q`H(E?fw@mz@c1*{1^rc!^mX`koT{A%9 literal 0 HcmV?d00001 diff --git a/local_dm_control_suite/acrobot.py b/local_dm_control_suite/acrobot.py new file mode 100755 index 0000000..a12b892 --- /dev/null +++ b/local_dm_control_suite/acrobot.py @@ -0,0 +1,127 @@ +# Copyright 2017 The dm_control Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Acrobot domain.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections + +from dm_control import mujoco +from dm_control.rl import control +from local_dm_control_suite import base +from local_dm_control_suite import common +from dm_control.utils import containers +from dm_control.utils import rewards +import numpy as np + +_DEFAULT_TIME_LIMIT = 10 +SUITE = containers.TaggedTasks() + + +def get_model_and_assets(): + """Returns a tuple containing the model XML string and a dict of assets.""" + return common.read_model('acrobot.xml'), common.ASSETS + + +@SUITE.add('benchmarking') +def swingup(time_limit=_DEFAULT_TIME_LIMIT, random=None, + environment_kwargs=None): + """Returns Acrobot balance task.""" + physics = Physics.from_xml_string(*get_model_and_assets()) + task = Balance(sparse=False, random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, 
task, time_limit=time_limit, **environment_kwargs) + + +@SUITE.add('benchmarking') +def swingup_sparse(time_limit=_DEFAULT_TIME_LIMIT, random=None, + environment_kwargs=None): + """Returns Acrobot sparse balance.""" + physics = Physics.from_xml_string(*get_model_and_assets()) + task = Balance(sparse=True, random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, time_limit=time_limit, **environment_kwargs) + + +class Physics(mujoco.Physics): + """Physics simulation with additional features for the Acrobot domain.""" + + def horizontal(self): + """Returns horizontal (x) component of body frame z-axes.""" + return self.named.data.xmat[['upper_arm', 'lower_arm'], 'xz'] + + def vertical(self): + """Returns vertical (z) component of body frame z-axes.""" + return self.named.data.xmat[['upper_arm', 'lower_arm'], 'zz'] + + def to_target(self): + """Returns the distance from the tip to the target.""" + tip_to_target = (self.named.data.site_xpos['target'] - + self.named.data.site_xpos['tip']) + return np.linalg.norm(tip_to_target) + + def orientations(self): + """Returns the sines and cosines of the pole angles.""" + return np.concatenate((self.horizontal(), self.vertical())) + + +class Balance(base.Task): + """An Acrobot `Task` to swing up and balance the pole.""" + + def __init__(self, sparse, random=None): + """Initializes an instance of `Balance`. + + Args: + sparse: A `bool` specifying whether to use a sparse (indicator) reward. + random: Optional, either a `numpy.random.RandomState` instance, an + integer seed for creating a new `RandomState`, or None to select a seed + automatically (default). + """ + self._sparse = sparse + super(Balance, self).__init__(random=random) + + def initialize_episode(self, physics): + """Sets the state of the environment at the start of each episode. + + Shoulder and elbow are set to a random position between [-pi, pi). + + Args: + physics: An instance of `Physics`. 
+ """ + physics.named.data.qpos[ + ['shoulder', 'elbow']] = self.random.uniform(-np.pi, np.pi, 2) + super(Balance, self).initialize_episode(physics) + + def get_observation(self, physics): + """Returns an observation of pole orientation and angular velocities.""" + obs = collections.OrderedDict() + obs['orientations'] = physics.orientations() + obs['velocity'] = physics.velocity() + return obs + + def _get_reward(self, physics, sparse): + target_radius = physics.named.model.site_size['target', 0] + return rewards.tolerance(physics.to_target(), + bounds=(0, target_radius), + margin=0 if sparse else 1) + + def get_reward(self, physics): + """Returns a sparse or a smooth reward, as specified in the constructor.""" + return self._get_reward(physics, sparse=self._sparse) diff --git a/local_dm_control_suite/acrobot.xml b/local_dm_control_suite/acrobot.xml new file mode 100755 index 0000000..79b76d9 --- /dev/null +++ b/local_dm_control_suite/acrobot.xml @@ -0,0 +1,43 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/local_dm_control_suite/ball_in_cup.py b/local_dm_control_suite/ball_in_cup.py new file mode 100755 index 0000000..ac3e47f --- /dev/null +++ b/local_dm_control_suite/ball_in_cup.py @@ -0,0 +1,100 @@ +# Copyright 2017 The dm_control Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""Ball-in-Cup Domain.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections + +from dm_control import mujoco +from dm_control.rl import control +from local_dm_control_suite import base +from local_dm_control_suite import common +from dm_control.utils import containers + +_DEFAULT_TIME_LIMIT = 20 # (seconds) +_CONTROL_TIMESTEP = .02 # (seconds) + + +SUITE = containers.TaggedTasks() + + +def get_model_and_assets(): + """Returns a tuple containing the model XML string and a dict of assets.""" + return common.read_model('ball_in_cup.xml'), common.ASSETS + + +@SUITE.add('benchmarking', 'easy') +def catch(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): + """Returns the Ball-in-Cup task.""" + physics = Physics.from_xml_string(*get_model_and_assets()) + task = BallInCup(random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP, + **environment_kwargs) + + +class Physics(mujoco.Physics): + """Physics with additional features for the Ball-in-Cup domain.""" + + def ball_to_target(self): + """Returns the vector from the ball to the target.""" + target = self.named.data.site_xpos['target', ['x', 'z']] + ball = self.named.data.xpos['ball', ['x', 'z']] + return target - ball + + def in_target(self): + """Returns 1 if the ball is in the target, 0 otherwise.""" + ball_to_target = abs(self.ball_to_target()) + target_size = self.named.model.site_size['target', [0, 2]] + ball_size = self.named.model.geom_size['ball', 0] + return float(all(ball_to_target < target_size - ball_size)) + + +class BallInCup(base.Task): + """The Ball-in-Cup task. Put the ball in the cup.""" + + def initialize_episode(self, physics): + """Sets the state of the environment at the start of each episode. 
+ + Args: + physics: An instance of `Physics`. + + """ + # Find a collision-free random initial position of the ball. + penetrating = True + while penetrating: + # Assign a random ball position. + physics.named.data.qpos['ball_x'] = self.random.uniform(-.2, .2) + physics.named.data.qpos['ball_z'] = self.random.uniform(.2, .5) + # Check for collisions. + physics.after_reset() + penetrating = physics.data.ncon > 0 + super(BallInCup, self).initialize_episode(physics) + + def get_observation(self, physics): + """Returns an observation of the state.""" + obs = collections.OrderedDict() + obs['position'] = physics.position() + obs['velocity'] = physics.velocity() + return obs + + def get_reward(self, physics): + """Returns a sparse reward.""" + return physics.in_target() diff --git a/local_dm_control_suite/ball_in_cup.xml b/local_dm_control_suite/ball_in_cup.xml new file mode 100755 index 0000000..792073f --- /dev/null +++ b/local_dm_control_suite/ball_in_cup.xml @@ -0,0 +1,54 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/local_dm_control_suite/base.py b/local_dm_control_suite/base.py new file mode 100755 index 0000000..fd78318 --- /dev/null +++ b/local_dm_control_suite/base.py @@ -0,0 +1,112 @@ +# Copyright 2017 The dm_control Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""Base class for tasks in the Control Suite.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from dm_control import mujoco +from dm_control.rl import control + +import numpy as np + + +class Task(control.Task): + """Base class for tasks in the Control Suite. + + Actions are mapped directly to the states of MuJoCo actuators: each element of + the action array is used to set the control input for a single actuator. The + ordering of the actuators is the same as in the corresponding MJCF XML file. + + Attributes: + random: A `numpy.random.RandomState` instance. This should be used to + generate all random variables associated with the task, such as random + starting states, observation noise* etc. + + *If sensor noise is enabled in the MuJoCo model then this will be generated + using MuJoCo's internal RNG, which has its own independent state. + """ + + def __init__(self, random=None): + """Initializes a new continuous control task. + + Args: + random: Optional, either a `numpy.random.RandomState` instance, an integer + seed for creating a new `RandomState`, or None to select a seed + automatically (default). + """ + if not isinstance(random, np.random.RandomState): + random = np.random.RandomState(random) + self._random = random + self._visualize_reward = False + + @property + def random(self): + """Task-specific `numpy.random.RandomState` instance.""" + return self._random + + def action_spec(self, physics): + """Returns a `BoundedArraySpec` matching the `physics` actuators.""" + return mujoco.action_spec(physics) + + def initialize_episode(self, physics): + """Resets geom colors to their defaults after starting a new episode. + + Subclasses of `base.Task` must delegate to this method after performing + their own initialization. + + Args: + physics: An instance of `mujoco.Physics`. 
+ """ + self.after_step(physics) + + def before_step(self, action, physics): + """Sets the control signal for the actuators to values in `action`.""" + # Support legacy internal code. + action = getattr(action, "continuous_actions", action) + physics.set_control(action) + + def after_step(self, physics): + """Modifies colors according to the reward.""" + if self._visualize_reward: + reward = np.clip(self.get_reward(physics), 0.0, 1.0) + _set_reward_colors(physics, reward) + + @property + def visualize_reward(self): + return self._visualize_reward + + @visualize_reward.setter + def visualize_reward(self, value): + if not isinstance(value, bool): + raise ValueError("Expected a boolean, got {}.".format(type(value))) + self._visualize_reward = value + + +_MATERIALS = ["self", "effector", "target"] +_DEFAULT = [name + "_default" for name in _MATERIALS] +_HIGHLIGHT = [name + "_highlight" for name in _MATERIALS] + + +def _set_reward_colors(physics, reward): + """Sets the highlight, effector and target colors according to the reward.""" + assert 0.0 <= reward <= 1.0 + colors = physics.named.model.mat_rgba + default = colors[_DEFAULT] + highlight = colors[_HIGHLIGHT] + blend_coef = reward ** 4 # Better color distinction near high rewards. + colors[_MATERIALS] = blend_coef * highlight + (1.0 - blend_coef) * default diff --git a/local_dm_control_suite/cartpole.py b/local_dm_control_suite/cartpole.py new file mode 100755 index 0000000..b8fec14 --- /dev/null +++ b/local_dm_control_suite/cartpole.py @@ -0,0 +1,230 @@ +# Copyright 2017 The dm_control Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Cartpole domain.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections + +from dm_control import mujoco +from dm_control.rl import control +from local_dm_control_suite import base +from local_dm_control_suite import common +from dm_control.utils import containers +from dm_control.utils import rewards +from lxml import etree +import numpy as np +from six.moves import range + + +_DEFAULT_TIME_LIMIT = 10 +SUITE = containers.TaggedTasks() + + +def get_model_and_assets(num_poles=1): + """Returns a tuple containing the model XML string and a dict of assets.""" + return _make_model(num_poles), common.ASSETS + + +@SUITE.add('benchmarking') +def balance(time_limit=_DEFAULT_TIME_LIMIT, random=None, + environment_kwargs=None): + """Returns the Cartpole Balance task.""" + physics = Physics.from_xml_string(*get_model_and_assets()) + task = Balance(swing_up=False, sparse=False, random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, time_limit=time_limit, **environment_kwargs) + + +@SUITE.add('benchmarking') +def balance_sparse(time_limit=_DEFAULT_TIME_LIMIT, random=None, + environment_kwargs=None): + """Returns the sparse reward variant of the Cartpole Balance task.""" + physics = Physics.from_xml_string(*get_model_and_assets()) + task = Balance(swing_up=False, sparse=True, random=random) + environment_kwargs = environment_kwargs or {} + return 
control.Environment( + physics, task, time_limit=time_limit, **environment_kwargs) + + +@SUITE.add('benchmarking') +def swingup(time_limit=_DEFAULT_TIME_LIMIT, random=None, + environment_kwargs=None): + """Returns the Cartpole Swing-Up task.""" + physics = Physics.from_xml_string(*get_model_and_assets()) + task = Balance(swing_up=True, sparse=False, random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, time_limit=time_limit, **environment_kwargs) + + +@SUITE.add('benchmarking') +def swingup_sparse(time_limit=_DEFAULT_TIME_LIMIT, random=None, + environment_kwargs=None): + """Returns the sparse reward variant of the Cartpole Swing-Up task.""" + physics = Physics.from_xml_string(*get_model_and_assets()) + task = Balance(swing_up=True, sparse=True, random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, time_limit=time_limit, **environment_kwargs) + + +@SUITE.add() +def two_poles(time_limit=_DEFAULT_TIME_LIMIT, random=None, + environment_kwargs=None): + """Returns the Cartpole Balance task with two poles.""" + physics = Physics.from_xml_string(*get_model_and_assets(num_poles=2)) + task = Balance(swing_up=True, sparse=False, random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, time_limit=time_limit, **environment_kwargs) + + +@SUITE.add() +def three_poles(time_limit=_DEFAULT_TIME_LIMIT, random=None, num_poles=3, + sparse=False, environment_kwargs=None): + """Returns the Cartpole Balance task with three or more poles.""" + physics = Physics.from_xml_string(*get_model_and_assets(num_poles=num_poles)) + task = Balance(swing_up=True, sparse=sparse, random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, time_limit=time_limit, **environment_kwargs) + + +def _make_model(n_poles): + """Generates an xml string defining a cart with `n_poles` bodies.""" +
xml_string = common.read_model('cartpole.xml') + if n_poles == 1: + return xml_string + mjcf = etree.fromstring(xml_string) + parent = mjcf.find('./worldbody/body/body') # Find first pole. + # Make chain of poles. + for pole_index in range(2, n_poles+1): + child = etree.Element('body', name='pole_{}'.format(pole_index), + pos='0 0 1', childclass='pole') + etree.SubElement(child, 'joint', name='hinge_{}'.format(pole_index)) + etree.SubElement(child, 'geom', name='pole_{}'.format(pole_index)) + parent.append(child) + parent = child + # Move plane down. + floor = mjcf.find('./worldbody/geom') + floor.set('pos', '0 0 {}'.format(1 - n_poles - .05)) + # Move cameras back. + cameras = mjcf.findall('./worldbody/camera') + cameras[0].set('pos', '0 {} 1'.format(-1 - 2*n_poles)) + cameras[1].set('pos', '0 {} 2'.format(-2*n_poles)) + return etree.tostring(mjcf, pretty_print=True) + + +class Physics(mujoco.Physics): + """Physics simulation with additional features for the Cartpole domain.""" + + def cart_position(self): + """Returns the position of the cart.""" + return self.named.data.qpos['slider'][0] + + def angular_vel(self): + """Returns the angular velocity of the pole.""" + return self.data.qvel[1:] + + def pole_angle_cosine(self): + """Returns the cosine of the pole angle.""" + return self.named.data.xmat[2:, 'zz'] + + def bounded_position(self): + """Returns the state, with pole angle split into sin/cos.""" + return np.hstack((self.cart_position(), + self.named.data.xmat[2:, ['zz', 'xz']].ravel())) + + +class Balance(base.Task): + """A Cartpole `Task` to balance the pole. + + State is initialized either close to the target configuration or at a random + configuration. + """ + _CART_RANGE = (-.25, .25) + _ANGLE_COSINE_RANGE = (.995, 1) + + def __init__(self, swing_up, sparse, random=None): + """Initializes an instance of `Balance`. + + Args: + swing_up: A `bool`, which if `True` sets the cart to the middle of the + slider and the pole pointing towards the ground. 
Otherwise, sets the + cart to a random position on the slider and the pole to a random + near-vertical position. + sparse: A `bool`, whether to return a sparse or a smooth reward. + random: Optional, either a `numpy.random.RandomState` instance, an + integer seed for creating a new `RandomState`, or None to select a seed + automatically (default). + """ + self._sparse = sparse + self._swing_up = swing_up + super(Balance, self).__init__(random=random) + + def initialize_episode(self, physics): + """Sets the state of the environment at the start of each episode. + + Initializes the cart and pole according to `swing_up`, and in both cases + adds a small random initial velocity to break symmetry. + + Args: + physics: An instance of `Physics`. + """ + nv = physics.model.nv + if self._swing_up: + physics.named.data.qpos['slider'] = .01*self.random.randn() + physics.named.data.qpos['hinge_1'] = np.pi + .01*self.random.randn() + physics.named.data.qpos[2:] = .1*self.random.randn(nv - 2) + else: + physics.named.data.qpos['slider'] = self.random.uniform(-.1, .1) + physics.named.data.qpos[1:] = self.random.uniform(-.034, .034, nv - 1) + physics.named.data.qvel[:] = 0.01 * self.random.randn(physics.model.nv) + super(Balance, self).initialize_episode(physics) + + def get_observation(self, physics): + """Returns an observation of the (bounded) physics state.""" + obs = collections.OrderedDict() + obs['position'] = physics.bounded_position() + obs['velocity'] = physics.velocity() + return obs + + def _get_reward(self, physics, sparse): + if sparse: + cart_in_bounds = rewards.tolerance(physics.cart_position(), + self._CART_RANGE) + angle_in_bounds = rewards.tolerance(physics.pole_angle_cosine(), + self._ANGLE_COSINE_RANGE).prod() + return cart_in_bounds * angle_in_bounds + else: + upright = (physics.pole_angle_cosine() + 1) / 2 + centered = rewards.tolerance(physics.cart_position(), margin=2) + centered = (1 + centered) / 2 + small_control = rewards.tolerance(physics.control(), 
margin=1, + value_at_margin=0, + sigmoid='quadratic')[0] + small_control = (4 + small_control) / 5 + small_velocity = rewards.tolerance(physics.angular_vel(), margin=5).min() + small_velocity = (1 + small_velocity) / 2 + return upright.mean() * small_control * small_velocity * centered + + def get_reward(self, physics): + """Returns a sparse or a smooth reward, as specified in the constructor.""" + return self._get_reward(physics, sparse=self._sparse) diff --git a/local_dm_control_suite/cartpole.xml b/local_dm_control_suite/cartpole.xml new file mode 100755 index 0000000..e01869d --- /dev/null +++ b/local_dm_control_suite/cartpole.xml @@ -0,0 +1,37 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/local_dm_control_suite/cheetah.py b/local_dm_control_suite/cheetah.py new file mode 100755 index 0000000..7dd2a63 --- /dev/null +++ b/local_dm_control_suite/cheetah.py @@ -0,0 +1,97 @@ +# Copyright 2017 The dm_control Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""Cheetah Domain.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections + +from dm_control import mujoco +from dm_control.rl import control +from local_dm_control_suite import base +from local_dm_control_suite import common +from dm_control.utils import containers +from dm_control.utils import rewards + + +# How long the simulation will run, in seconds. +_DEFAULT_TIME_LIMIT = 10 + +# Running speed above which reward is 1. +_RUN_SPEED = 10 + +SUITE = containers.TaggedTasks() + + +def get_model_and_assets(): + """Returns a tuple containing the model XML string and a dict of assets.""" + return common.read_model('cheetah.xml'), common.ASSETS + + +@SUITE.add('benchmarking') +def run(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): + """Returns the run task.""" + physics = Physics.from_xml_string(*get_model_and_assets()) + task = Cheetah(random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment(physics, task, time_limit=time_limit, + **environment_kwargs) + + +class Physics(mujoco.Physics): + """Physics simulation with additional features for the Cheetah domain.""" + + def speed(self): + """Returns the horizontal speed of the Cheetah.""" + return self.named.data.sensordata['torso_subtreelinvel'][0] + + +class Cheetah(base.Task): + """A `Task` to train a running Cheetah.""" + + def initialize_episode(self, physics): + """Sets the state of the environment at the start of each episode.""" + # The indexing below assumes that all joints have a single DOF. + assert physics.model.nq == physics.model.njnt + is_limited = physics.model.jnt_limited == 1 + lower, upper = physics.model.jnt_range[is_limited].T + physics.data.qpos[is_limited] = self.random.uniform(lower, upper) + + # Stabilize the model before the actual simulation. 
+ for _ in range(200): + physics.step() + + physics.data.time = 0 + self._timeout_progress = 0 + super(Cheetah, self).initialize_episode(physics) + + def get_observation(self, physics): + """Returns an observation of the state, ignoring horizontal position.""" + obs = collections.OrderedDict() + # Ignores horizontal position to maintain translational invariance. + obs['position'] = physics.data.qpos[1:].copy() + obs['velocity'] = physics.velocity() + return obs + + def get_reward(self, physics): + """Returns a reward to the agent.""" + return rewards.tolerance(physics.speed(), + bounds=(_RUN_SPEED, float('inf')), + margin=_RUN_SPEED, + value_at_margin=0, + sigmoid='linear') diff --git a/local_dm_control_suite/cheetah.xml b/local_dm_control_suite/cheetah.xml new file mode 100755 index 0000000..1952b5e --- /dev/null +++ b/local_dm_control_suite/cheetah.xml @@ -0,0 +1,73 @@ + + + + + + + + + + + + + + + + + + + + + diff --git a/local_dm_control_suite/common/__init__.py b/local_dm_control_suite/common/__init__.py new file mode 100755 index 0000000..62eab26 --- /dev/null +++ b/local_dm_control_suite/common/__init__.py @@ -0,0 +1,39 @@ +# Copyright 2017 The dm_control Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""Functions to manage the common assets for domains.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +from dm_control.utils import io as resources + +_SUITE_DIR = os.path.dirname(os.path.dirname(__file__)) +_FILENAMES = [ + "./common/materials.xml", + "./common/materials_white_floor.xml", + "./common/skybox.xml", + "./common/visual.xml", +] + +ASSETS = {filename: resources.GetResource(os.path.join(_SUITE_DIR, filename)) + for filename in _FILENAMES} + + +def read_model(model_filename): + """Reads a model XML file and returns its contents as a string.""" + return resources.GetResource(os.path.join(_SUITE_DIR, model_filename)) diff --git a/local_dm_control_suite/common/__pycache__/__init__.cpython-37.pyc b/local_dm_control_suite/common/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cb0e012bbcfada34e3f59919ee1b351853cf1b74 GIT binary patch literal 1009 zcmZWoOK#gR5G5&DmVa^323=&O2I#^YyXZOynvYWiNKzn9fkHq*Fd}U;l|+}M(!@wt z%?)~ic-aH=GTL_4D`eH7@XaNOo~9`Xin zPW-sTTfBXRL}v$m$MF*H@TDshcX@4udfoY(-6Bt{RJn<)iZYh7iHPh}M2X5Wl}F4N zVNEnvI^rs0GB^D{z!&v!*<~k2rG*t#W;3NNghDRQq=9VUFU_>fEgj$GKZFpGYW|`> zxJ^CCm=#*G)b!7@biVd9PQOkiWFMzW>FUh$hco8$#fduqF9@Xzmbzeq9Y^m3*pgH4+PAe!ydVpFiGPqU{s{d**ofylJsAf;aH|3XPM9el&j!odoWd*7@P^tayziXfN;9A zHGog55|&b)(M07|tCX4oG~c&~QkhFjX@7Qc(_H5=vCzBO#!IKhNei%EhOY;?PPQvs z$z1{GADPI>gsJ2C`jKGVL@de_7iskIU_WxSBbM_>3tMQo4id)hNQoRA<_;SR6P@&X zfv!Vp?fR-44%{WzEeCA?H~T6tecB4$g#%?%LwQ=Ac?J7Q3L^TM`}1;Hrd@@ZT-T^)M)ok-I8;Z$~u?YjiL-86X=6Hj$quRyL + + + + + + + + + + + + + + + + + + diff --git a/local_dm_control_suite/common/materials_white_floor.xml b/local_dm_control_suite/common/materials_white_floor.xml new file mode 100755 index 0000000..a1e35c2 --- /dev/null +++ b/local_dm_control_suite/common/materials_white_floor.xml @@ -0,0 +1,23 @@ + + + + + + + + 
+ + + + + + + + + + + diff --git a/local_dm_control_suite/common/skybox.xml b/local_dm_control_suite/common/skybox.xml new file mode 100755 index 0000000..b888692 --- /dev/null +++ b/local_dm_control_suite/common/skybox.xml @@ -0,0 +1,6 @@ + + + + + diff --git a/local_dm_control_suite/common/visual.xml b/local_dm_control_suite/common/visual.xml new file mode 100755 index 0000000..ede15ad --- /dev/null +++ b/local_dm_control_suite/common/visual.xml @@ -0,0 +1,7 @@ + + + + + + + diff --git a/local_dm_control_suite/demos/mocap_demo.py b/local_dm_control_suite/demos/mocap_demo.py new file mode 100755 index 0000000..2e2c7ca --- /dev/null +++ b/local_dm_control_suite/demos/mocap_demo.py @@ -0,0 +1,84 @@ +# Copyright 2017 The dm_control Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Demonstration of amc parsing for CMU mocap database. + +To run the demo, supply a path to a `.amc` file: + + python mocap_demo --filename='path/to/mocap.amc' + +CMU motion capture clips are available at mocap.cs.cmu.edu +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import time +# Internal dependencies. 
+ +from absl import app +from absl import flags + +from local_dm_control_suite import humanoid_CMU +from dm_control.suite.utils import parse_amc + +import matplotlib.pyplot as plt +import numpy as np + +FLAGS = flags.FLAGS +flags.DEFINE_string('filename', None, 'amc file to be converted.') +flags.DEFINE_integer('max_num_frames', 90, + 'Maximum number of frames for plotting/playback') + + +def main(unused_argv): + env = humanoid_CMU.stand() + + # Parse and convert specified clip. + converted = parse_amc.convert(FLAGS.filename, + env.physics, env.control_timestep()) + + max_frame = min(FLAGS.max_num_frames, converted.qpos.shape[1] - 1) + + width = 480 + height = 480 + video = np.zeros((max_frame, height, 2 * width, 3), dtype=np.uint8) + + for i in range(max_frame): + p_i = converted.qpos[:, i] + with env.physics.reset_context(): + env.physics.data.qpos[:] = p_i + video[i] = np.hstack([env.physics.render(height, width, camera_id=0), + env.physics.render(height, width, camera_id=1)]) + + tic = time.time() + for i in range(max_frame): + if i == 0: + img = plt.imshow(video[i]) + else: + img.set_data(video[i]) + toc = time.time() + clock_dt = toc - tic + tic = time.time() + # Real-time playback not always possible as clock_dt > .03 + plt.pause(max(0.01, 0.03 - clock_dt)) # Need min display time > 0.0. 
+ plt.draw() + plt.waitforbuttonpress() + + +if __name__ == '__main__': + flags.mark_flag_as_required('filename') + app.run(main) diff --git a/local_dm_control_suite/demos/zeros.amc b/local_dm_control_suite/demos/zeros.amc new file mode 100755 index 0000000..b4590a4 --- /dev/null +++ b/local_dm_control_suite/demos/zeros.amc @@ -0,0 +1,213 @@ +#DUMMY AMC for testing +:FULLY-SPECIFIED +:DEGREES +1 +root 0 0 0 0 0 0 +lowerback 0 0 0 +upperback 0 0 0 +thorax 0 0 0 +lowerneck 0 0 0 +upperneck 0 0 0 +head 0 0 0 +rclavicle 0 0 +rhumerus 0 0 0 +rradius 0 +rwrist 0 +rhand 0 0 +rfingers 0 +rthumb 0 0 +lclavicle 0 0 +lhumerus 0 0 0 +lradius 0 +lwrist 0 +lhand 0 0 +lfingers 0 +lthumb 0 0 +rfemur 0 0 0 +rtibia 0 +rfoot 0 0 +rtoes 0 +lfemur 0 0 0 +ltibia 0 +lfoot 0 0 +ltoes 0 +2 +root 0 0 0 0 0 0 +lowerback 0 0 0 +upperback 0 0 0 +thorax 0 0 0 +lowerneck 0 0 0 +upperneck 0 0 0 +head 0 0 0 +rclavicle 0 0 +rhumerus 0 0 0 +rradius 0 +rwrist 0 +rhand 0 0 +rfingers 0 +rthumb 0 0 +lclavicle 0 0 +lhumerus 0 0 0 +lradius 0 +lwrist 0 +lhand 0 0 +lfingers 0 +lthumb 0 0 +rfemur 0 0 0 +rtibia 0 +rfoot 0 0 +rtoes 0 +lfemur 0 0 0 +ltibia 0 +lfoot 0 0 +ltoes 0 +3 +root 0 0 0 0 0 0 +lowerback 0 0 0 +upperback 0 0 0 +thorax 0 0 0 +lowerneck 0 0 0 +upperneck 0 0 0 +head 0 0 0 +rclavicle 0 0 +rhumerus 0 0 0 +rradius 0 +rwrist 0 +rhand 0 0 +rfingers 0 +rthumb 0 0 +lclavicle 0 0 +lhumerus 0 0 0 +lradius 0 +lwrist 0 +lhand 0 0 +lfingers 0 +lthumb 0 0 +rfemur 0 0 0 +rtibia 0 +rfoot 0 0 +rtoes 0 +lfemur 0 0 0 +ltibia 0 +lfoot 0 0 +ltoes 0 +4 +root 0 0 0 0 0 0 +lowerback 0 0 0 +upperback 0 0 0 +thorax 0 0 0 +lowerneck 0 0 0 +upperneck 0 0 0 +head 0 0 0 +rclavicle 0 0 +rhumerus 0 0 0 +rradius 0 +rwrist 0 +rhand 0 0 +rfingers 0 +rthumb 0 0 +lclavicle 0 0 +lhumerus 0 0 0 +lradius 0 +lwrist 0 +lhand 0 0 +lfingers 0 +lthumb 0 0 +rfemur 0 0 0 +rtibia 0 +rfoot 0 0 +rtoes 0 +lfemur 0 0 0 +ltibia 0 +lfoot 0 0 +ltoes 0 +5 +root 0 0 0 0 0 0 +lowerback 0 0 0 +upperback 0 0 0 +thorax 0 0 0 +lowerneck 0 0 0 
+upperneck 0 0 0 +head 0 0 0 +rclavicle 0 0 +rhumerus 0 0 0 +rradius 0 +rwrist 0 +rhand 0 0 +rfingers 0 +rthumb 0 0 +lclavicle 0 0 +lhumerus 0 0 0 +lradius 0 +lwrist 0 +lhand 0 0 +lfingers 0 +lthumb 0 0 +rfemur 0 0 0 +rtibia 0 +rfoot 0 0 +rtoes 0 +lfemur 0 0 0 +ltibia 0 +lfoot 0 0 +ltoes 0 +6 +root 0 0 0 0 0 0 +lowerback 0 0 0 +upperback 0 0 0 +thorax 0 0 0 +lowerneck 0 0 0 +upperneck 0 0 0 +head 0 0 0 +rclavicle 0 0 +rhumerus 0 0 0 +rradius 0 +rwrist 0 +rhand 0 0 +rfingers 0 +rthumb 0 0 +lclavicle 0 0 +lhumerus 0 0 0 +lradius 0 +lwrist 0 +lhand 0 0 +lfingers 0 +lthumb 0 0 +rfemur 0 0 0 +rtibia 0 +rfoot 0 0 +rtoes 0 +lfemur 0 0 0 +ltibia 0 +lfoot 0 0 +ltoes 0 +7 +root 0 0 0 0 0 0 +lowerback 0 0 0 +upperback 0 0 0 +thorax 0 0 0 +lowerneck 0 0 0 +upperneck 0 0 0 +head 0 0 0 +rclavicle 0 0 +rhumerus 0 0 0 +rradius 0 +rwrist 0 +rhand 0 0 +rfingers 0 +rthumb 0 0 +lclavicle 0 0 +lhumerus 0 0 0 +lradius 0 +lwrist 0 +lhand 0 0 +lfingers 0 +lthumb 0 0 +rfemur 0 0 0 +rtibia 0 +rfoot 0 0 +rtoes 0 +lfemur 0 0 0 +ltibia 0 +lfoot 0 0 +ltoes 0 diff --git a/local_dm_control_suite/explore.py b/local_dm_control_suite/explore.py new file mode 100755 index 0000000..06fb0a8 --- /dev/null +++ b/local_dm_control_suite/explore.py @@ -0,0 +1,84 @@ +# Copyright 2018 The dm_control Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""Control suite environments explorer.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from absl import app +from absl import flags +from dm_control import suite +from dm_control.suite.wrappers import action_noise +from six.moves import input + +from dm_control import viewer + + +_ALL_NAMES = ['.'.join(domain_task) for domain_task in suite.ALL_TASKS] + +flags.DEFINE_enum('environment_name', None, _ALL_NAMES, + 'Optional \'domain_name.task_name\' pair specifying the ' + 'environment to load. If unspecified a prompt will appear to ' + 'select one.') +flags.DEFINE_bool('timeout', True, 'Whether episodes should have a time limit.') +flags.DEFINE_bool('visualize_reward', True, + 'Whether to vary the colors of geoms according to the ' + 'current reward value.') +flags.DEFINE_float('action_noise', 0., + 'Standard deviation of Gaussian noise to apply to actions, ' + 'expressed as a fraction of the max-min range for each ' + 'action dimension. Defaults to 0, i.e. no noise.') +FLAGS = flags.FLAGS + + +def prompt_environment_name(prompt, values): + environment_name = None + while not environment_name: + environment_name = input(prompt) + if not environment_name or environment_name not in values: # Membership test: list.index() raises ValueError on a miss, it never returns -1. + print('"%s" is not a valid environment name.'
% environment_name) + environment_name = None + return environment_name + + +def main(argv): + del argv + environment_name = FLAGS.environment_name + if environment_name is None: + print('\n '.join(['Available environments:'] + _ALL_NAMES)) + environment_name = prompt_environment_name( + 'Please select an environment name: ', _ALL_NAMES) + + index = _ALL_NAMES.index(environment_name) + domain_name, task_name = suite.ALL_TASKS[index] + + task_kwargs = {} + if not FLAGS.timeout: + task_kwargs['time_limit'] = float('inf') + + def loader(): + env = suite.load( + domain_name=domain_name, task_name=task_name, task_kwargs=task_kwargs) + env.task.visualize_reward = FLAGS.visualize_reward + if FLAGS.action_noise > 0: + env = action_noise.Wrapper(env, scale=FLAGS.action_noise) + return env + + viewer.launch(loader) + + +if __name__ == '__main__': + app.run(main) diff --git a/local_dm_control_suite/finger.py b/local_dm_control_suite/finger.py new file mode 100755 index 0000000..e700db6 --- /dev/null +++ b/local_dm_control_suite/finger.py @@ -0,0 +1,217 @@ +# Copyright 2017 The dm_control Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+# ============================================================================ + +"""Finger Domain.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections + +from dm_control import mujoco +from dm_control.rl import control +from local_dm_control_suite import base +from local_dm_control_suite import common +from dm_control.suite.utils import randomizers +from dm_control.utils import containers +import numpy as np +from six.moves import range + +_DEFAULT_TIME_LIMIT = 20 # (seconds) +_CONTROL_TIMESTEP = .02 # (seconds) +# For TURN tasks, the 'tip' geom needs to enter a spherical target of sizes: +_EASY_TARGET_SIZE = 0.07 +_HARD_TARGET_SIZE = 0.03 +# Initial spin velocity for the Stop task. +_INITIAL_SPIN_VELOCITY = 100 +# Spinning slower than this value (radian/second) is considered stopped. +_STOP_VELOCITY = 1e-6 +# Spinning faster than this value (radian/second) is considered spinning. +_SPIN_VELOCITY = 15.0 + + +SUITE = containers.TaggedTasks() + + +def get_model_and_assets(): + """Returns a tuple containing the model XML string and a dict of assets.""" + return common.read_model('finger.xml'), common.ASSETS + + +@SUITE.add('benchmarking') +def spin(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): + """Returns the Spin task.""" + physics = Physics.from_xml_string(*get_model_and_assets()) + task = Spin(random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP, + **environment_kwargs) + + +@SUITE.add('benchmarking') +def turn_easy(time_limit=_DEFAULT_TIME_LIMIT, random=None, + environment_kwargs=None): + """Returns the easy Turn task.""" + physics = Physics.from_xml_string(*get_model_and_assets()) + task = Turn(target_radius=_EASY_TARGET_SIZE, random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, 
time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP, + **environment_kwargs) + + +@SUITE.add('benchmarking') +def turn_hard(time_limit=_DEFAULT_TIME_LIMIT, random=None, + environment_kwargs=None): + """Returns the hard Turn task.""" + physics = Physics.from_xml_string(*get_model_and_assets()) + task = Turn(target_radius=_HARD_TARGET_SIZE, random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP, + **environment_kwargs) + + +class Physics(mujoco.Physics): + """Physics simulation with additional features for the Finger domain.""" + + def touch(self): + """Returns logarithmically scaled signals from the two touch sensors.""" + return np.log1p(self.named.data.sensordata[['touchtop', 'touchbottom']]) + + def hinge_velocity(self): + """Returns the velocity of the hinge joint.""" + return self.named.data.sensordata['hinge_velocity'] + + def tip_position(self): + """Returns the (x,z) position of the tip relative to the hinge.""" + return (self.named.data.sensordata['tip'][[0, 2]] - + self.named.data.sensordata['spinner'][[0, 2]]) + + def bounded_position(self): + """Returns the positions, with the hinge angle replaced by tip position.""" + return np.hstack((self.named.data.sensordata[['proximal', 'distal']], + self.tip_position())) + + def velocity(self): + """Returns the velocities (extracted from sensordata).""" + return self.named.data.sensordata[['proximal_velocity', + 'distal_velocity', + 'hinge_velocity']] + + def target_position(self): + """Returns the (x,z) position of the target relative to the hinge.""" + return (self.named.data.sensordata['target'][[0, 2]] - + self.named.data.sensordata['spinner'][[0, 2]]) + + def to_target(self): + """Returns the vector from the tip to the target.""" + return self.target_position() - self.tip_position() + + def dist_to_target(self): + """Returns the signed distance to the target surface, negative is inside.""" + 
class Spin(base.Task):
  """Finger `Task` that rewards spinning the hinged body fast enough."""

  def __init__(self, random=None):
    """Creates a `Spin` task.

    Args:
      random: Optional, either a `numpy.random.RandomState` instance, an
        integer seed for creating a new `RandomState`, or None to select a
        seed automatically (default).
    """
    super(Spin, self).__init__(random=random)

  def initialize_episode(self, physics):
    """Hides the unused sites, sets hinge damping and randomizes the joints.

    Args:
      physics: The finger `Physics` instance to initialize.
    """
    model = physics.named.model
    # Alpha channel 0 makes the target and tip sites invisible.
    for site_name in ('target', 'tip'):
      model.site_rgba[site_name, 3] = 0
    model.dof_damping['hinge'] = .03
    _set_random_joint_angles(physics, self.random)
    super(Spin, self).initialize_episode(physics)

  def get_observation(self, physics):
    """Returns bounded positions, velocities and touch-sensor readings."""
    return collections.OrderedDict([
        ('position', physics.bounded_position()),
        ('velocity', physics.velocity()),
        ('touch', physics.touch()),
    ])

  def get_reward(self, physics):
    """Returns 1.0 if the hinge velocity is <= `-_SPIN_VELOCITY`, else 0.0."""
    fast_enough = physics.hinge_velocity() <= -_SPIN_VELOCITY
    return float(fast_enough)
def _set_random_joint_angles(physics, random, max_attempts=1000):
  """Randomizes the joints until the model has no contacts.

  Args:
    physics: The `Physics` instance whose joints are randomized in place.
    random: Random state handed to the joint randomizer.
    max_attempts: Maximum number of randomization attempts.

  Raises:
    RuntimeError: If every attempt still leaves at least one contact
      (including the case where `max_attempts` allows no attempt at all).
  """
  failure_template = ('Could not find a collision-free state '
                      'after {} attempts')
  for _attempt in range(max_attempts):
    randomizers.randomize_limited_and_rotational_joints(physics, random)
    # Recompute derived quantities so the contact count reflects the new pose.
    physics.after_reset()
    if physics.data.ncon == 0:
      return
  raise RuntimeError(failure_template.format(max_attempts))
class Physics(mujoco.Physics):
  """Fish-domain physics with convenience accessors for observations."""

  def upright(self):
    """Returns the 'zz' entry of the torso orientation matrix.

    This is the projection of the torso z-axis onto the world z-axis.
    """
    torso_zz = self.named.data.xmat['torso', 'zz']
    return torso_zz

  def torso_velocity(self):
    """Returns the full `sensordata` array.

    NOTE(review): the original docstring describes this as the velocities and
    angular velocities of the torso; the sensor layout lives in fish.xml.
    """
    sensor_readings = self.data.sensordata
    return sensor_readings

  def joint_velocities(self):
    """Returns the `qvel` entries of the joints listed in `_JOINTS`."""
    qvel = self.named.data.qvel
    return qvel[_JOINTS]

  def joint_angles(self):
    """Returns the `qpos` entries of the joints listed in `_JOINTS`."""
    qpos = self.named.data.qpos
    return qpos[_JOINTS]

  def mouth_to_target(self):
    """Returns the mouth-to-target vector in the mouth's local frame."""
    named_data = self.named.data
    offset_world = (named_data.geom_xpos['target'] -
                    named_data.geom_xpos['mouth'])
    mouth_frame = named_data.geom_xmat['mouth'].reshape(3, 3)
    # Right-multiplying by the mouth frame expresses the world-frame offset
    # in mouth coordinates.
    return offset_world.dot(mouth_frame)
class Swim(base.Task):
  """Fish `Task` that rewards reaching the target while staying upright."""

  def __init__(self, random=None):
    """Creates a `Swim` task.

    Args:
      random: Optional, either a `numpy.random.RandomState` instance, an
        integer seed for creating a new `RandomState`, or None to select a
        seed automatically (default).
    """
    super(Swim, self).__init__(random=random)

  def initialize_episode(self, physics):
    """Randomizes orientation, joint angles and target position.

    Args:
      physics: The fish `Physics` instance to initialize.
    """
    named = physics.named

    # Random unit quaternion for the root orientation.
    raw_quat = self.random.randn(4)
    named.data.qpos['root'][3:7] = raw_quat / np.linalg.norm(raw_quat)

    for joint_name in _JOINTS:
      named.data.qpos[joint_name] = self.random.uniform(-.2, .2)

    # Target position: one uniform draw per axis, in x, y, z order.
    target_ranges = (('x', -.4, .4), ('y', -.4, .4), ('z', .1, .3))
    for axis, low, high in target_ranges:
      named.model.geom_pos['target', axis] = self.random.uniform(low, high)

    super(Swim, self).initialize_episode(physics)

  def get_observation(self, physics):
    """Returns joint angles, uprightness, target vector and velocities."""
    return collections.OrderedDict([
        ('joint_angles', physics.joint_angles()),
        ('upright', physics.upright()),
        ('target', physics.mouth_to_target()),
        ('velocity', physics.velocity()),
    ])

  def get_reward(self, physics):
    """Returns a smooth reward mixing target proximity and uprightness.

    The proximity term is a `rewards.tolerance` of the mouth-to-target
    distance with bounds `(0, radii)` and margin `2 * radii`, where `radii` is
    the sum of the first size component of the mouth and target geoms. The
    upright term is `0.5 * (upright + 1)`. The two are combined with weights
    7 and 1 and divided by 8.
    """
    radii = physics.named.model.geom_size[['mouth', 'target'], 0].sum()
    distance = np.linalg.norm(physics.mouth_to_target())
    in_target = rewards.tolerance(distance, bounds=(0, radii), margin=2*radii)
    is_upright = 0.5 * (physics.upright() + 1)
    return (7*in_target + is_upright) / 8
@SUITE.add('benchmarking')
def stand(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
  """Builds the Hopper environment whose goal is to balance upright.

  Args:
    time_limit: Episode time limit, passed through to `control.Environment`.
    random: Seed or random state, passed through to the `Hopper` task.
    environment_kwargs: Optional dict of additional keyword arguments for
      `control.Environment`. A falsy value is treated as "no extras".

  Returns:
    A `control.Environment` running a `Hopper` task with `hopping=False`.
  """
  hopper_physics = Physics.from_xml_string(*get_model_and_assets())
  stand_task = Hopper(hopping=False, random=random)
  extra_kwargs = environment_kwargs if environment_kwargs else {}
  return control.Environment(
      hopper_physics,
      stand_task,
      time_limit=time_limit,
      control_timestep=_CONTROL_TIMESTEP,
      **extra_kwargs)
class Hopper(base.Task):
  """Hopper `Task` covering both the standing and the hopping variants."""

  def __init__(self, hopping, random=None):
    """Creates a `Hopper` task.

    Args:
      hopping: Boolean. True selects the hop-forward reward, False selects
        the balance-upright reward.
      random: Optional, either a `numpy.random.RandomState` instance, an
        integer seed for creating a new `RandomState`, or None to select a
        seed automatically (default).
    """
    self._hopping = hopping
    super(Hopper, self).__init__(random=random)

  def initialize_episode(self, physics):
    """Randomizes the joints and resets per-episode state.

    Args:
      physics: The hopper `Physics` instance to initialize.
    """
    randomizers.randomize_limited_and_rotational_joints(physics, self.random)
    self._timeout_progress = 0
    super(Hopper, self).initialize_episode(physics)

  def get_observation(self, physics):
    """Returns positions, velocities and touch-sensor readings."""
    # The first qpos entry (horizontal position) is dropped to maintain
    # translational invariance.
    positions = physics.data.qpos[1:].copy()
    return collections.OrderedDict([
        ('position', positions),
        ('velocity', physics.velocity()),
        ('touch', physics.touch()),
    ])

  def get_reward(self, physics):
    """Returns the standing reward scaled by the variant-specific term.

    For the standing variant the second factor is a control-effort term
    mapped into [0.8, 1] via `(x + 4) / 5`; for the hopping variant it is a
    linear tolerance on forward speed reaching 1 at `_HOP_SPEED`.
    """
    standing = rewards.tolerance(physics.height(), (_STAND_HEIGHT, 2))

    if not self._hopping:
      control_term = rewards.tolerance(physics.control(),
                                       margin=1, value_at_margin=0,
                                       sigmoid='quadratic').mean()
      control_term = (control_term + 4) / 5
      return standing * control_term

    hop_term = rewards.tolerance(physics.speed(),
                                 bounds=(_HOP_SPEED, float('inf')),
                                 margin=_HOP_SPEED/2,
                                 value_at_margin=0.5,
                                 sigmoid='linear')
    return standing * hop_term
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Humanoid Domain.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections + +from dm_control import mujoco +from dm_control.rl import control +from local_dm_control_suite import base +from local_dm_control_suite import common +from dm_control.suite.utils import randomizers +from dm_control.utils import containers +from dm_control.utils import rewards +import numpy as np + +_DEFAULT_TIME_LIMIT = 25 +_CONTROL_TIMESTEP = .025 + +# Height of head above which stand reward is 1. +_STAND_HEIGHT = 1.4 + +# Horizontal speeds above which move reward is 1. 
@SUITE.add('benchmarking')
def walk(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
  """Builds the Humanoid Walk environment.

  Args:
    time_limit: Episode time limit, passed through to `control.Environment`.
    random: Seed or random state, passed through to the `Humanoid` task.
    environment_kwargs: Optional dict of additional keyword arguments for
      `control.Environment`. A falsy value is treated as "no extras".

  Returns:
    A `control.Environment` running a `Humanoid` task with
    `move_speed=_WALK_SPEED` and feature-based (non pure-state) observations.
  """
  humanoid_physics = Physics.from_xml_string(*get_model_and_assets())
  walk_task = Humanoid(move_speed=_WALK_SPEED, pure_state=False, random=random)
  extra_kwargs = environment_kwargs if environment_kwargs else {}
  return control.Environment(
      humanoid_physics,
      walk_task,
      time_limit=time_limit,
      control_timestep=_CONTROL_TIMESTEP,
      **extra_kwargs)
class Physics(mujoco.Physics):
  """Physics simulation with additional features for the Humanoid domain."""

  def torso_upright(self):
    """Returns the 'zz' entry of the torso orientation matrix.

    This is the projection of the torso z-axis onto the world z-axis.
    """
    torso_zz = self.named.data.xmat['torso', 'zz']
    return torso_zz

  def head_height(self):
    """Returns the z coordinate (height) of the head."""
    head_z = self.named.data.xpos['head', 'z']
    return head_z

  def center_of_mass_position(self):
    """Returns a copy of the torso subtree center-of-mass position."""
    com = self.named.data.subtree_com['torso']
    return com.copy()

  def center_of_mass_velocity(self):
    """Returns a copy of the `torso_subtreelinvel` sensor reading."""
    com_velocity = self.named.data.sensordata['torso_subtreelinvel']
    return com_velocity.copy()

  def torso_vertical_orientation(self):
    """Returns the 'zx', 'zy', 'zz' entries of the torso orientation matrix."""
    z_row = ['zx', 'zy', 'zz']
    return self.named.data.xmat['torso', z_row]

  def joint_angles(self):
    """Returns a copy of `qpos` without global position or orientation."""
    # Skip the 7 DoFs of the free root joint.
    root_dofs = 7
    return self.data.qpos[root_dofs:].copy()

  def extremities(self):
    """Returns hand and foot positions expressed in the torso frame.

    The result concatenates left hand, left foot, right hand, right foot.
    """
    xpos = self.named.data.xpos
    torso_frame = self.named.data.xmat['torso'].reshape(3, 3)
    torso_pos = xpos['torso']
    body_names = [side + limb
                  for side in ('left_', 'right_')
                  for limb in ('hand', 'foot')]
    return np.hstack([(xpos[name] - torso_pos).dot(torso_frame)
                      for name in body_names])
+ random: Optional, either a `numpy.random.RandomState` instance, an + integer seed for creating a new `RandomState`, or None to select a seed + automatically (default). + """ + self._move_speed = move_speed + self._pure_state = pure_state + super(Humanoid, self).__init__(random=random) + + def initialize_episode(self, physics): + """Sets the state of the environment at the start of each episode. + + Args: + physics: An instance of `Physics`. + + """ + # Find a collision-free random initial configuration. + penetrating = True + while penetrating: + randomizers.randomize_limited_and_rotational_joints(physics, self.random) + # Check for collisions. + physics.after_reset() + penetrating = physics.data.ncon > 0 + super(Humanoid, self).initialize_episode(physics) + + def get_observation(self, physics): + """Returns either the pure state or a set of egocentric features.""" + obs = collections.OrderedDict() + if self._pure_state: + obs['position'] = physics.position() + obs['velocity'] = physics.velocity() + else: + obs['joint_angles'] = physics.joint_angles() + obs['head_height'] = physics.head_height() + obs['extremities'] = physics.extremities() + obs['torso_vertical'] = physics.torso_vertical_orientation() + obs['com_velocity'] = physics.center_of_mass_velocity() + obs['velocity'] = physics.velocity() + return obs + + def get_reward(self, physics): + """Returns a reward to the agent.""" + standing = rewards.tolerance(physics.head_height(), + bounds=(_STAND_HEIGHT, float('inf')), + margin=_STAND_HEIGHT/4) + upright = rewards.tolerance(physics.torso_upright(), + bounds=(0.9, float('inf')), sigmoid='linear', + margin=1.9, value_at_margin=0) + stand_reward = standing * upright + small_control = rewards.tolerance(physics.control(), margin=1, + value_at_margin=0, + sigmoid='quadratic').mean() + small_control = (4 + small_control) / 5 + if self._move_speed == 0: + horizontal_velocity = physics.center_of_mass_velocity()[[0, 1]] + dont_move = 
rewards.tolerance(horizontal_velocity, margin=2).mean() + return small_control * stand_reward * dont_move + else: + com_velocity = np.linalg.norm(physics.center_of_mass_velocity()[[0, 1]]) + move = rewards.tolerance(com_velocity, + bounds=(self._move_speed, float('inf')), + margin=self._move_speed, value_at_margin=0, + sigmoid='linear') + move = (5*move + 1) / 6 + return small_control * stand_reward * move diff --git a/local_dm_control_suite/humanoid.xml b/local_dm_control_suite/humanoid.xml new file mode 100755 index 0000000..32b84c5 --- /dev/null +++ b/local_dm_control_suite/humanoid.xml @@ -0,0 +1,202 @@ + + + + + + + + + diff --git a/local_dm_control_suite/humanoid_CMU.py b/local_dm_control_suite/humanoid_CMU.py new file mode 100755 index 0000000..d06fb63 --- /dev/null +++ b/local_dm_control_suite/humanoid_CMU.py @@ -0,0 +1,179 @@ +# Copyright 2017 The dm_control Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
class Physics(mujoco.Physics):
  """Physics simulation with additional features for the humanoid_CMU domain."""

  def thorax_upright(self):
    """Returns projection from y-axes of thorax to the z-axes of world."""
    return self.named.data.xmat['thorax', 'zy']

  def head_height(self):
    """Returns the height of the head."""
    return self.named.data.xpos['head', 'z']

  def center_of_mass_position(self):
    """Returns a copy of the position of the center-of-mass."""
    # Copy, like `center_of_mass_velocity` and `joint_angles` below, so the
    # caller gets its own array rather than (presumably) a reference into the
    # simulator's data that changes on the next step.
    return self.named.data.subtree_com['thorax'].copy()

  def center_of_mass_velocity(self):
    """Returns a copy of the velocity of the center-of-mass."""
    return self.named.data.sensordata['thorax_subtreelinvel'].copy()

  def torso_vertical_orientation(self):
    """Returns the z-projection of the thorax orientation matrix."""
    return self.named.data.xmat['thorax', ['zx', 'zy', 'zz']]

  def joint_angles(self):
    """Returns the state without global orientation or position."""
    return self.data.qpos[7:].copy()  # Skip the 7 DoFs of the free root joint.

  def extremities(self):
    """Returns end effector positions in egocentric frame.

    The result concatenates the thorax-frame positions of the bodies
    'lhand', 'lfoot', 'rhand' and 'rfoot', in that order.
    """
    torso_frame = self.named.data.xmat['thorax'].reshape(3, 3)
    torso_pos = self.named.data.xpos['thorax']
    positions = []
    for side in ('l', 'r'):
      for limb in ('hand', 'foot'):
        torso_to_limb = self.named.data.xpos[side + limb] - torso_pos
        # Right-multiplying by the thorax frame expresses the world-frame
        # offset in thorax coordinates.
        positions.append(torso_to_limb.dot(torso_frame))
    return np.hstack(positions)
+ + Args: + physics: An instance of `Physics`. + """ + penetrating = True + while penetrating: + randomizers.randomize_limited_and_rotational_joints( + physics, self.random) + # Check for collisions. + physics.after_reset() + penetrating = physics.data.ncon > 0 + super(HumanoidCMU, self).initialize_episode(physics) + + def get_observation(self, physics): + """Returns a set of egocentric features.""" + obs = collections.OrderedDict() + obs['joint_angles'] = physics.joint_angles() + obs['head_height'] = physics.head_height() + obs['extremities'] = physics.extremities() + obs['torso_vertical'] = physics.torso_vertical_orientation() + obs['com_velocity'] = physics.center_of_mass_velocity() + obs['velocity'] = physics.velocity() + return obs + + def get_reward(self, physics): + """Returns a reward to the agent.""" + standing = rewards.tolerance(physics.head_height(), + bounds=(_STAND_HEIGHT, float('inf')), + margin=_STAND_HEIGHT/4) + upright = rewards.tolerance(physics.thorax_upright(), + bounds=(0.9, float('inf')), sigmoid='linear', + margin=1.9, value_at_margin=0) + stand_reward = standing * upright + small_control = rewards.tolerance(physics.control(), margin=1, + value_at_margin=0, + sigmoid='quadratic').mean() + small_control = (4 + small_control) / 5 + if self._move_speed == 0: + horizontal_velocity = physics.center_of_mass_velocity()[[0, 1]] + dont_move = rewards.tolerance(horizontal_velocity, margin=2).mean() + return small_control * stand_reward * dont_move + else: + com_velocity = np.linalg.norm(physics.center_of_mass_velocity()[[0, 1]]) + move = rewards.tolerance(com_velocity, + bounds=(self._move_speed, float('inf')), + margin=self._move_speed, value_at_margin=0, + sigmoid='linear') + move = (5*move + 1) / 6 + return small_control * stand_reward * move diff --git a/local_dm_control_suite/humanoid_CMU.xml b/local_dm_control_suite/humanoid_CMU.xml new file mode 100755 index 0000000..9a41a16 --- /dev/null +++ b/local_dm_control_suite/humanoid_CMU.xml @@ -0,0 
+1,289 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/local_dm_control_suite/lqr.py b/local_dm_control_suite/lqr.py new file mode 100755 index 0000000..34197b4 --- /dev/null +++ b/local_dm_control_suite/lqr.py @@ -0,0 +1,272 @@ +# Copyright 2017 The dm_control Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""Procedurally generated LQR domain.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections +import os + +from dm_control import mujoco +from dm_control.rl import control +from local_dm_control_suite import base +from local_dm_control_suite import common +from dm_control.utils import containers +from dm_control.utils import xml_tools +from lxml import etree +import numpy as np +from six.moves import range + +from dm_control.utils import io as resources + +_DEFAULT_TIME_LIMIT = float('inf') +_CONTROL_COST_COEF = 0.1 +SUITE = containers.TaggedTasks() + + +def get_model_and_assets(n_bodies, n_actuators, random): + """Returns the model description as an XML string and a dict of assets. + + Args: + n_bodies: An int, number of bodies of the LQR. + n_actuators: An int, number of actuated bodies of the LQR. `n_actuators` + should be less or equal than `n_bodies`. + random: A `numpy.random.RandomState` instance. + + Returns: + A tuple `(model_xml_string, assets)`, where `assets` is a dict consisting of + `{filename: contents_string}` pairs. 
+ """ + return _make_model(n_bodies, n_actuators, random), common.ASSETS + + +@SUITE.add() +def lqr_2_1(time_limit=_DEFAULT_TIME_LIMIT, random=None, + environment_kwargs=None): + """Returns an LQR environment with 2 bodies of which the first is actuated.""" + return _make_lqr(n_bodies=2, + n_actuators=1, + control_cost_coef=_CONTROL_COST_COEF, + time_limit=time_limit, + random=random, + environment_kwargs=environment_kwargs) + + +@SUITE.add() +def lqr_6_2(time_limit=_DEFAULT_TIME_LIMIT, random=None, + environment_kwargs=None): + """Returns an LQR environment with 6 bodies of which first 2 are actuated.""" + return _make_lqr(n_bodies=6, + n_actuators=2, + control_cost_coef=_CONTROL_COST_COEF, + time_limit=time_limit, + random=random, + environment_kwargs=environment_kwargs) + + +def _make_lqr(n_bodies, n_actuators, control_cost_coef, time_limit, random, + environment_kwargs): + """Returns a LQR environment. + + Args: + n_bodies: An int, number of bodies of the LQR. + n_actuators: An int, number of actuated bodies of the LQR. `n_actuators` + should be less or equal than `n_bodies`. + control_cost_coef: A number, the coefficient of the control cost. + time_limit: An int, maximum time for each episode in seconds. + random: Either an existing `numpy.random.RandomState` instance, an + integer seed for creating a new `RandomState`, or None to select a seed + automatically. + environment_kwargs: A `dict` specifying keyword arguments for the + environment, or None. + + Returns: + A LQR environment with `n_bodies` bodies of which first `n_actuators` are + actuated. 
+ """ + + if not isinstance(random, np.random.RandomState): + random = np.random.RandomState(random) + + model_string, assets = get_model_and_assets(n_bodies, n_actuators, + random=random) + physics = Physics.from_xml_string(model_string, assets=assets) + task = LQRLevel(control_cost_coef, random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment(physics, task, time_limit=time_limit, + **environment_kwargs) + + +def _make_body(body_id, stiffness_range, damping_range, random): + """Returns an `etree.Element` defining a body. + + Args: + body_id: Id of the created body. + stiffness_range: A tuple of (stiffness_lower_bound, stiffness_uppder_bound). + The stiffness of the joint is drawn uniformly from this range. + damping_range: A tuple of (damping_lower_bound, damping_upper_bound). The + damping of the joint is drawn uniformly from this range. + random: A `numpy.random.RandomState` instance. + + Returns: + A new instance of `etree.Element`. A body element with two children: joint + and geom. + """ + body_name = 'body_{}'.format(body_id) + joint_name = 'joint_{}'.format(body_id) + geom_name = 'geom_{}'.format(body_id) + + body = etree.Element('body', name=body_name) + body.set('pos', '.25 0 0') + joint = etree.SubElement(body, 'joint', name=joint_name) + body.append(etree.Element('geom', name=geom_name)) + joint.set('stiffness', + str(random.uniform(stiffness_range[0], stiffness_range[1]))) + joint.set('damping', + str(random.uniform(damping_range[0], damping_range[1]))) + return body + + +def _make_model(n_bodies, + n_actuators, + random, + stiffness_range=(15, 25), + damping_range=(0, 0)): + """Returns an MJCF XML string defining a model of springs and dampers. + + Args: + n_bodies: An integer, the number of bodies (DoFs) in the system. + n_actuators: An integer, the number of actuated bodies. + random: A `numpy.random.RandomState` instance. + stiffness_range: A tuple containing minimum and maximum stiffness. 
Each + joint's stiffness is sampled uniformly from this interval. + damping_range: A tuple containing minimum and maximum damping. Each joint's + damping is sampled uniformly from this interval. + + Returns: + An MJCF string describing the linear system. + + Raises: + ValueError: If the number of bodies or actuators is erronous. + """ + if n_bodies < 1 or n_actuators < 1: + raise ValueError('At least 1 body and 1 actuator required.') + if n_actuators > n_bodies: + raise ValueError('At most 1 actuator per body.') + + file_path = os.path.join(os.path.dirname(__file__), 'lqr.xml') + with resources.GetResourceAsFile(file_path) as xml_file: + mjcf = xml_tools.parse(xml_file) + parent = mjcf.find('./worldbody') + actuator = etree.SubElement(mjcf.getroot(), 'actuator') + tendon = etree.SubElement(mjcf.getroot(), 'tendon') + + for body in range(n_bodies): + # Inserting body. + child = _make_body(body, stiffness_range, damping_range, random) + site_name = 'site_{}'.format(body) + child.append(etree.Element('site', name=site_name)) + + if body == 0: + child.set('pos', '.25 0 .1') + # Add actuators to the first n_actuators bodies. + if body < n_actuators: + # Adding actuator. + joint_name = 'joint_{}'.format(body) + motor_name = 'motor_{}'.format(body) + child.find('joint').set('name', joint_name) + actuator.append(etree.Element('motor', name=motor_name, joint=joint_name)) + + # Add a tendon between consecutive bodies (for visualisation purposes only). 
+ if body < n_bodies - 1: + child_site_name = 'site_{}'.format(body + 1) + tendon_name = 'tendon_{}'.format(body) + spatial = etree.SubElement(tendon, 'spatial', name=tendon_name) + spatial.append(etree.Element('site', site=site_name)) + spatial.append(etree.Element('site', site=child_site_name)) + parent.append(child) + parent = child + + return etree.tostring(mjcf, pretty_print=True) + + +class Physics(mujoco.Physics): + """Physics simulation with additional features for the LQR domain.""" + + def state_norm(self): + """Returns the norm of the physics state.""" + return np.linalg.norm(self.state()) + + +class LQRLevel(base.Task): + """A Linear Quadratic Regulator `Task`.""" + + _TERMINAL_TOL = 1e-6 + + def __init__(self, control_cost_coef, random=None): + """Initializes an LQR level with cost = sum(states^2) + c*sum(controls^2). + + Args: + control_cost_coef: The coefficient of the control cost. + random: Optional, either a `numpy.random.RandomState` instance, an + integer seed for creating a new `RandomState`, or None to select a seed + automatically (default). + + Raises: + ValueError: If the control cost coefficient is not positive. 
+ """ + if control_cost_coef <= 0: + raise ValueError('control_cost_coef must be positive.') + + self._control_cost_coef = control_cost_coef + super(LQRLevel, self).__init__(random=random) + + @property + def control_cost_coef(self): + return self._control_cost_coef + + def initialize_episode(self, physics): + """Random state sampled from a unit sphere.""" + ndof = physics.model.nq + unit = self.random.randn(ndof) + physics.data.qpos[:] = np.sqrt(2) * unit / np.linalg.norm(unit) + super(LQRLevel, self).initialize_episode(physics) + + def get_observation(self, physics): + """Returns an observation of the state.""" + obs = collections.OrderedDict() + obs['position'] = physics.position() + obs['velocity'] = physics.velocity() + return obs + + def get_reward(self, physics): + """Returns a quadratic state and control reward.""" + position = physics.position() + state_cost = 0.5 * np.dot(position, position) + control_signal = physics.control() + control_l2_norm = 0.5 * np.dot(control_signal, control_signal) + return 1 - (state_cost + control_l2_norm * self._control_cost_coef) + + def get_evaluation(self, physics): + """Returns a sparse evaluation reward that is not used for learning.""" + return float(physics.state_norm() <= 0.01) + + def get_termination(self, physics): + """Terminates when the state norm is smaller than epsilon.""" + if physics.state_norm() < self._TERMINAL_TOL: + return 0.0 diff --git a/local_dm_control_suite/lqr.xml b/local_dm_control_suite/lqr.xml new file mode 100755 index 0000000..d403532 --- /dev/null +++ b/local_dm_control_suite/lqr.xml @@ -0,0 +1,26 @@ + + + + + + + + + + + + + + + diff --git a/local_dm_control_suite/lqr_solver.py b/local_dm_control_suite/lqr_solver.py new file mode 100755 index 0000000..3935c7d --- /dev/null +++ b/local_dm_control_suite/lqr_solver.py @@ -0,0 +1,142 @@ +# Copyright 2017 The dm_control Authors. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +r"""Optimal policy for LQR levels. + +LQR control problem is described in +https://en.wikipedia.org/wiki/Linear-quadratic_regulator#Infinite-horizon.2C_discrete-time_LQR +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from absl import logging +from dm_control.mujoco import wrapper +import numpy as np +from six.moves import range + +try: + import scipy.linalg as sp # pylint: disable=g-import-not-at-top +except ImportError: + sp = None + + +def _solve_dare(a, b, q, r): + """Solves the Discrete-time Algebraic Riccati Equation (DARE) by iteration. + + Algebraic Riccati Equation: + ```none + P_{t-1} = Q + A' * P_{t} * A - + A' * P_{t} * B * (R + B' * P_{t} * B)^{-1} * B' * P_{t} * A + ``` + + Args: + a: A 2 dimensional numpy array, transition matrix A. + b: A 2 dimensional numpy array, control matrix B. + q: A 2 dimensional numpy array, symmetric positive definite cost matrix. + r: A 2 dimensional numpy array, symmetric positive definite cost matrix + + Returns: + A numpy array, a real symmetric matrix P which is the solution to DARE. + + Raises: + RuntimeError: If the computed P matrix is not symmetric and + positive-definite. 
+ """ + p = np.eye(len(a)) + for _ in range(1000000): + a_p = a.T.dot(p) # A' * P_t + a_p_b = np.dot(a_p, b) # A' * P_t * B + # Algebraic Riccati Equation. + p_next = q + np.dot(a_p, a) - a_p_b.dot( + np.linalg.solve(b.T.dot(p.dot(b)) + r, a_p_b.T)) + p_next += p_next.T + p_next *= .5 + if np.abs(p - p_next).max() < 1e-12: + break + p = p_next + else: + logging.warning('DARE solver did not converge') + try: + # Check that the result is symmetric and positive-definite. + np.linalg.cholesky(p_next) + except np.linalg.LinAlgError: + raise RuntimeError('ARE solver failed: P matrix is not symmetric and ' + 'positive-definite.') + return p_next + + +def solve(env): + """Returns the optimal value and policy for LQR problem. + + Args: + env: An instance of `control.EnvironmentV2` with LQR level. + + Returns: + p: A numpy array, the Hessian of the optimal total cost-to-go (value + function at state x) is V(x) = .5 * x' * p * x. + k: A numpy array which gives the optimal linear policy u = k * x. + beta: The maximum eigenvalue of (a + b * k). Under optimal policy, at + timestep n the state tends to 0 like beta^n. + + Raises: + RuntimeError: If the controlled system is unstable. + """ + n = env.physics.model.nq # number of DoFs + m = env.physics.model.nu # number of controls + + # Compute the mass matrix. + mass = np.zeros((n, n)) + wrapper.mjbindings.mjlib.mj_fullM(env.physics.model.ptr, mass, + env.physics.data.qM) + + # Compute input matrices a, b, q and r to the DARE solvers. + # State transition matrix a. + stiffness = np.diag(env.physics.model.jnt_stiffness.ravel()) + damping = np.diag(env.physics.model.dof_damping.ravel()) + dt = env.physics.model.opt.timestep + + j = np.linalg.solve(-mass, np.hstack((stiffness, damping))) + a = np.eye(2 * n) + dt * np.vstack( + (dt * j + np.hstack((np.zeros((n, n)), np.eye(n))), j)) + + # Control transition matrix b. 
+ b = env.physics.data.actuator_moment.T + bc = np.linalg.solve(mass, b) + b = dt * np.vstack((dt * bc, bc)) + + # State cost Hessian q. + q = np.diag(np.hstack([np.ones(n), np.zeros(n)])) + + # Control cost Hessian r. + r = env.task.control_cost_coef * np.eye(m) + + if sp: + # Use scipy's faster DARE solver if available. + solve_dare = sp.solve_discrete_are + else: + # Otherwise fall back on a slower internal implementation. + solve_dare = _solve_dare + + # Solve the discrete algebraic Riccati equation. + p = solve_dare(a, b, q, r) + k = -np.linalg.solve(b.T.dot(p.dot(b)) + r, b.T.dot(p.dot(a))) + + # Under optimal policy, state tends to 0 like beta^n_timesteps + beta = np.abs(np.linalg.eigvals(a + b.dot(k))).max() + if beta >= 1.0: + raise RuntimeError('Controlled system is unstable.') + return p, k, beta diff --git a/local_dm_control_suite/manipulator.py b/local_dm_control_suite/manipulator.py new file mode 100755 index 0000000..b2ed31f --- /dev/null +++ b/local_dm_control_suite/manipulator.py @@ -0,0 +1,290 @@ +# Copyright 2017 The dm_control Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""Planar Manipulator domain.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections + +from dm_control import mujoco +from dm_control.rl import control +from local_dm_control_suite import base +from local_dm_control_suite import common +from dm_control.utils import containers +from dm_control.utils import rewards +from dm_control.utils import xml_tools + +from lxml import etree +import numpy as np + +_CLOSE = .01 # (Meters) Distance below which a thing is considered close. +_CONTROL_TIMESTEP = .01 # (Seconds) +_TIME_LIMIT = 10 # (Seconds) +_P_IN_HAND = .1 # Probabillity of object-in-hand initial state +_P_IN_TARGET = .1 # Probabillity of object-in-target initial state +_ARM_JOINTS = ['arm_root', 'arm_shoulder', 'arm_elbow', 'arm_wrist', + 'finger', 'fingertip', 'thumb', 'thumbtip'] +_ALL_PROPS = frozenset(['ball', 'target_ball', 'cup', + 'peg', 'target_peg', 'slot']) + +SUITE = containers.TaggedTasks() + + +def make_model(use_peg, insert): + """Returns a tuple containing the model XML string and a dict of assets.""" + xml_string = common.read_model('manipulator.xml') + parser = etree.XMLParser(remove_blank_text=True) + mjcf = etree.XML(xml_string, parser) + + # Select the desired prop. 
+ if use_peg: + required_props = ['peg', 'target_peg'] + if insert: + required_props += ['slot'] + else: + required_props = ['ball', 'target_ball'] + if insert: + required_props += ['cup'] + + # Remove unused props + for unused_prop in _ALL_PROPS.difference(required_props): + prop = xml_tools.find_element(mjcf, 'body', unused_prop) + prop.getparent().remove(prop) + + return etree.tostring(mjcf, pretty_print=True), common.ASSETS + + +@SUITE.add('benchmarking', 'hard') +def bring_ball(fully_observable=True, time_limit=_TIME_LIMIT, random=None, + environment_kwargs=None): + """Returns manipulator bring task with the ball prop.""" + use_peg = False + insert = False + physics = Physics.from_xml_string(*make_model(use_peg, insert)) + task = Bring(use_peg=use_peg, insert=insert, + fully_observable=fully_observable, random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, control_timestep=_CONTROL_TIMESTEP, time_limit=time_limit, + **environment_kwargs) + + +@SUITE.add('hard') +def bring_peg(fully_observable=True, time_limit=_TIME_LIMIT, random=None, + environment_kwargs=None): + """Returns manipulator bring task with the peg prop.""" + use_peg = True + insert = False + physics = Physics.from_xml_string(*make_model(use_peg, insert)) + task = Bring(use_peg=use_peg, insert=insert, + fully_observable=fully_observable, random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, control_timestep=_CONTROL_TIMESTEP, time_limit=time_limit, + **environment_kwargs) + + +@SUITE.add('hard') +def insert_ball(fully_observable=True, time_limit=_TIME_LIMIT, random=None, + environment_kwargs=None): + """Returns manipulator insert task with the ball prop.""" + use_peg = False + insert = True + physics = Physics.from_xml_string(*make_model(use_peg, insert)) + task = Bring(use_peg=use_peg, insert=insert, + fully_observable=fully_observable, random=random) + environment_kwargs = 
environment_kwargs or {} + return control.Environment( + physics, task, control_timestep=_CONTROL_TIMESTEP, time_limit=time_limit, + **environment_kwargs) + + +@SUITE.add('hard') +def insert_peg(fully_observable=True, time_limit=_TIME_LIMIT, random=None, + environment_kwargs=None): + """Returns manipulator insert task with the peg prop.""" + use_peg = True + insert = True + physics = Physics.from_xml_string(*make_model(use_peg, insert)) + task = Bring(use_peg=use_peg, insert=insert, + fully_observable=fully_observable, random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, control_timestep=_CONTROL_TIMESTEP, time_limit=time_limit, + **environment_kwargs) + + +class Physics(mujoco.Physics): + """Physics with additional features for the Planar Manipulator domain.""" + + def bounded_joint_pos(self, joint_names): + """Returns joint positions as (sin, cos) values.""" + joint_pos = self.named.data.qpos[joint_names] + return np.vstack([np.sin(joint_pos), np.cos(joint_pos)]).T + + def joint_vel(self, joint_names): + """Returns joint velocities.""" + return self.named.data.qvel[joint_names] + + def body_2d_pose(self, body_names, orientation=True): + """Returns positions and/or orientations of bodies.""" + if not isinstance(body_names, str): + body_names = np.array(body_names).reshape(-1, 1) # Broadcast indices. + pos = self.named.data.xpos[body_names, ['x', 'z']] + if orientation: + ori = self.named.data.xquat[body_names, ['qw', 'qy']] + return np.hstack([pos, ori]) + else: + return pos + + def touch(self): + return np.log1p(self.data.sensordata) + + def site_distance(self, site1, site2): + site1_to_site2 = np.diff(self.named.data.site_xpos[[site2, site1]], axis=0) + return np.linalg.norm(site1_to_site2) + + +class Bring(base.Task): + """A Bring `Task`: bring the prop to the target.""" + + def __init__(self, use_peg, insert, fully_observable, random=None): + """Initialize an instance of the `Bring` task. 
+ + Args: + use_peg: A `bool`, whether to replace the ball prop with the peg prop. + insert: A `bool`, whether to insert the prop in a receptacle. + fully_observable: A `bool`, whether the observation should contain the + position and velocity of the object being manipulated and the target + location. + random: Optional, either a `numpy.random.RandomState` instance, an + integer seed for creating a new `RandomState`, or None to select a seed + automatically (default). + """ + self._use_peg = use_peg + self._target = 'target_peg' if use_peg else 'target_ball' + self._object = 'peg' if self._use_peg else 'ball' + self._object_joints = ['_'.join([self._object, dim]) for dim in 'xzy'] + self._receptacle = 'slot' if self._use_peg else 'cup' + self._insert = insert + self._fully_observable = fully_observable + super(Bring, self).__init__(random=random) + + def initialize_episode(self, physics): + """Sets the state of the environment at the start of each episode.""" + # Local aliases + choice = self.random.choice + uniform = self.random.uniform + model = physics.named.model + data = physics.named.data + + # Find a collision-free random initial configuration. + penetrating = True + while penetrating: + + # Randomise angles of arm joints. + is_limited = model.jnt_limited[_ARM_JOINTS].astype(np.bool) + joint_range = model.jnt_range[_ARM_JOINTS] + lower_limits = np.where(is_limited, joint_range[:, 0], -np.pi) + upper_limits = np.where(is_limited, joint_range[:, 1], np.pi) + angles = uniform(lower_limits, upper_limits) + data.qpos[_ARM_JOINTS] = angles + + # Symmetrize hand. + data.qpos['finger'] = data.qpos['thumb'] + + # Randomise target location. 
+ target_x = uniform(-.4, .4) + target_z = uniform(.1, .4) + if self._insert: + target_angle = uniform(-np.pi/3, np.pi/3) + model.body_pos[self._receptacle, ['x', 'z']] = target_x, target_z + model.body_quat[self._receptacle, ['qw', 'qy']] = [ + np.cos(target_angle/2), np.sin(target_angle/2)] + else: + target_angle = uniform(-np.pi, np.pi) + + model.body_pos[self._target, ['x', 'z']] = target_x, target_z + model.body_quat[self._target, ['qw', 'qy']] = [ + np.cos(target_angle/2), np.sin(target_angle/2)] + + # Randomise object location. + object_init_probs = [_P_IN_HAND, _P_IN_TARGET, 1-_P_IN_HAND-_P_IN_TARGET] + init_type = choice(['in_hand', 'in_target', 'uniform'], + p=object_init_probs) + if init_type == 'in_target': + object_x = target_x + object_z = target_z + object_angle = target_angle + elif init_type == 'in_hand': + physics.after_reset() + object_x = data.site_xpos['grasp', 'x'] + object_z = data.site_xpos['grasp', 'z'] + grasp_direction = data.site_xmat['grasp', ['xx', 'zx']] + object_angle = np.pi-np.arctan2(grasp_direction[1], grasp_direction[0]) + else: + object_x = uniform(-.5, .5) + object_z = uniform(0, .7) + object_angle = uniform(0, 2*np.pi) + data.qvel[self._object + '_x'] = uniform(-5, 5) + + data.qpos[self._object_joints] = object_x, object_z, object_angle + + # Check for collisions. 
+ physics.after_reset() + penetrating = physics.data.ncon > 0 + + super(Bring, self).initialize_episode(physics) + + def get_observation(self, physics): + """Returns either features or only sensors (to be used with pixels).""" + obs = collections.OrderedDict() + obs['arm_pos'] = physics.bounded_joint_pos(_ARM_JOINTS) + obs['arm_vel'] = physics.joint_vel(_ARM_JOINTS) + obs['touch'] = physics.touch() + if self._fully_observable: + obs['hand_pos'] = physics.body_2d_pose('hand') + obs['object_pos'] = physics.body_2d_pose(self._object) + obs['object_vel'] = physics.joint_vel(self._object_joints) + obs['target_pos'] = physics.body_2d_pose(self._target) + return obs + + def _is_close(self, distance): + return rewards.tolerance(distance, (0, _CLOSE), _CLOSE*2) + + def _peg_reward(self, physics): + """Returns a reward for bringing the peg prop to the target.""" + grasp = self._is_close(physics.site_distance('peg_grasp', 'grasp')) + pinch = self._is_close(physics.site_distance('peg_pinch', 'pinch')) + grasping = (grasp + pinch) / 2 + bring = self._is_close(physics.site_distance('peg', 'target_peg')) + bring_tip = self._is_close(physics.site_distance('target_peg_tip', + 'peg_tip')) + bringing = (bring + bring_tip) / 2 + return max(bringing, grasping/3) + + def _ball_reward(self, physics): + """Returns a reward for bringing the ball prop to the target.""" + return self._is_close(physics.site_distance('ball', 'target_ball')) + + def get_reward(self, physics): + """Returns a reward to the agent.""" + if self._use_peg: + return self._peg_reward(physics) + else: + return self._ball_reward(physics) diff --git a/local_dm_control_suite/manipulator.xml b/local_dm_control_suite/manipulator.xml new file mode 100755 index 0000000..d6d1767 --- /dev/null +++ b/local_dm_control_suite/manipulator.xml @@ -0,0 +1,211 @@ + + + + + + + + + + + + + + > + + diff --git a/local_dm_control_suite/pendulum.py b/local_dm_control_suite/pendulum.py new file mode 100755 index 0000000..38f442b --- /dev/null 
+++ b/local_dm_control_suite/pendulum.py @@ -0,0 +1,114 @@ +# Copyright 2017 The dm_control Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Pendulum domain.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections + +from dm_control import mujoco +from dm_control.rl import control +from local_dm_control_suite import base +from local_dm_control_suite import common +from dm_control.utils import containers +from dm_control.utils import rewards +import numpy as np + + +_DEFAULT_TIME_LIMIT = 20 +_ANGLE_BOUND = 8 +_COSINE_BOUND = np.cos(np.deg2rad(_ANGLE_BOUND)) +SUITE = containers.TaggedTasks() + + +def get_model_and_assets(): + """Returns a tuple containing the model XML string and a dict of assets.""" + return common.read_model('pendulum.xml'), common.ASSETS + + +@SUITE.add('benchmarking') +def swingup(time_limit=_DEFAULT_TIME_LIMIT, random=None, + environment_kwargs=None): + """Returns pendulum swingup task .""" + physics = Physics.from_xml_string(*get_model_and_assets()) + task = SwingUp(random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, time_limit=time_limit, **environment_kwargs) + + +class Physics(mujoco.Physics): + """Physics simulation with additional features for the Pendulum domain.""" + + def pole_vertical(self): + """Returns 
vertical (z) component of pole frame.""" + return self.named.data.xmat['pole', 'zz'] + + def angular_velocity(self): + """Returns the angular velocity of the pole.""" + return self.named.data.qvel['hinge'].copy() + + def pole_orientation(self): + """Returns both horizontal and vertical components of pole frame.""" + return self.named.data.xmat['pole', ['zz', 'xz']] + + +class SwingUp(base.Task): + """A Pendulum `Task` to swing up and balance the pole.""" + + def __init__(self, random=None): + """Initialize an instance of `Pendulum`. + + Args: + random: Optional, either a `numpy.random.RandomState` instance, an + integer seed for creating a new `RandomState`, or None to select a seed + automatically (default). + """ + super(SwingUp, self).__init__(random=random) + + def initialize_episode(self, physics): + """Sets the state of the environment at the start of each episode. + + Pole is set to a random angle between [-pi, pi). + + Args: + physics: An instance of `Physics`. + + """ + physics.named.data.qpos['hinge'] = self.random.uniform(-np.pi, np.pi) + super(SwingUp, self).initialize_episode(physics) + + def get_observation(self, physics): + """Returns an observation. + + Observations are states concatenating pole orientation and angular velocity + and pixels from fixed camera. + + Args: + physics: An instance of `physics`, Pendulum physics. + + Returns: + A `dict` of observation. 
+ """ + obs = collections.OrderedDict() + obs['orientation'] = physics.pole_orientation() + obs['velocity'] = physics.angular_velocity() + return obs + + def get_reward(self, physics): + return rewards.tolerance(physics.pole_vertical(), (_COSINE_BOUND, 1)) diff --git a/local_dm_control_suite/pendulum.xml b/local_dm_control_suite/pendulum.xml new file mode 100755 index 0000000..14377ae --- /dev/null +++ b/local_dm_control_suite/pendulum.xml @@ -0,0 +1,26 @@ + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/local_dm_control_suite/point_mass.py b/local_dm_control_suite/point_mass.py new file mode 100755 index 0000000..b45ba17 --- /dev/null +++ b/local_dm_control_suite/point_mass.py @@ -0,0 +1,130 @@ +# Copyright 2017 The dm_control Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""Point-mass domain.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections + +from dm_control import mujoco +from dm_control.rl import control +from local_dm_control_suite import base +from local_dm_control_suite import common +from dm_control.suite.utils import randomizers +from dm_control.utils import containers +from dm_control.utils import rewards +import numpy as np + +_DEFAULT_TIME_LIMIT = 20 +SUITE = containers.TaggedTasks() + + +def get_model_and_assets(): + """Returns a tuple containing the model XML string and a dict of assets.""" + return common.read_model('point_mass.xml'), common.ASSETS + + +@SUITE.add('benchmarking', 'easy') +def easy(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): + """Returns the easy point_mass task.""" + physics = Physics.from_xml_string(*get_model_and_assets()) + task = PointMass(randomize_gains=False, random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, time_limit=time_limit, **environment_kwargs) + + +@SUITE.add() +def hard(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): + """Returns the hard point_mass task.""" + physics = Physics.from_xml_string(*get_model_and_assets()) + task = PointMass(randomize_gains=True, random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, time_limit=time_limit, **environment_kwargs) + + +class Physics(mujoco.Physics): + """physics for the point_mass domain.""" + + def mass_to_target(self): + """Returns the vector from mass to target in global coordinate.""" + return (self.named.data.geom_xpos['target'] - + self.named.data.geom_xpos['pointmass']) + + def mass_to_target_dist(self): + """Returns the distance from mass to the target.""" + return np.linalg.norm(self.mass_to_target()) 


class PointMass(base.Task):
    """A point_mass `Task` to reach target with smooth reward."""

    def __init__(self, randomize_gains, random=None):
        """Initialize an instance of `PointMass`.

        Args:
          randomize_gains: A `bool`, whether to randomize the actuator gains.
          random: Optional, either a `numpy.random.RandomState` instance, an
            integer seed for creating a new `RandomState`, or None to select
            a seed automatically (default).
        """
        self._randomize_gains = randomize_gains
        super(PointMass, self).__init__(random=random)

    def initialize_episode(self, physics):
        """Sets the state of the environment at the start of each episode.

        If _randomize_gains is True, the relationship between the controls
        and the joints is randomized, so that each control actuates a random
        linear combination of joints.

        Args:
          physics: An instance of `mujoco.Physics`.
        """
        randomizers.randomize_limited_and_rotational_joints(physics,
                                                            self.random)
        if self._randomize_gains:
            first_dir = self.random.randn(2)
            first_dir /= np.linalg.norm(first_dir)
            # Keep drawing unit vectors until one is not 'too parallel' to
            # the first, i.e. the absolute dot product is not above 0.9.
            while True:
                second_dir = self.random.randn(2)
                second_dir /= np.linalg.norm(second_dir)
                if not abs(np.dot(first_dir, second_dir)) > 0.9:
                    break
            physics.model.wrap_prm[[0, 1]] = first_dir
            physics.model.wrap_prm[[2, 3]] = second_dir
        super(PointMass, self).initialize_episode(physics)

    def get_observation(self, physics):
        """Returns an ordered dict with the `position` and `velocity`."""
        return collections.OrderedDict([
            ('position', physics.position()),
            ('velocity', physics.velocity()),
        ])

    def get_reward(self, physics):
        """Returns the target-proximity score scaled by a control score.

        The control score is mapped by `(x + 4) / 5` before scaling.
        """
        target_radius = physics.named.model.geom_size['target', 0]
        distance = physics.mass_to_target_dist()
        near_target = rewards.tolerance(distance,
                                        bounds=(0, target_radius),
                                        margin=target_radius)
        per_control = rewards.tolerance(physics.control(), margin=1,
                                        value_at_margin=0,
                                        sigmoid='quadratic')
        control_reward = per_control.mean()
        small_control = (control_reward + 4) / 5
        return near_target * small_control
# Copyright 2019 The dm_control Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
def make_model(floor_size=None, terrain=False, rangefinders=False,
               walls_and_ball=False):
    """Builds the quadruped model XML, pruning the parts a task does not use.

    Args:
      floor_size: Optional half-extent written to the first two entries of
        the `floor` geom's `size` attribute; left untouched when None.
      terrain: Whether to keep the `terrain` geom.
      rangefinders: Whether to keep the rangefinder sensors.
      walls_and_ball: Whether to keep the walls, the `ball` body and the
        `target` site.

    Returns:
      The serialized model as produced by `etree.tostring`.
    """

    def _detach(element):
        # Unlink `element` from its parent node.
        element.getparent().remove(element)

    raw_xml = common.read_model('quadruped.xml')
    mjcf = etree.XML(raw_xml, etree.XMLParser(remove_blank_text=True))

    # Set floor size.
    if floor_size is not None:
        floor = mjcf.find('.//geom[@name={!r}]'.format('floor'))
        floor.attrib['size'] = '{} {} .5'.format(floor_size, floor_size)

    # Remove walls, ball and target.
    if not walls_and_ball:
        for wall_name in _WALLS:
            _detach(xml_tools.find_element(mjcf, 'geom', wall_name))
        _detach(xml_tools.find_element(mjcf, 'body', 'ball'))
        _detach(xml_tools.find_element(mjcf, 'site', 'target'))

    # Remove terrain.
    if not terrain:
        _detach(xml_tools.find_element(mjcf, 'geom', 'terrain'))

    # Remove rangefinders if they're not used, as range computations can be
    # expensive, especially in a scene with heightfields.
    if not rangefinders:
        for sensor in mjcf.findall('.//rangefinder'):
            _detach(sensor)

    return etree.tostring(mjcf, pretty_print=True)
class Physics(mujoco.Physics):
    """Physics simulation with additional features for the Quadruped domain."""

    def _reload_from_data(self, data):
        """Reloads the physics and resets the cached name lookups."""
        super(Physics, self)._reload_from_data(data)
        # Clear cached sensor names when the physics is reloaded.
        self._sensor_types_to_names = {}
        self._hinge_names = []

    def _get_sensor_names(self, *sensor_types):
        """Returns the names of all sensors whose type is in `sensor_types`.

        The result is cached per combination of sensor types in
        `self._sensor_types_to_names`.
        """
        try:
            sensor_names = self._sensor_types_to_names[sensor_types]
        except KeyError:
            # `np.isin` is the supported replacement for `np.in1d`, which is
            # deprecated as of NumPy 2.0.
            [sensor_ids] = np.where(
                np.isin(self.model.sensor_type, sensor_types))
            sensor_names = [self.model.id2name(s_id, 'sensor')
                            for s_id in sensor_ids]
            self._sensor_types_to_names[sensor_types] = sensor_names
        return sensor_names

    def torso_upright(self):
        """Returns the dot-product of the torso z-axis and the global z-axis."""
        return np.asarray(self.named.data.xmat['torso', 'zz'])

    def torso_velocity(self):
        """Returns a copy of the `velocimeter` sensor reading."""
        return self.named.data.sensordata['velocimeter'].copy()

    def egocentric_state(self):
        """Returns hinge joint positions, hinge velocities and activations."""
        if not self._hinge_names:
            # Resolve the hinge joint names lazily and cache them until the
            # next reload.
            [hinge_ids] = np.nonzero(self.model.jnt_type ==
                                     enums.mjtJoint.mjJNT_HINGE)
            self._hinge_names = [self.model.id2name(j_id, 'joint')
                                 for j_id in hinge_ids]
        return np.hstack((self.named.data.qpos[self._hinge_names],
                          self.named.data.qvel[self._hinge_names],
                          self.data.act))

    def toe_positions(self):
        """Returns the torso-to-toe offsets multiplied by the torso frame."""
        torso_frame = self.named.data.xmat['torso'].reshape(3, 3)
        torso_pos = self.named.data.xpos['torso']
        torso_to_toe = self.named.data.xpos[_TOES] - torso_pos
        return torso_to_toe.dot(torso_frame)

    def force_torque(self):
        """Returns arcsinh-scaled force and torque sensor readings."""
        force_torque_sensors = self._get_sensor_names(
            enums.mjtSensor.mjSENS_FORCE,
            enums.mjtSensor.mjSENS_TORQUE)
        return np.arcsinh(self.named.data.sensordata[force_torque_sensors])

    def imu(self):
        """Returns the gyro and accelerometer sensor readings."""
        imu_sensors = self._get_sensor_names(
            enums.mjtSensor.mjSENS_GYRO,
            enums.mjtSensor.mjSENS_ACCELEROMETER)
        return self.named.data.sensordata[imu_sensors]

    def rangefinder(self):
        """Returns tanh-scaled rangefinder readings.

        Readings equal to -1.0 (treated here as "no intersection") map to 1.0.
        """
        rf_sensors = self._get_sensor_names(enums.mjtSensor.mjSENS_RANGEFINDER)
        rf_readings = self.named.data.sensordata[rf_sensors]
        no_intersection = -1.0
        return np.where(rf_readings == no_intersection,
                        1.0, np.tanh(rf_readings))

    def origin_distance(self):
        """Returns the distance from the origin to the workspace."""
        return np.asarray(
            np.linalg.norm(self.named.data.site_xpos['workspace']))

    def origin(self):
        """Returns origin position in the torso frame."""
        torso_frame = self.named.data.xmat['torso'].reshape(3, 3)
        torso_pos = self.named.data.xpos['torso']
        return -torso_pos.dot(torso_frame)

    def ball_state(self):
        """Returns the ball state relative to the torso, flattened.

        Three rows are stacked before the product with the torso frame: the
        ball position minus the torso position, the first three `ball_root`
        velocity entries minus the first three `root` velocity entries, and
        the remaining `ball_root` velocity entries.
        """
        data = self.named.data
        torso_frame = data.xmat['torso'].reshape(3, 3)
        ball_rel_pos = data.xpos['ball'] - data.xpos['torso']
        ball_rel_vel = data.qvel['ball_root'][:3] - data.qvel['root'][:3]
        ball_rot_vel = data.qvel['ball_root'][3:]
        ball_state = np.vstack((ball_rel_pos, ball_rel_vel, ball_rot_vel))
        return ball_state.dot(torso_frame).ravel()

    def target_position(self):
        """Returns target position in torso frame."""
        torso_frame = self.named.data.xmat['torso'].reshape(3, 3)
        torso_pos = self.named.data.xpos['torso']
        torso_to_target = self.named.data.site_xpos['target'] - torso_pos
        return torso_to_target.dot(torso_frame)

    def ball_to_target_distance(self):
        """Returns horizontal distance from the ball to the target."""
        ball_to_target = (self.named.data.site_xpos['target'] -
                          self.named.data.xpos['ball'])
        # Only the first two (x, y) components enter the norm.
        return np.linalg.norm(ball_to_target[:2])

    def self_to_ball_distance(self):
        """Returns horizontal distance from the workspace site to the ball."""
        self_to_ball = (self.named.data.site_xpos['workspace'] -
                        self.named.data.xpos['ball'])
        return np.linalg.norm(self_to_ball[:2])
def _common_observations(physics):
    """Collects the observation entries shared by every quadruped task.

    Args:
      physics: An object exposing `egocentric_state`, `torso_velocity`,
        `torso_upright`, `imu` and `force_torque` methods.

    Returns:
      An `OrderedDict` with one entry per method, keyed by the method name,
      in the order listed above.
    """
    # A list literal is evaluated left to right, so the physics queries run
    # in the same order as the keys appear.
    return collections.OrderedDict([
        ('egocentric_state', physics.egocentric_state()),
        ('torso_velocity', physics.torso_velocity()),
        ('torso_upright', physics.torso_upright()),
        ('imu', physics.imu()),
        ('force_torque', physics.force_torque()),
    ])
class Move(base.Task):
    """A quadruped task solved by moving forward at a designated speed."""

    def __init__(self, desired_speed, random=None):
        """Creates the task.

        Args:
          desired_speed: A float, used as both the lower bound and the margin
            of the velocity-dependent reward term.
          random: Optional, either a `numpy.random.RandomState` instance, an
            integer seed for creating a new `RandomState`, or None to select
            a seed automatically (default).
        """
        self._desired_speed = desired_speed
        super(Move, self).__init__(random=random)

    def initialize_episode(self, physics):
        """Drops the quadruped in a random orientation, free of contacts.

        Args:
          physics: An instance of `Physics`.
        """
        # Draw a random unit quaternion for the initial orientation.
        quaternion = self.random.randn(4)
        quaternion /= np.linalg.norm(quaternion)
        _find_non_contacting_height(physics, quaternion)
        super(Move, self).initialize_episode(physics)

    def get_observation(self, physics):
        """Returns the observations common to all quadruped tasks."""
        return _common_observations(physics)

    def get_reward(self, physics):
        """Returns the upright reward multiplied by the move reward."""
        target_speed = self._desired_speed
        forward_speed = physics.torso_velocity()[0]
        # Move reward term.
        move_term = rewards.tolerance(
            forward_speed,
            bounds=(target_speed, float('inf')),
            margin=target_speed,
            value_at_margin=0.5,
            sigmoid='linear')
        upright_term = _upright_reward(physics)
        return upright_term * move_term
+ + """ + # Get heightfield resolution, assert that it is square. + res = physics.model.hfield_nrow[_HEIGHTFIELD_ID] + assert res == physics.model.hfield_ncol[_HEIGHTFIELD_ID] + # Sinusoidal bowl shape. + row_grid, col_grid = np.ogrid[-1:1:res*1j, -1:1:res*1j] + radius = np.clip(np.sqrt(col_grid**2 + row_grid**2), .04, 1) + bowl_shape = .5 - np.cos(2*np.pi*radius)/2 + # Random smooth bumps. + terrain_size = 2 * physics.model.hfield_size[_HEIGHTFIELD_ID, 0] + bump_res = int(terrain_size / _TERRAIN_BUMP_SCALE) + bumps = self.random.uniform(_TERRAIN_SMOOTHNESS, 1, (bump_res, bump_res)) + smooth_bumps = ndimage.zoom(bumps, res / float(bump_res)) + # Terrain is elementwise product. + terrain = bowl_shape * smooth_bumps + start_idx = physics.model.hfield_adr[_HEIGHTFIELD_ID] + physics.model.hfield_data[start_idx:start_idx+res**2] = terrain.ravel() + super(Escape, self).initialize_episode(physics) + + # If we have a rendering context, we need to re-upload the modified + # heightfield data. + if physics.contexts: + with physics.contexts.gl.make_current() as ctx: + ctx.call(mjlib.mjr_uploadHField, + physics.model.ptr, + physics.contexts.mujoco.ptr, + _HEIGHTFIELD_ID) + + # Initial configuration. + orientation = self.random.randn(4) + orientation /= np.linalg.norm(orientation) + _find_non_contacting_height(physics, orientation) + + def get_observation(self, physics): + """Returns an observation to the agent.""" + obs = _common_observations(physics) + obs['origin'] = physics.origin() + obs['rangefinder'] = physics.rangefinder() + return obs + + def get_reward(self, physics): + """Returns a reward to the agent.""" + + # Escape reward term. 
class Fetch(base.Task):
    """A quadruped task rewarded for bringing the ball to the target site."""

    def initialize_episode(self, physics):
        """Places the quadruped and launches the ball at random.

        Args:
          physics: An instance of `Physics`.
        """
        rng = self.random

        # Quadruped: random azimuth (rotation about z) and horizontal spot.
        half_azimuth = rng.uniform(0, 2*np.pi) / 2
        orientation = np.array(
            (np.cos(half_azimuth), 0, 0, np.sin(half_azimuth)))
        spawn_radius = 0.9 * physics.named.model.geom_size['floor', 0]
        x_pos, y_pos = rng.uniform(-spawn_radius, spawn_radius, size=(2,))
        _find_non_contacting_height(physics, orientation, x_pos, y_pos)

        # Ball: random horizontal position, fixed third coordinate and a
        # random horizontal velocity.
        ball_xy = rng.uniform(-spawn_radius, spawn_radius, size=(2,))
        physics.named.data.qpos['ball_root'][:2] = ball_xy
        physics.named.data.qpos['ball_root'][2] = 2
        physics.named.data.qvel['ball_root'][:2] = 5*rng.randn(2)
        super(Fetch, self).initialize_episode(physics)

    def get_observation(self, physics):
        """Returns the common observations plus ball state and target."""
        observation = _common_observations(physics)
        observation['ball_state'] = physics.ball_state()
        observation['target_position'] = physics.target_position()
        return observation

    def get_reward(self, physics):
        """Returns the upright reward times a reach-then-fetch term."""
        model = physics.named.model
        arena_radius = model.geom_size['floor', 0] * np.sqrt(2)

        # Reward for moving close to the ball.
        contact_range = (model.site_size['workspace', 0] +
                         model.geom_size['ball', 0])
        reach_term = rewards.tolerance(
            physics.self_to_ball_distance(),
            bounds=(0, contact_range),
            sigmoid='linear',
            margin=arena_radius, value_at_margin=0)

        # Reward for bringing the ball to the target.
        target_radius = model.site_size['target', 0]
        fetch_term = rewards.tolerance(
            physics.ball_to_target_distance(),
            bounds=(0, target_radius),
            sigmoid='linear',
            margin=arena_radius, value_at_margin=0)

        # Reaching alone is worth half; fetching supplies the other half.
        combined = reach_term * (0.5 + 0.5*fetch_term)
        return _upright_reward(physics) * combined
class Physics(mujoco.Physics):
    """Physics simulation with additional features for the Reacher domain."""

    def finger_to_target(self):
        """Returns the vector from the finger to the target.

        Computed as target minus finger, using the first two components of
        the `target` and `finger` geom positions.
        """
        geom_xpos = self.named.data.geom_xpos
        target_xy = geom_xpos['target', :2]
        finger_xy = geom_xpos['finger', :2]
        return target_xy - finger_xy

    def finger_to_target_dist(self):
        """Returns the Euclidean norm of `finger_to_target()`."""
        offset = self.finger_to_target()
        return np.linalg.norm(offset)
+ + + + + + + + + + + + + + + + + + + + + + diff --git a/local_dm_control_suite/stacker.py b/local_dm_control_suite/stacker.py new file mode 100755 index 0000000..6d4d49c --- /dev/null +++ b/local_dm_control_suite/stacker.py @@ -0,0 +1,208 @@ +# Copyright 2017 The dm_control Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Planar Stacker domain.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections + +from dm_control import mujoco +from dm_control.rl import control +from local_dm_control_suite import base +from local_dm_control_suite import common +from dm_control.utils import containers +from dm_control.utils import rewards +from dm_control.utils import xml_tools + +from lxml import etree +import numpy as np + + +_CLOSE = .01 # (Meters) Distance below which a thing is considered close. 
def make_model(n_boxes):
    """Builds the stacker model keeping only the first `n_boxes` boxes.

    Args:
      n_boxes: Number of boxes to keep; bodies `box<n_boxes>` up to `box3`
        are removed from the model.

    Returns:
      A tuple of the serialized model XML and the dict of assets.
    """
    raw_xml = common.read_model('stacker.xml')
    mjcf = etree.XML(raw_xml, etree.XMLParser(remove_blank_text=True))

    # Remove unused boxes
    for index in range(n_boxes, 4):
        unused_box = xml_tools.find_element(mjcf, 'body', 'box' + str(index))
        unused_box.getparent().remove(unused_box)

    serialized = etree.tostring(mjcf, pretty_print=True)
    return serialized, common.ASSETS
class Stack(base.Task):
    """A Stack `Task`: stack the boxes."""

    def __init__(self, n_boxes, fully_observable, random=None):
        """Initialize an instance of the `Stack` task.

        Args:
          n_boxes: An `int`, number of boxes to stack.
          fully_observable: A `bool`, whether the observation should contain
            the hand, box and target poses and the box velocities.
          random: Optional, either a `numpy.random.RandomState` instance, an
            integer seed for creating a new `RandomState`, or None to select
            a seed automatically (default).
        """
        self._n_boxes = n_boxes
        self._box_names = ['box' + str(b) for b in range(n_boxes)]
        # Every box has three joints, named `<box>_x`, `<box>_y`, `<box>_z`.
        self._box_joint_names = ['_'.join([name, dim])
                                 for name in self._box_names
                                 for dim in 'xyz']
        self._fully_observable = fully_observable
        super(Stack, self).__init__(random=random)

    def initialize_episode(self, physics):
        """Sets the state of the environment at the start of each episode.

        Arm angles, target location and box poses are re-sampled until the
        resulting configuration has no contacts.

        Args:
          physics: An instance of `Physics`.
        """
        # Local aliases
        randint = self.random.randint
        uniform = self.random.uniform
        model = physics.named.model
        data = physics.named.data

        # The joint limits do not change between attempts, so look them up
        # once. The builtin `bool` is used because the `np.bool` alias was
        # removed in NumPy 1.24.
        is_limited = model.jnt_limited[_ARM_JOINTS].astype(bool)
        joint_range = model.jnt_range[_ARM_JOINTS]
        lower_limits = np.where(is_limited, joint_range[:, 0], -np.pi)
        upper_limits = np.where(is_limited, joint_range[:, 1], np.pi)

        # Find a collision-free random initial configuration.
        penetrating = True
        while penetrating:

            # Randomise angles of arm joints.
            angles = uniform(lower_limits, upper_limits)
            data.qpos[_ARM_JOINTS] = angles

            # Symmetrize hand.
            data.qpos['finger'] = data.qpos['thumb']

            # Randomise target location.
            target_height = 2*randint(self._n_boxes) + 1
            box_size = model.geom_size['target', 0]
            model.body_pos['target', 'z'] = box_size * target_height
            model.body_pos['target', 'x'] = uniform(-.37, .37)

            # Randomise box locations.
            for name in self._box_names:
                data.qpos[name + '_x'] = uniform(.1, .3)
                data.qpos[name + '_z'] = uniform(0, .7)
                data.qpos[name + '_y'] = uniform(0, 2*np.pi)

            # Check for collisions.
            physics.after_reset()
            penetrating = physics.data.ncon > 0

        super(Stack, self).initialize_episode(physics)

    def get_observation(self, physics):
        """Returns either features or only sensors (to be used with pixels)."""
        obs = collections.OrderedDict()
        obs['arm_pos'] = physics.bounded_joint_pos(_ARM_JOINTS)
        obs['arm_vel'] = physics.joint_vel(_ARM_JOINTS)
        obs['touch'] = physics.touch()
        if self._fully_observable:
            obs['hand_pos'] = physics.body_2d_pose('hand')
            obs['box_pos'] = physics.body_2d_pose(self._box_names)
            obs['box_vel'] = physics.joint_vel(self._box_joint_names)
            obs['target_pos'] = physics.body_2d_pose('target',
                                                     orientation=False)
        return obs

    def get_reward(self, physics):
        """Returns a reward to the agent.

        The reward is the product of a term for the closest box being near
        the target and a term for the `grasp` site being away from it.
        """
        box_size = physics.named.model.geom_size['target', 0]
        min_box_to_target_distance = min(
            physics.site_distance(name, 'target')
            for name in self._box_names)
        box_is_close = rewards.tolerance(min_box_to_target_distance,
                                         margin=2*box_size)
        hand_to_target_distance = physics.site_distance('grasp', 'target')
        hand_is_far = rewards.tolerance(hand_to_target_distance,
                                        bounds=(.1, float('inf')),
                                        margin=_CLOSE)
        return box_is_close * hand_is_far
+# ============================================================================ + +"""Procedurally generated Swimmer domain.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections + +from dm_control import mujoco +from dm_control.rl import control +from local_dm_control_suite import base +from local_dm_control_suite import common +from dm_control.suite.utils import randomizers +from dm_control.utils import containers +from dm_control.utils import rewards +from lxml import etree +import numpy as np +from six.moves import range + +_DEFAULT_TIME_LIMIT = 30 +_CONTROL_TIMESTEP = .03 # (Seconds) + +SUITE = containers.TaggedTasks() + + +def get_model_and_assets(n_joints): + """Returns a tuple containing the model XML string and a dict of assets. + + Args: + n_joints: An integer specifying the number of joints in the swimmer. + + Returns: + A tuple `(model_xml_string, assets)`, where `assets` is a dict consisting of + `{filename: contents_string}` pairs. 
def _make_swimmer(n_joints, time_limit=_DEFAULT_TIME_LIMIT, random=None,
                  environment_kwargs=None):
    """Builds the control environment for a swimmer model.

    Args:
      n_joints: Forwarded to `get_model_and_assets` to size the model.
      time_limit: Episode time limit passed to the environment.
      random: Forwarded to the `Swimmer` task constructor.
      environment_kwargs: Optional dict of extra keyword arguments for
        `control.Environment`.

    Returns:
      A `control.Environment` wrapping the swimmer physics and task.
    """
    xml, assets = get_model_and_assets(n_joints)
    swimmer_physics = Physics.from_xml_string(xml, assets=assets)
    swimmer_task = Swimmer(random=random)
    extra_kwargs = environment_kwargs or {}
    return control.Environment(
        swimmer_physics,
        swimmer_task,
        time_limit=time_limit,
        control_timestep=_CONTROL_TIMESTEP,
        **extra_kwargs)
Received {}'.format(n_bodies)) + mjcf = etree.fromstring(common.read_model('swimmer.xml')) + head_body = mjcf.find('./worldbody/body') + actuator = etree.SubElement(mjcf, 'actuator') + sensor = etree.SubElement(mjcf, 'sensor') + + parent = head_body + for body_index in range(n_bodies - 1): + site_name = 'site_{}'.format(body_index) + child = _make_body(body_index=body_index) + child.append(etree.Element('site', name=site_name)) + joint_name = 'joint_{}'.format(body_index) + joint_limit = 360.0/n_bodies + joint_range = '{} {}'.format(-joint_limit, joint_limit) + child.append(etree.Element('joint', {'name': joint_name, + 'range': joint_range})) + motor_name = 'motor_{}'.format(body_index) + actuator.append(etree.Element('motor', name=motor_name, joint=joint_name)) + velocimeter_name = 'velocimeter_{}'.format(body_index) + sensor.append(etree.Element('velocimeter', name=velocimeter_name, + site=site_name)) + gyro_name = 'gyro_{}'.format(body_index) + sensor.append(etree.Element('gyro', name=gyro_name, site=site_name)) + parent.append(child) + parent = child + + # Move tracking cameras further away from the swimmer according to its length. 
+ cameras = mjcf.findall('./worldbody/body/camera') + scale = n_bodies / 6.0 + for cam in cameras: + if cam.get('mode') == 'trackcom': + old_pos = cam.get('pos').split(' ') + new_pos = ' '.join([str(float(dim) * scale) for dim in old_pos]) + cam.set('pos', new_pos) + + return etree.tostring(mjcf, pretty_print=True) + + +def _make_body(body_index): + """Generates an xml string defining a single physical body.""" + body_name = 'segment_{}'.format(body_index) + visual_name = 'visual_{}'.format(body_index) + inertial_name = 'inertial_{}'.format(body_index) + body = etree.Element('body', name=body_name) + body.set('pos', '0 .1 0') + etree.SubElement(body, 'geom', {'class': 'visual', 'name': visual_name}) + etree.SubElement(body, 'geom', {'class': 'inertial', 'name': inertial_name}) + return body + + +class Physics(mujoco.Physics): + """Physics simulation with additional features for the swimmer domain.""" + + def nose_to_target(self): + """Returns a vector from nose to target in local coordinate of the head.""" + nose_to_target = (self.named.data.geom_xpos['target'] - + self.named.data.geom_xpos['nose']) + head_orientation = self.named.data.xmat['head'].reshape(3, 3) + return nose_to_target.dot(head_orientation)[:2] + + def nose_to_target_dist(self): + """Returns the distance from the nose to the target.""" + return np.linalg.norm(self.nose_to_target()) + + def body_velocities(self): + """Returns local body velocities: x,y linear, z rotational.""" + xvel_local = self.data.sensordata[12:].reshape((-1, 6)) + vx_vy_wz = [0, 1, 5] # Indices for linear x,y vels and rotational z vel. + return xvel_local[:, vx_vy_wz].ravel() + + def joints(self): + """Returns all internal joint angles (excluding root joints).""" + return self.data.qpos[3:].copy() + + +class Swimmer(base.Task): + """A swimmer `Task` to reach the target or just swim.""" + + def __init__(self, random=None): + """Initializes an instance of `Swimmer`. 
+ + Args: + random: Optional, either a `numpy.random.RandomState` instance, an + integer seed for creating a new `RandomState`, or None to select a seed + automatically (default). + """ + super(Swimmer, self).__init__(random=random) + + def initialize_episode(self, physics): + """Sets the state of the environment at the start of each episode. + + Initializes the swimmer orientation to [-pi, pi) and the relative joint + angle of each joint uniformly within its range. + + Args: + physics: An instance of `Physics`. + """ + # Random joint angles: + randomizers.randomize_limited_and_rotational_joints(physics, self.random) + # Random target position. + close_target = self.random.rand() < .2 # Probability of a close target. + target_box = .3 if close_target else 2 + xpos, ypos = self.random.uniform(-target_box, target_box, size=2) + physics.named.model.geom_pos['target', 'x'] = xpos + physics.named.model.geom_pos['target', 'y'] = ypos + physics.named.model.light_pos['target_light', 'x'] = xpos + physics.named.model.light_pos['target_light', 'y'] = ypos + + super(Swimmer, self).initialize_episode(physics) + + def get_observation(self, physics): + """Returns an observation of joint angles, body velocities and target.""" + obs = collections.OrderedDict() + obs['joints'] = physics.joints() + obs['to_target'] = physics.nose_to_target() + obs['body_velocities'] = physics.body_velocities() + return obs + + def get_reward(self, physics): + """Returns a smooth reward.""" + target_size = physics.named.model.geom_size['target', 0] + return rewards.tolerance(physics.nose_to_target_dist(), + bounds=(0, target_size), + margin=5*target_size, + sigmoid='long_tail') diff --git a/local_dm_control_suite/swimmer.xml b/local_dm_control_suite/swimmer.xml new file mode 100755 index 0000000..29c7bc8 --- /dev/null +++ b/local_dm_control_suite/swimmer.xml @@ -0,0 +1,57 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git 
a/local_dm_control_suite/tests/domains_test.py b/local_dm_control_suite/tests/domains_test.py new file mode 100755 index 0000000..4c148cf --- /dev/null +++ b/local_dm_control_suite/tests/domains_test.py @@ -0,0 +1,292 @@ +# Copyright 2017 The dm_control Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Tests for dm_control.suite domains.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# Internal dependencies. +from absl.testing import absltest +from absl.testing import parameterized +from dm_control import suite +from dm_control.rl import control +import mock +import numpy as np +import six +from six.moves import range +from six.moves import zip + + +def uniform_random_policy(action_spec, random=None): + lower_bounds = action_spec.minimum + upper_bounds = action_spec.maximum + # Draw values between -1 and 1 for unbounded actions. + lower_bounds = np.where(np.isinf(lower_bounds), -1.0, lower_bounds) + upper_bounds = np.where(np.isinf(upper_bounds), 1.0, upper_bounds) + random_state = np.random.RandomState(random) + def policy(time_step): + del time_step # Unused. 
+ return random_state.uniform(lower_bounds, upper_bounds) + return policy + + +def step_environment(env, policy, num_episodes=5, max_steps_per_episode=10): + for _ in range(num_episodes): + step_count = 0 + time_step = env.reset() + yield time_step + while not time_step.last(): + action = policy(time_step) + time_step = env.step(action) + step_count += 1 + yield time_step + if step_count >= max_steps_per_episode: + break + + +def make_trajectory(domain, task, seed, **trajectory_kwargs): + env = suite.load(domain, task, task_kwargs={'random': seed}) + policy = uniform_random_policy(env.action_spec(), random=seed) + return step_environment(env, policy, **trajectory_kwargs) + + +class DomainTest(parameterized.TestCase): + """Tests run on all the tasks registered.""" + + def test_constants(self): + num_tasks = sum(len(tasks) for tasks in + six.itervalues(suite.TASKS_BY_DOMAIN)) + + self.assertLen(suite.ALL_TASKS, num_tasks) + + def _validate_observation(self, observation_dict, observation_spec): + obs = observation_dict.copy() + for name, spec in six.iteritems(observation_spec): + arr = obs.pop(name) + self.assertEqual(arr.shape, spec.shape) + self.assertEqual(arr.dtype, spec.dtype) + self.assertTrue( + np.all(np.isfinite(arr)), + msg='{!r} has non-finite value(s): {!r}'.format(name, arr)) + self.assertEmpty( + obs, + msg='Observation contains arrays(s) that are not in the spec: {!r}' + .format(obs)) + + def _validate_reward_range(self, time_step): + if time_step.first(): + self.assertIsNone(time_step.reward) + else: + self.assertIsInstance(time_step.reward, float) + self.assertBetween(time_step.reward, 0, 1) + + def _validate_discount(self, time_step): + if time_step.first(): + self.assertIsNone(time_step.discount) + else: + self.assertIsInstance(time_step.discount, float) + self.assertBetween(time_step.discount, 0, 1) + + def _validate_control_range(self, lower_bounds, upper_bounds): + for b in lower_bounds: + self.assertEqual(b, -1.0) + for b in upper_bounds: + 
self.assertEqual(b, 1.0) + + @parameterized.parameters(*suite.ALL_TASKS) + def test_components_have_names(self, domain, task): + env = suite.load(domain, task) + model = env.physics.model + + object_types_and_size_fields = [ + ('body', 'nbody'), + ('joint', 'njnt'), + ('geom', 'ngeom'), + ('site', 'nsite'), + ('camera', 'ncam'), + ('light', 'nlight'), + ('mesh', 'nmesh'), + ('hfield', 'nhfield'), + ('texture', 'ntex'), + ('material', 'nmat'), + ('equality', 'neq'), + ('tendon', 'ntendon'), + ('actuator', 'nu'), + ('sensor', 'nsensor'), + ('numeric', 'nnumeric'), + ('text', 'ntext'), + ('tuple', 'ntuple'), + ] + for object_type, size_field in object_types_and_size_fields: + for idx in range(getattr(model, size_field)): + object_name = model.id2name(idx, object_type) + self.assertNotEqual(object_name, '', + msg='Model {!r} contains unnamed {!r} with ID {}.' + .format(model.name, object_type, idx)) + + @parameterized.parameters(*suite.ALL_TASKS) + def test_model_has_at_least_2_cameras(self, domain, task): + env = suite.load(domain, task) + model = env.physics.model + self.assertGreaterEqual(model.ncam, 2, + 'Model {!r} should have at least 2 cameras, has {}.' + .format(model.name, model.ncam)) + + @parameterized.parameters(*suite.ALL_TASKS) + def test_task_conforms_to_spec(self, domain, task): + """Tests that the environment timesteps conform to specifications.""" + is_benchmark = (domain, task) in suite.BENCHMARKING + env = suite.load(domain, task) + observation_spec = env.observation_spec() + action_spec = env.action_spec() + + # Check action bounds. + if is_benchmark: + self._validate_control_range(action_spec.minimum, action_spec.maximum) + + # Step through the environment, applying random actions sampled within the + # valid range and check the observations, rewards, and discounts. 
+ policy = uniform_random_policy(action_spec) + for time_step in step_environment(env, policy): + self._validate_observation(time_step.observation, observation_spec) + self._validate_discount(time_step) + if is_benchmark: + self._validate_reward_range(time_step) + + @parameterized.parameters(*suite.ALL_TASKS) + def test_environment_is_deterministic(self, domain, task): + """Tests that identical seeds and actions produce identical trajectories.""" + seed = 0 + # Iterate over two trajectories generated using identical sequences of + # random actions, and with identical task random states. Check that the + # observations, rewards, discounts and step types are identical. + trajectory1 = make_trajectory(domain=domain, task=task, seed=seed) + trajectory2 = make_trajectory(domain=domain, task=task, seed=seed) + for time_step1, time_step2 in zip(trajectory1, trajectory2): + self.assertEqual(time_step1.step_type, time_step2.step_type) + self.assertEqual(time_step1.reward, time_step2.reward) + self.assertEqual(time_step1.discount, time_step2.discount) + for key in six.iterkeys(time_step1.observation): + np.testing.assert_array_equal( + time_step1.observation[key], time_step2.observation[key], + err_msg='Observation {!r} is not equal.'.format(key)) + + def assertCorrectColors(self, physics, reward): + colors = physics.named.model.mat_rgba + for material_name in ('self', 'effector', 'target'): + highlight = colors[material_name + '_highlight'] + default = colors[material_name + '_default'] + blend_coef = reward ** 4 + expected = blend_coef * highlight + (1.0 - blend_coef) * default + actual = colors[material_name] + err_msg = ('Material {!r} has unexpected color.\nExpected: {!r}\n' + 'Actual: {!r}'.format(material_name, expected, actual)) + np.testing.assert_array_almost_equal(expected, actual, err_msg=err_msg) + + @parameterized.parameters(*suite.ALL_TASKS) + def test_visualize_reward(self, domain, task): + env = suite.load(domain, task) + env.task.visualize_reward = True + 
action = np.zeros(env.action_spec().shape) + + with mock.patch.object(env.task, 'get_reward') as mock_get_reward: + mock_get_reward.return_value = -3.0 # Rewards < 0 should be clipped. + env.reset() + mock_get_reward.assert_called_with(env.physics) + self.assertCorrectColors(env.physics, reward=0.0) + + mock_get_reward.reset_mock() + mock_get_reward.return_value = 0.5 + env.step(action) + mock_get_reward.assert_called_with(env.physics) + self.assertCorrectColors(env.physics, reward=mock_get_reward.return_value) + + mock_get_reward.reset_mock() + mock_get_reward.return_value = 2.0 # Rewards > 1 should be clipped. + env.step(action) + mock_get_reward.assert_called_with(env.physics) + self.assertCorrectColors(env.physics, reward=1.0) + + mock_get_reward.reset_mock() + mock_get_reward.return_value = 0.25 + env.reset() + mock_get_reward.assert_called_with(env.physics) + self.assertCorrectColors(env.physics, reward=mock_get_reward.return_value) + + @parameterized.parameters(*suite.ALL_TASKS) + def test_task_supports_environment_kwargs(self, domain, task): + env = suite.load(domain, task, + environment_kwargs=dict(flat_observation=True)) + # Check that the kwargs are actually passed through to the environment. 
+ self.assertSetEqual(set(env.observation_spec()), + {control.FLAT_OBSERVATION_KEY}) + + @parameterized.parameters(*suite.ALL_TASKS) + def test_observation_arrays_dont_share_memory(self, domain, task): + env = suite.load(domain, task) + first_timestep = env.reset() + action = np.zeros(env.action_spec().shape) + second_timestep = env.step(action) + for name, first_array in six.iteritems(first_timestep.observation): + second_array = second_timestep.observation[name] + self.assertFalse( + np.may_share_memory(first_array, second_array), + msg='Consecutive observations of {!r} may share memory.'.format(name)) + + @parameterized.parameters(*suite.ALL_TASKS) + def test_observations_dont_contain_constant_elements(self, domain, task): + env = suite.load(domain, task) + trajectory = make_trajectory(domain=domain, task=task, seed=0, + num_episodes=2, max_steps_per_episode=1000) + observations = {name: [] for name in env.observation_spec()} + for time_step in trajectory: + for name, array in six.iteritems(time_step.observation): + observations[name].append(array) + + failures = [] + + for name, array_list in six.iteritems(observations): + # Sampling random uniform actions generally isn't sufficient to trigger + # these touch sensors. 
+ if (domain in ('manipulator', 'stacker') and name == 'touch' or + domain == 'quadruped' and name == 'force_torque'): + continue + stacked_arrays = np.array(array_list) + is_constant = np.all(stacked_arrays == stacked_arrays[0], axis=0) + has_constant_elements = ( + is_constant if np.isscalar(is_constant) else np.any(is_constant)) + if has_constant_elements: + failures.append((name, is_constant)) + + self.assertEmpty( + failures, + msg='The following observation(s) contain constant elements:\n{}' + .format('\n'.join(':\t'.join([name, str(is_constant)]) + for (name, is_constant) in failures))) + + @parameterized.parameters(*suite.ALL_TASKS) + def test_initial_state_is_randomized(self, domain, task): + env = suite.load(domain, task, task_kwargs={'random': 42}) + obs1 = env.reset().observation + obs2 = env.reset().observation + self.assertFalse( + all(np.all(obs1[k] == obs2[k]) for k in obs1), + 'Two consecutive initial states have identical observations.\n' + 'First: {}\nSecond: {}'.format(obs1, obs2)) + +if __name__ == '__main__': + absltest.main() diff --git a/local_dm_control_suite/tests/loader_test.py b/local_dm_control_suite/tests/loader_test.py new file mode 100755 index 0000000..cbce4f5 --- /dev/null +++ b/local_dm_control_suite/tests/loader_test.py @@ -0,0 +1,52 @@ +# Copyright 2017 The dm_control Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""Tests for the dm_control.suite loader.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# Internal dependencies. + +from absl.testing import absltest + +from dm_control import suite +from dm_control.rl import control + + +class LoaderTest(absltest.TestCase): + + def test_load_without_kwargs(self): + env = suite.load('cartpole', 'swingup') + self.assertIsInstance(env, control.Environment) + + def test_load_with_kwargs(self): + env = suite.load('cartpole', 'swingup', + task_kwargs={'time_limit': 40, 'random': 99}) + self.assertIsInstance(env, control.Environment) + + +class LoaderConstantsTest(absltest.TestCase): + + def testSuiteConstants(self): + self.assertNotEmpty(suite.BENCHMARKING) + self.assertNotEmpty(suite.EASY) + self.assertNotEmpty(suite.HARD) + self.assertNotEmpty(suite.EXTRA) + + +if __name__ == '__main__': + absltest.main() diff --git a/local_dm_control_suite/tests/lqr_test.py b/local_dm_control_suite/tests/lqr_test.py new file mode 100755 index 0000000..d6edcf0 --- /dev/null +++ b/local_dm_control_suite/tests/lqr_test.py @@ -0,0 +1,88 @@ +# Copyright 2017 The dm_control Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""Tests specific to the LQR domain.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math +import unittest + +# Internal dependencies. +from absl import logging + +from absl.testing import absltest +from absl.testing import parameterized + +from local_dm_control_suite import lqr +from local_dm_control_suite import lqr_solver + +import numpy as np +from six.moves import range + + +class LqrTest(parameterized.TestCase): + + @parameterized.named_parameters( + ('lqr_2_1', lqr.lqr_2_1), + ('lqr_6_2', lqr.lqr_6_2)) + def test_lqr_optimal_policy(self, make_env): + env = make_env() + p, k, beta = lqr_solver.solve(env) + self.assertPolicyisOptimal(env, p, k, beta) + + @parameterized.named_parameters( + ('lqr_2_1', lqr.lqr_2_1), + ('lqr_6_2', lqr.lqr_6_2)) + @unittest.skipUnless( + condition=lqr_solver.sp, + reason='scipy is not available, so non-scipy DARE solver is the default.') + def test_lqr_optimal_policy_no_scipy(self, make_env): + env = make_env() + old_sp = lqr_solver.sp + try: + lqr_solver.sp = None # Force the solver to use the non-scipy code path. 
+ p, k, beta = lqr_solver.solve(env) + finally: + lqr_solver.sp = old_sp + self.assertPolicyisOptimal(env, p, k, beta) + + def assertPolicyisOptimal(self, env, p, k, beta): + tolerance = 1e-3 + n_steps = int(math.ceil(math.log10(tolerance) / math.log10(beta))) + logging.info('%d timesteps for %g convergence.', n_steps, tolerance) + total_loss = 0.0 + + timestep = env.reset() + initial_state = np.hstack((timestep.observation['position'], + timestep.observation['velocity'])) + logging.info('Measuring total cost over %d steps.', n_steps) + for _ in range(n_steps): + x = np.hstack((timestep.observation['position'], + timestep.observation['velocity'])) + # u = k*x is the optimal policy + u = k.dot(x) + total_loss += 1 - (timestep.reward or 0.0) + timestep = env.step(u) + + logging.info('Analytical expected total cost is .5*x^T*p*x.') + expected_loss = .5 * initial_state.T.dot(p).dot(initial_state) + logging.info('Comparing measured and predicted costs.') + np.testing.assert_allclose(expected_loss, total_loss, rtol=tolerance) + +if __name__ == '__main__': + absltest.main() diff --git a/local_dm_control_suite/utils/__init__.py b/local_dm_control_suite/utils/__init__.py new file mode 100755 index 0000000..2ea19cf --- /dev/null +++ b/local_dm_control_suite/utils/__init__.py @@ -0,0 +1,16 @@ +# Copyright 2017 The dm_control Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""Utility functions used in the control suite.""" diff --git a/local_dm_control_suite/utils/parse_amc.py b/local_dm_control_suite/utils/parse_amc.py new file mode 100755 index 0000000..3cea2ab --- /dev/null +++ b/local_dm_control_suite/utils/parse_amc.py @@ -0,0 +1,251 @@ +# Copyright 2017 The dm_control Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Parse and convert amc motion capture data.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections + +from dm_control.mujoco.wrapper import mjbindings +import numpy as np +from scipy import interpolate +from six.moves import range + +mjlib = mjbindings.mjlib + +MOCAP_DT = 1.0/120.0 +CONVERSION_LENGTH = 0.056444 + +_CMU_MOCAP_JOINT_ORDER = ( + 'root0', 'root1', 'root2', 'root3', 'root4', 'root5', 'lowerbackrx', + 'lowerbackry', 'lowerbackrz', 'upperbackrx', 'upperbackry', 'upperbackrz', + 'thoraxrx', 'thoraxry', 'thoraxrz', 'lowerneckrx', 'lowerneckry', + 'lowerneckrz', 'upperneckrx', 'upperneckry', 'upperneckrz', 'headrx', + 'headry', 'headrz', 'rclaviclery', 'rclaviclerz', 'rhumerusrx', + 'rhumerusry', 'rhumerusrz', 'rradiusrx', 'rwristry', 'rhandrx', 'rhandrz', + 'rfingersrx', 'rthumbrx', 'rthumbrz', 'lclaviclery', 'lclaviclerz', + 'lhumerusrx', 'lhumerusry', 
'lhumerusrz', 'lradiusrx', 'lwristry', + 'lhandrx', 'lhandrz', 'lfingersrx', 'lthumbrx', 'lthumbrz', 'rfemurrx', + 'rfemurry', 'rfemurrz', 'rtibiarx', 'rfootrx', 'rfootrz', 'rtoesrx', + 'lfemurrx', 'lfemurry', 'lfemurrz', 'ltibiarx', 'lfootrx', 'lfootrz', + 'ltoesrx' +) + +Converted = collections.namedtuple('Converted', + ['qpos', 'qvel', 'time']) + + +def convert(file_name, physics, timestep): + """Converts the parsed .amc values into qpos and qvel values and resamples. + + Args: + file_name: The .amc file to be parsed and converted. + physics: The corresponding physics instance. + timestep: Desired output interval between resampled frames. + + Returns: + A namedtuple with fields: + `qpos`, a numpy array containing converted positional variables. + `qvel`, a numpy array containing converted velocity variables. + `time`, a numpy array containing the corresponding times. + """ + frame_values = parse(file_name) + joint2index = {} + for name in physics.named.data.qpos.axes.row.names: + joint2index[name] = physics.named.data.qpos.axes.row.convert_key_item(name) + index2joint = {} + for joint, index in joint2index.items(): + if isinstance(index, slice): + indices = range(index.start, index.stop) + else: + indices = [index] + for ii in indices: + index2joint[ii] = joint + + # Convert frame_values to qpos + amcvals2qpos_transformer = Amcvals2qpos(index2joint, _CMU_MOCAP_JOINT_ORDER) + qpos_values = [] + for frame_value in frame_values: + qpos_values.append(amcvals2qpos_transformer(frame_value)) + qpos_values = np.stack(qpos_values) # Time by nq + + # Interpolate/resample. + # Note: interpolate quaternions rather than euler angles (slerp). 
+ # see https://en.wikipedia.org/wiki/Slerp + qpos_values_resampled = [] + time_vals = np.arange(0, len(frame_values)*MOCAP_DT - 1e-8, MOCAP_DT) + time_vals_new = np.arange(0, len(frame_values)*MOCAP_DT, timestep) + while time_vals_new[-1] > time_vals[-1]: + time_vals_new = time_vals_new[:-1] + + for i in range(qpos_values.shape[1]): + f = interpolate.splrep(time_vals, qpos_values[:, i]) + qpos_values_resampled.append(interpolate.splev(time_vals_new, f)) + + qpos_values_resampled = np.stack(qpos_values_resampled) # nq by ntime + + qvel_list = [] + for t in range(qpos_values_resampled.shape[1]-1): + p_tp1 = qpos_values_resampled[:, t + 1] + p_t = qpos_values_resampled[:, t] + qvel = [(p_tp1[:3]-p_t[:3])/ timestep, + mj_quat2vel(mj_quatdiff(p_t[3:7], p_tp1[3:7]), timestep), + (p_tp1[7:]-p_t[7:])/ timestep] + qvel_list.append(np.concatenate(qvel)) + + qvel_values_resampled = np.vstack(qvel_list).T + + return Converted(qpos_values_resampled, qvel_values_resampled, time_vals_new) + + +def parse(file_name): + """Parses the amc file format.""" + values = [] + fid = open(file_name, 'r') + line = fid.readline().strip() + frame_ind = 1 + first_frame = True + while True: + # Parse first frame. + if first_frame and line[0] == str(frame_ind): + first_frame = False + frame_ind += 1 + frame_vals = [] + while True: + line = fid.readline().strip() + if not line or line == str(frame_ind): + values.append(np.array(frame_vals, dtype=float)) + break + tokens = line.split() + frame_vals.extend(tokens[1:]) + # Parse other frames. + elif line == str(frame_ind): + frame_ind += 1 + frame_vals = [] + while True: + line = fid.readline().strip() + if not line or line == str(frame_ind): + values.append(np.array(frame_vals, dtype=float)) + break + tokens = line.split() + frame_vals.extend(tokens[1:]) + else: + line = fid.readline().strip() + if not line: + break + return values + + +class Amcvals2qpos(object): + """Callable that converts .amc values for a frame and to MuJoCo qpos format. 
+ """ + + def __init__(self, index2joint, joint_order): + """Initializes a new Amcvals2qpos instance. + + Args: + index2joint: Mapping from qpos index to the MuJoCo joint name at that index. + joint_order: Sequence of joint names in the order of values in an .amc frame. + """ + # Root is x,y,z, then quat. + # need to get indices of qpos that order for amc default order + self.qpos_root_xyz_ind = [0, 1, 2] + self.root_xyz_ransform = np.array( + [[1, 0, 0], [0, 0, -1], [0, 1, 0]]) * CONVERSION_LENGTH + self.qpos_root_quat_ind = [3, 4, 5, 6] + amc2qpos_transform = np.zeros((len(index2joint), len(joint_order))) + for i in range(len(index2joint)): + for j in range(len(joint_order)): + if index2joint[i] == joint_order[j]: + if 'rx' in index2joint[i]: + amc2qpos_transform[i][j] = 1 + elif 'ry' in index2joint[i]: + amc2qpos_transform[i][j] = 1 + elif 'rz' in index2joint[i]: + amc2qpos_transform[i][j] = 1 + self.amc2qpos_transform = amc2qpos_transform + + def __call__(self, amc_val): + """Converts a `.amc` frame to MuJoCo qpos format.""" + amc_val_rad = np.deg2rad(amc_val) + qpos = np.dot(self.amc2qpos_transform, amc_val_rad) + + # Root. + qpos[:3] = np.dot(self.root_xyz_ransform, amc_val[:3]) + qpos_quat = euler2quat(amc_val[3], amc_val[4], amc_val[5]) + qpos_quat = mj_quatprod(euler2quat(90, 0, 0), qpos_quat) + + for i, ind in enumerate(self.qpos_root_quat_ind): + qpos[ind] = qpos_quat[i] + + return qpos + + +def euler2quat(ax, ay, az): + """Converts euler angles to a quaternion. + + Note: rotation order is zyx + + Args: + ax: Roll angle (deg). + ay: Pitch angle (deg). + az: Yaw angle (deg). + + Returns: + A numpy array representing the rotation as a quaternion. 
+ """ + r1 = az + r2 = ay + r3 = ax + + c1 = np.cos(np.deg2rad(r1 / 2)) + s1 = np.sin(np.deg2rad(r1 / 2)) + c2 = np.cos(np.deg2rad(r2 / 2)) + s2 = np.sin(np.deg2rad(r2 / 2)) + c3 = np.cos(np.deg2rad(r3 / 2)) + s3 = np.sin(np.deg2rad(r3 / 2)) + + q0 = c1 * c2 * c3 + s1 * s2 * s3 + q1 = c1 * c2 * s3 - s1 * s2 * c3 + q2 = c1 * s2 * c3 + s1 * c2 * s3 + q3 = s1 * c2 * c3 - c1 * s2 * s3 + + return np.array([q0, q1, q2, q3]) + + +def mj_quatprod(q, r): + quaternion = np.zeros(4) + mjlib.mju_mulQuat(quaternion, np.ascontiguousarray(q), + np.ascontiguousarray(r)) + return quaternion + + +def mj_quat2vel(q, dt): + vel = np.zeros(3) + mjlib.mju_quat2Vel(vel, np.ascontiguousarray(q), dt) + return vel + + +def mj_quatneg(q): + quaternion = np.zeros(4) + mjlib.mju_negQuat(quaternion, np.ascontiguousarray(q)) + return quaternion + + +def mj_quatdiff(source, target): + return mj_quatprod(mj_quatneg(source), np.ascontiguousarray(target)) diff --git a/local_dm_control_suite/utils/parse_amc_test.py b/local_dm_control_suite/utils/parse_amc_test.py new file mode 100755 index 0000000..c8a9052 --- /dev/null +++ b/local_dm_control_suite/utils/parse_amc_test.py @@ -0,0 +1,68 @@ +# Copyright 2017 The dm_control Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""Tests for parse_amc utility.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os + +# Internal dependencies. + +from absl.testing import absltest +from local_dm_control_suite import humanoid_CMU +from dm_control.suite.utils import parse_amc + +from dm_control.utils import io as resources + +_TEST_AMC_PATH = resources.GetResourceFilename( + os.path.join(os.path.dirname(__file__), '../demos/zeros.amc')) + + +class ParseAMCTest(absltest.TestCase): + + def test_sizes_of_parsed_data(self): + + # Instantiate the humanoid environment. + env = humanoid_CMU.stand() + + # Parse and convert specified clip. + converted = parse_amc.convert( + _TEST_AMC_PATH, env.physics, env.control_timestep()) + + self.assertEqual(converted.qpos.shape[0], 63) + self.assertEqual(converted.qvel.shape[0], 62) + self.assertEqual(converted.time.shape[0], converted.qpos.shape[1]) + self.assertEqual(converted.qpos.shape[1], + converted.qvel.shape[1] + 1) + + # Parse and convert specified clip -- WITH SMALLER TIMESTEP + converted2 = parse_amc.convert( + _TEST_AMC_PATH, env.physics, 0.5 * env.control_timestep()) + + self.assertEqual(converted2.qpos.shape[0], 63) + self.assertEqual(converted2.qvel.shape[0], 62) + self.assertEqual(converted2.time.shape[0], converted2.qpos.shape[1]) + self.assertEqual(converted2.qpos.shape[1], + converted2.qvel.shape[1] + 1) + + # Compare sizes of parsed objects for different timesteps + self.assertEqual(converted.qpos.shape[1] * 2, converted2.qpos.shape[1]) + + +if __name__ == '__main__': + absltest.main() diff --git a/local_dm_control_suite/utils/randomizers.py b/local_dm_control_suite/utils/randomizers.py new file mode 100755 index 0000000..30ec182 --- /dev/null +++ b/local_dm_control_suite/utils/randomizers.py @@ -0,0 +1,91 @@ +# Copyright 2017 The dm_control Authors. 
"""Randomization functions."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from dm_control.mujoco.wrapper import mjbindings
import numpy as np
from six.moves import range


def random_limited_quaternion(random, limit):
    """Generates a random quaternion limited to the specified rotations.

    A rotation axis is drawn as a normalized 3-vector of standard-normal
    samples, and a rotation angle is drawn as `random.rand() * limit`.

    Args:
      random: Object providing `randn` and `rand` (e.g. an
        `np.random.RandomState` or the `np.random` module).
      limit: Upper bound on the sampled rotation angle.

    Returns:
      A length-4 NumPy array filled in by `mju_axisAngle2Quat`.
    """
    axis = random.randn(3)
    axis /= np.linalg.norm(axis)
    angle = random.rand() * limit

    # MuJoCo writes the result into the preallocated output buffer.
    quaternion = np.zeros(4)
    mjbindings.mjlib.mju_axisAngle2Quat(quaternion, axis, angle)

    return quaternion


def randomize_limited_and_rotational_joints(physics, random=None):
    """Randomizes the positions of joints defined in the physics body.

    The following randomization rules apply:
      - Bounded joints (hinges or sliders) are sampled uniformly in the bounds.
      - Unbounded hinges are sampled uniformly in [-pi, pi]
      - Quaternions for unlimited free joints and ball joints are sampled
        uniformly on the unit 3-sphere.
      - Quaternions for limited ball joints are sampled uniformly on a sector
        of the unit 3-sphere.
      - The linear degrees of freedom of free joints are not randomized.

    Joints that match none of the branches below (unlimited sliders, limited
    free joints) are left untouched.

    Args:
      physics: Instance of 'Physics' class that holds a loaded model.
      random: Optional instance of 'np.random.RandomState'. Defaults to the
        global NumPy random state.
    """
    random = random or np.random

    hinge = mjbindings.enums.mjtJoint.mjJNT_HINGE
    slide = mjbindings.enums.mjtJoint.mjJNT_SLIDE
    ball = mjbindings.enums.mjtJoint.mjJNT_BALL
    free = mjbindings.enums.mjtJoint.mjJNT_FREE

    qpos = physics.named.data.qpos

    # Joints are visited in model order; each visit may consume draws from
    # `random`, so the visiting order determines the result for a given seed.
    for joint_id in range(physics.model.njnt):
        joint_name = physics.model.id2name(joint_id, 'joint')
        joint_type = physics.model.jnt_type[joint_id]
        is_limited = physics.model.jnt_limited[joint_id]
        range_min, range_max = physics.model.jnt_range[joint_id]

        if is_limited:
            if joint_type == hinge or joint_type == slide:
                qpos[joint_name] = random.uniform(range_min, range_max)

            elif joint_type == ball:
                # Only the upper range entry is used, as the angle limit.
                qpos[joint_name] = random_limited_quaternion(random, range_max)

        else:
            if joint_type == hinge:
                qpos[joint_name] = random.uniform(-np.pi, np.pi)

            elif joint_type == ball:
                quat = random.randn(4)
                quat /= np.linalg.norm(quat)
                qpos[joint_name] = quat

            elif joint_type == free:
                # NOTE(review): unlike the ball-joint branch this draws with
                # `rand` (samples in [0, 1)) rather than `randn`, so every
                # quaternion component is non-negative. That does not match the
                # "uniformly on the unit 3-sphere" rule stated in the docstring.
                # Switching to `randn` would change seeded results -- confirm
                # with the owners before touching it.
                quat = random.rand(4)
                quat /= np.linalg.norm(quat)
                # Entries from index 3 onward hold the orientation; the first
                # three (linear) entries are deliberately left as they are.
                qpos[joint_name][3:] = quat
+# ============================================================================ + +"""Tests for randomizers.py.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# Internal dependencies. +from absl.testing import absltest +from absl.testing import parameterized +from dm_control import mujoco +from dm_control.mujoco.wrapper import mjbindings +from dm_control.suite.utils import randomizers +import numpy as np +from six.moves import range + +mjlib = mjbindings.mjlib + + +class RandomizeUnlimitedJointsTest(parameterized.TestCase): + + def setUp(self): + self.rand = np.random.RandomState(100) + + def test_single_joint_of_each_type(self): + physics = mujoco.Physics.from_xml_string(""" + + + + + + + + + + + + + + + + + + + + + + + + + """) + + randomizers.randomize_limited_and_rotational_joints(physics, self.rand) + self.assertNotEqual(0., physics.named.data.qpos['hinge']) + self.assertNotEqual(0., physics.named.data.qpos['limited_hinge']) + self.assertNotEqual(0., physics.named.data.qpos['limited_slide']) + + self.assertNotEqual(0., np.sum(physics.named.data.qpos['ball'])) + self.assertNotEqual(0., np.sum(physics.named.data.qpos['limited_ball'])) + + self.assertNotEqual(0., np.sum(physics.named.data.qpos['free'][3:])) + + # Unlimited slide and the positional part of the free joint remains + # uninitialized. 
+ self.assertEqual(0., physics.named.data.qpos['slide']) + self.assertEqual(0., np.sum(physics.named.data.qpos['free'][:3])) + + def test_multiple_joints_of_same_type(self): + physics = mujoco.Physics.from_xml_string(""" + + + + + + + + + """) + + randomizers.randomize_limited_and_rotational_joints(physics, self.rand) + self.assertNotEqual(0., physics.named.data.qpos['hinge_1']) + self.assertNotEqual(0., physics.named.data.qpos['hinge_2']) + self.assertNotEqual(0., physics.named.data.qpos['hinge_3']) + + self.assertNotEqual(physics.named.data.qpos['hinge_1'], + physics.named.data.qpos['hinge_2']) + + self.assertNotEqual(physics.named.data.qpos['hinge_2'], + physics.named.data.qpos['hinge_3']) + + self.assertNotEqual(physics.named.data.qpos['hinge_1'], + physics.named.data.qpos['hinge_3']) + + def test_unlimited_hinge_randomization_range(self): + physics = mujoco.Physics.from_xml_string(""" + + + + + + + """) + + for _ in range(10): + randomizers.randomize_limited_and_rotational_joints(physics, self.rand) + self.assertBetween(physics.named.data.qpos['hinge'], -np.pi, np.pi) + + def test_limited_1d_joint_limits_are_respected(self): + physics = mujoco.Physics.from_xml_string(""" + + + + + + + + + + + """) + + for _ in range(10): + randomizers.randomize_limited_and_rotational_joints(physics, self.rand) + self.assertBetween(physics.named.data.qpos['hinge'], + np.deg2rad(0), np.deg2rad(10)) + self.assertBetween(physics.named.data.qpos['slide'], 30, 50) + + def test_limited_ball_joint_are_respected(self): + physics = mujoco.Physics.from_xml_string(""" + + + + + + + """) + + body_axis = np.array([1., 0., 0.]) + joint_axis = np.zeros(3) + for _ in range(10): + randomizers.randomize_limited_and_rotational_joints(physics, self.rand) + + quat = physics.named.data.qpos['ball'] + mjlib.mju_rotVecQuat(joint_axis, body_axis, quat) + angle_cos = np.dot(body_axis, joint_axis) + self.assertGreater(angle_cos, 0.5) # cos(60) = 0.5 + + +if __name__ == '__main__': + absltest.main() diff 
--git a/local_dm_control_suite/walker.py b/local_dm_control_suite/walker.py new file mode 100755 index 0000000..b7bfd58 --- /dev/null +++ b/local_dm_control_suite/walker.py @@ -0,0 +1,158 @@ +# Copyright 2017 The dm_control Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Planar Walker Domain.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections + +from dm_control import mujoco +from dm_control.rl import control +from local_dm_control_suite import base +from local_dm_control_suite import common +from dm_control.suite.utils import randomizers +from dm_control.utils import containers +from dm_control.utils import rewards + + +_DEFAULT_TIME_LIMIT = 25 +_CONTROL_TIMESTEP = .025 + +# Minimal height of torso over foot above which stand reward is 1. +_STAND_HEIGHT = 1.2 + +# Horizontal speeds (meters/second) above which move reward is 1. 
+_WALK_SPEED = 1 +_RUN_SPEED = 8 + + +SUITE = containers.TaggedTasks() + + +def get_model_and_assets(): + """Returns a tuple containing the model XML string and a dict of assets.""" + return common.read_model('walker.xml'), common.ASSETS + + +@SUITE.add('benchmarking') +def stand(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): + """Returns the Stand task.""" + physics = Physics.from_xml_string(*get_model_and_assets()) + task = PlanarWalker(move_speed=0, random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP, + **environment_kwargs) + + +@SUITE.add('benchmarking') +def walk(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): + """Returns the Walk task.""" + physics = Physics.from_xml_string(*get_model_and_assets()) + task = PlanarWalker(move_speed=_WALK_SPEED, random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP, + **environment_kwargs) + + +@SUITE.add('benchmarking') +def run(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): + """Returns the Run task.""" + physics = Physics.from_xml_string(*get_model_and_assets()) + task = PlanarWalker(move_speed=_RUN_SPEED, random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP, + **environment_kwargs) + + +class Physics(mujoco.Physics): + """Physics simulation with additional features for the Walker domain.""" + + def torso_upright(self): + """Returns projection from z-axes of torso to the z-axes of world.""" + return self.named.data.xmat['torso', 'zz'] + + def torso_height(self): + """Returns the height of the torso.""" + return self.named.data.xpos['torso', 'z'] + + def horizontal_velocity(self): + """Returns the horizontal 
velocity of the center-of-mass.""" + return self.named.data.sensordata['torso_subtreelinvel'][0] + + def orientations(self): + """Returns planar orientations of all bodies.""" + return self.named.data.xmat[1:, ['xx', 'xz']].ravel() + + +class PlanarWalker(base.Task): + """A planar walker task.""" + + def __init__(self, move_speed, random=None): + """Initializes an instance of `PlanarWalker`. + + Args: + move_speed: A float. If this value is zero, reward is given simply for + standing up. Otherwise this specifies a target horizontal velocity for + the walking task. + random: Optional, either a `numpy.random.RandomState` instance, an + integer seed for creating a new `RandomState`, or None to select a seed + automatically (default). + """ + self._move_speed = move_speed + super(PlanarWalker, self).__init__(random=random) + + def initialize_episode(self, physics): + """Sets the state of the environment at the start of each episode. + + In 'standing' mode, use initial orientation and small velocities. + In 'random' mode, randomize joint angles and let fall to the floor. + + Args: + physics: An instance of `Physics`. 
+ + """ + randomizers.randomize_limited_and_rotational_joints(physics, self.random) + super(PlanarWalker, self).initialize_episode(physics) + + def get_observation(self, physics): + """Returns an observation of body orientations, height and velocites.""" + obs = collections.OrderedDict() + obs['orientations'] = physics.orientations() + obs['height'] = physics.torso_height() + obs['velocity'] = physics.velocity() + return obs + + def get_reward(self, physics): + """Returns a reward to the agent.""" + standing = rewards.tolerance(physics.torso_height(), + bounds=(_STAND_HEIGHT, float('inf')), + margin=_STAND_HEIGHT/2) + upright = (1 + physics.torso_upright()) / 2 + stand_reward = (3*standing + upright) / 4 + if self._move_speed == 0: + return stand_reward + else: + move_reward = rewards.tolerance(physics.horizontal_velocity(), + bounds=(self._move_speed, float('inf')), + margin=self._move_speed/2, + value_at_margin=0.5, + sigmoid='linear') + return stand_reward * (5*move_reward + 1) / 6 diff --git a/local_dm_control_suite/walker.xml b/local_dm_control_suite/walker.xml new file mode 100755 index 0000000..d87ae82 --- /dev/null +++ b/local_dm_control_suite/walker.xml @@ -0,0 +1,70 @@ + + + + + + diff --git a/local_dm_control_suite/wrappers/__init__.py b/local_dm_control_suite/wrappers/__init__.py new file mode 100755 index 0000000..f7e4a68 --- /dev/null +++ b/local_dm_control_suite/wrappers/__init__.py @@ -0,0 +1,16 @@ +# Copyright 2018 The dm_control Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Environment wrappers used to extend or modify environment behaviour.""" diff --git a/local_dm_control_suite/wrappers/action_noise.py b/local_dm_control_suite/wrappers/action_noise.py new file mode 100755 index 0000000..dab9970 --- /dev/null +++ b/local_dm_control_suite/wrappers/action_noise.py @@ -0,0 +1,74 @@ +# Copyright 2018 The dm_control Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Wrapper control suite environments that adds Gaussian noise to actions.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import dm_env +import numpy as np + + +_BOUNDS_MUST_BE_FINITE = ( + 'All bounds in `env.action_spec()` must be finite, got: {action_spec}') + + +class Wrapper(dm_env.Environment): + """Wraps a control environment and adds Gaussian noise to actions.""" + + def __init__(self, env, scale=0.01): + """Initializes a new action noise Wrapper. + + Args: + env: The control suite environment to wrap. + scale: The standard deviation of the noise, expressed as a fraction + of the max-min range for each action dimension. + + Raises: + ValueError: If any of the action dimensions of the wrapped environment are + unbounded. 
+ """ + action_spec = env.action_spec() + if not (np.all(np.isfinite(action_spec.minimum)) and + np.all(np.isfinite(action_spec.maximum))): + raise ValueError(_BOUNDS_MUST_BE_FINITE.format(action_spec=action_spec)) + self._minimum = action_spec.minimum + self._maximum = action_spec.maximum + self._noise_std = scale * (action_spec.maximum - action_spec.minimum) + self._env = env + + def step(self, action): + noisy_action = action + self._env.task.random.normal(scale=self._noise_std) + # Clip the noisy actions in place so that they fall within the bounds + # specified by the `action_spec`. Note that MuJoCo implicitly clips out-of- + # bounds control inputs, but we also clip here in case the actions do not + # correspond directly to MuJoCo actuators, or if there are other wrapper + # layers that expect the actions to be within bounds. + np.clip(noisy_action, self._minimum, self._maximum, out=noisy_action) + return self._env.step(noisy_action) + + def reset(self): + return self._env.reset() + + def observation_spec(self): + return self._env.observation_spec() + + def action_spec(self): + return self._env.action_spec() + + def __getattr__(self, name): + return getattr(self._env, name) diff --git a/local_dm_control_suite/wrappers/action_noise_test.py b/local_dm_control_suite/wrappers/action_noise_test.py new file mode 100755 index 0000000..dcc5330 --- /dev/null +++ b/local_dm_control_suite/wrappers/action_noise_test.py @@ -0,0 +1,136 @@ +# Copyright 2018 The dm_control Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Tests for the action noise wrapper.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# Internal dependencies. +from absl.testing import absltest +from absl.testing import parameterized +from dm_control.rl import control +from dm_control.suite.wrappers import action_noise +from dm_env import specs +import mock +import numpy as np + + +class ActionNoiseTest(parameterized.TestCase): + + def make_action_spec(self, lower=(-1.,), upper=(1.,)): + lower, upper = np.broadcast_arrays(lower, upper) + return specs.BoundedArray( + shape=lower.shape, dtype=float, minimum=lower, maximum=upper) + + def make_mock_env(self, action_spec=None): + action_spec = action_spec or self.make_action_spec() + env = mock.Mock(spec=control.Environment) + env.action_spec.return_value = action_spec + return env + + def assertStepCalledOnceWithCorrectAction(self, env, expected_action): + # NB: `assert_called_once_with()` doesn't support numpy arrays. 
+ env.step.assert_called_once() + actual_action = env.step.call_args_list[0][0][0] + np.testing.assert_array_equal(expected_action, actual_action) + + @parameterized.parameters([ + dict(lower=np.r_[-1., 0.], upper=np.r_[1., 2.], scale=0.05), + dict(lower=np.r_[-1., 0.], upper=np.r_[1., 2.], scale=0.), + dict(lower=np.r_[-1., 0.], upper=np.r_[-1., 0.], scale=0.05), + ]) + def test_step(self, lower, upper, scale): + seed = 0 + std = scale * (upper - lower) + expected_noise = np.random.RandomState(seed).normal(scale=std) + action = np.random.RandomState(seed).uniform(lower, upper) + expected_noisy_action = np.clip(action + expected_noise, lower, upper) + task = mock.Mock(spec=control.Task) + task.random = np.random.RandomState(seed) + action_spec = self.make_action_spec(lower=lower, upper=upper) + env = self.make_mock_env(action_spec=action_spec) + env.task = task + wrapped_env = action_noise.Wrapper(env, scale=scale) + time_step = wrapped_env.step(action) + self.assertStepCalledOnceWithCorrectAction(env, expected_noisy_action) + self.assertIs(time_step, env.step(expected_noisy_action)) + + @parameterized.named_parameters([ + dict(testcase_name='within_bounds', action=np.r_[-1.], noise=np.r_[0.1]), + dict(testcase_name='below_lower', action=np.r_[-1.], noise=np.r_[-0.1]), + dict(testcase_name='above_upper', action=np.r_[1.], noise=np.r_[0.1]), + ]) + def test_action_clipping(self, action, noise): + lower = -1. + upper = 1. 
+ expected_noisy_action = np.clip(action + noise, lower, upper) + task = mock.Mock(spec=control.Task) + task.random = mock.Mock(spec=np.random.RandomState) + task.random.normal.return_value = noise + action_spec = self.make_action_spec(lower=lower, upper=upper) + env = self.make_mock_env(action_spec=action_spec) + env.task = task + wrapped_env = action_noise.Wrapper(env) + time_step = wrapped_env.step(action) + self.assertStepCalledOnceWithCorrectAction(env, expected_noisy_action) + self.assertIs(time_step, env.step(expected_noisy_action)) + + @parameterized.parameters([ + dict(lower=np.r_[-1., 0.], upper=np.r_[1., np.inf]), + dict(lower=np.r_[np.nan, 0.], upper=np.r_[1., 2.]), + ]) + def test_error_if_action_bounds_non_finite(self, lower, upper): + action_spec = self.make_action_spec(lower=lower, upper=upper) + env = self.make_mock_env(action_spec=action_spec) + with self.assertRaisesWithLiteralMatch( + ValueError, + action_noise._BOUNDS_MUST_BE_FINITE.format(action_spec=action_spec)): + _ = action_noise.Wrapper(env) + + def test_reset(self): + env = self.make_mock_env() + wrapped_env = action_noise.Wrapper(env) + time_step = wrapped_env.reset() + env.reset.assert_called_once_with() + self.assertIs(time_step, env.reset()) + + def test_observation_spec(self): + env = self.make_mock_env() + wrapped_env = action_noise.Wrapper(env) + observation_spec = wrapped_env.observation_spec() + env.observation_spec.assert_called_once_with() + self.assertIs(observation_spec, env.observation_spec()) + + def test_action_spec(self): + env = self.make_mock_env() + wrapped_env = action_noise.Wrapper(env) + # `env.action_spec()` is called in `Wrapper.__init__()` + env.action_spec.reset_mock() + action_spec = wrapped_env.action_spec() + env.action_spec.assert_called_once_with() + self.assertIs(action_spec, env.action_spec()) + + @parameterized.parameters(['task', 'physics', 'control_timestep']) + def test_getattr(self, attribute_name): + env = self.make_mock_env() + wrapped_env = 
action_noise.Wrapper(env) + attr = getattr(wrapped_env, attribute_name) + self.assertIs(attr, getattr(env, attribute_name)) + + +if __name__ == '__main__': + absltest.main() diff --git a/local_dm_control_suite/wrappers/pixels.py b/local_dm_control_suite/wrappers/pixels.py new file mode 100755 index 0000000..0f55fff --- /dev/null +++ b/local_dm_control_suite/wrappers/pixels.py @@ -0,0 +1,120 @@ +# Copyright 2017 The dm_control Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Wrapper that adds pixel observations to a control environment.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections + +import dm_env +from dm_env import specs + +STATE_KEY = 'state' + + +class Wrapper(dm_env.Environment): + """Wraps a control environment and adds a rendered pixel observation.""" + + def __init__(self, env, pixels_only=True, render_kwargs=None, + observation_key='pixels'): + """Initializes a new pixel Wrapper. + + Args: + env: The environment to wrap. + pixels_only: If True (default), the original set of 'state' observations + returned by the wrapped environment will be discarded, and the + `OrderedDict` of observations will only contain pixels. If False, the + `OrderedDict` will contain the original observations as well as the + pixel observations. 
+ render_kwargs: Optional `dict` containing keyword arguments passed to the + `mujoco.Physics.render` method. + observation_key: Optional custom string specifying the pixel observation's + key in the `OrderedDict` of observations. Defaults to 'pixels'. + + Raises: + ValueError: If `env`'s observation spec is not compatible with the + wrapper. Supported formats are a single array, or a dict of arrays. + ValueError: If `env`'s observation already contains the specified + `observation_key`. + """ + if render_kwargs is None: + render_kwargs = {} + + wrapped_observation_spec = env.observation_spec() + + if isinstance(wrapped_observation_spec, specs.Array): + self._observation_is_dict = False + invalid_keys = set([STATE_KEY]) + elif isinstance(wrapped_observation_spec, collections.MutableMapping): + self._observation_is_dict = True + invalid_keys = set(wrapped_observation_spec.keys()) + else: + raise ValueError('Unsupported observation spec structure.') + + if not pixels_only and observation_key in invalid_keys: + raise ValueError('Duplicate or reserved observation key {!r}.' + .format(observation_key)) + + if pixels_only: + self._observation_spec = collections.OrderedDict() + elif self._observation_is_dict: + self._observation_spec = wrapped_observation_spec.copy() + else: + self._observation_spec = collections.OrderedDict() + self._observation_spec[STATE_KEY] = wrapped_observation_spec + + # Extend observation spec. 
+ pixels = env.physics.render(**render_kwargs) + pixels_spec = specs.Array( + shape=pixels.shape, dtype=pixels.dtype, name=observation_key) + self._observation_spec[observation_key] = pixels_spec + + self._env = env + self._pixels_only = pixels_only + self._render_kwargs = render_kwargs + self._observation_key = observation_key + + def reset(self): + time_step = self._env.reset() + return self._add_pixel_observation(time_step) + + def step(self, action): + time_step = self._env.step(action) + return self._add_pixel_observation(time_step) + + def observation_spec(self): + return self._observation_spec + + def action_spec(self): + return self._env.action_spec() + + def _add_pixel_observation(self, time_step): + if self._pixels_only: + observation = collections.OrderedDict() + elif self._observation_is_dict: + observation = type(time_step.observation)(time_step.observation) + else: + observation = collections.OrderedDict() + observation[STATE_KEY] = time_step.observation + + pixels = self._env.physics.render(**self._render_kwargs) + observation[self._observation_key] = pixels + return time_step._replace(observation=observation) + + def __getattr__(self, name): + return getattr(self._env, name) diff --git a/local_dm_control_suite/wrappers/pixels_test.py b/local_dm_control_suite/wrappers/pixels_test.py new file mode 100755 index 0000000..26b7fc1 --- /dev/null +++ b/local_dm_control_suite/wrappers/pixels_test.py @@ -0,0 +1,133 @@ +# Copyright 2017 The dm_control Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Tests for the pixel wrapper.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections + +# Internal dependencies. +from absl.testing import absltest +from absl.testing import parameterized +from local_dm_control_suite import cartpole +from dm_control.suite.wrappers import pixels +import dm_env +from dm_env import specs + +import numpy as np + + +class FakePhysics(object): + + def render(self, *args, **kwargs): + del args + del kwargs + return np.zeros((4, 5, 3), dtype=np.uint8) + + +class FakeArrayObservationEnvironment(dm_env.Environment): + + def __init__(self): + self.physics = FakePhysics() + + def reset(self): + return dm_env.restart(np.zeros((2,))) + + def step(self, action): + del action + return dm_env.transition(0.0, np.zeros((2,))) + + def action_spec(self): + pass + + def observation_spec(self): + return specs.Array(shape=(2,), dtype=np.float) + + +class PixelsTest(parameterized.TestCase): + + @parameterized.parameters(True, False) + def test_dict_observation(self, pixels_only): + pixel_key = 'rgb' + + env = cartpole.swingup() + + # Make sure we are testing the right environment for the test. + observation_spec = env.observation_spec() + self.assertIsInstance(observation_spec, collections.OrderedDict) + + width = 320 + height = 240 + + # The wrapper should only add one observation. 
+ wrapped = pixels.Wrapper(env, + observation_key=pixel_key, + pixels_only=pixels_only, + render_kwargs={'width': width, 'height': height}) + + wrapped_observation_spec = wrapped.observation_spec() + self.assertIsInstance(wrapped_observation_spec, collections.OrderedDict) + + if pixels_only: + self.assertLen(wrapped_observation_spec, 1) + self.assertEqual([pixel_key], list(wrapped_observation_spec.keys())) + else: + expected_length = len(observation_spec) + 1 + self.assertLen(wrapped_observation_spec, expected_length) + expected_keys = list(observation_spec.keys()) + [pixel_key] + self.assertEqual(expected_keys, list(wrapped_observation_spec.keys())) + + # Check that the added spec item is consistent with the added observation. + time_step = wrapped.reset() + rgb_observation = time_step.observation[pixel_key] + wrapped_observation_spec[pixel_key].validate(rgb_observation) + + self.assertEqual(rgb_observation.shape, (height, width, 3)) + self.assertEqual(rgb_observation.dtype, np.uint8) + + @parameterized.parameters(True, False) + def test_single_array_observation(self, pixels_only): + pixel_key = 'depth' + + env = FakeArrayObservationEnvironment() + observation_spec = env.observation_spec() + self.assertIsInstance(observation_spec, specs.Array) + + wrapped = pixels.Wrapper(env, observation_key=pixel_key, + pixels_only=pixels_only) + wrapped_observation_spec = wrapped.observation_spec() + self.assertIsInstance(wrapped_observation_spec, collections.OrderedDict) + + if pixels_only: + self.assertLen(wrapped_observation_spec, 1) + self.assertEqual([pixel_key], list(wrapped_observation_spec.keys())) + else: + self.assertLen(wrapped_observation_spec, 2) + self.assertEqual([pixels.STATE_KEY, pixel_key], + list(wrapped_observation_spec.keys())) + + time_step = wrapped.reset() + + depth_observation = time_step.observation[pixel_key] + wrapped_observation_spec[pixel_key].validate(depth_observation) + + self.assertEqual(depth_observation.shape, (4, 5, 3)) + 
self.assertEqual(depth_observation.dtype, np.uint8) + +if __name__ == '__main__': + absltest.main() diff --git a/run.py b/run.py index f2dd5e2..b3433e5 100644 --- a/run.py +++ b/run.py @@ -8,14 +8,15 @@ import os.path as osp from functools import partial import os import gym +import dmc2gym + +import utils import tensorflow as tf from baselines import logger from baselines.bench import Monitor from baselines.common.atari_wrappers import NoopResetEnv, FrameStack from mpi4py import MPI - - from dynamic_bottleneck import DynamicBottleneck from cnn_policy import CnnPolicy from cppo_agent import PpoOptimizer @@ -128,6 +129,24 @@ class Trainer(object): def make_env_all_params(rank, add_monitor, args): + env = dmc2gym.make( + domain_name='cartpole', + task_name='swingup', + seed=args["seed"], + visualize_reward=False, + from_pixels='pixel', + height=84, + width=84, + frame_skip=4, + img_source=args["img_source"], + resource_files=args["resource_files"], + total_frames=args["total_frames"] + ) + env.seed(args["seed"]) + + env = utils.FrameStack(env, k=4) + + """ if args["env_kind"] == 'atari': env = gym.make(args['env']) assert 'NoFrameskip' in env.spec.id @@ -158,7 +177,7 @@ def make_env_all_params(rank, add_monitor, args): env = make_robo_pong() elif args["env"] == "hockey": env = make_robo_hockey() - + """ if add_monitor: env = Monitor(env, osp.join(logger.get_dir(), '%.2i' % rank)) return env @@ -211,6 +230,9 @@ def add_environments_params(parser): parser.add_argument('--stickyAtari', action='store_true', default=False) parser.add_argument('--pixelNoise', action='store_true', default=False) parser.add_argument('--randomBoxNoise', action='store_true', default=False) + parser.add_argument('--img_source', default=None, type=str, choices=['color', 'noise', 'images', 'video', 'none']) + parser.add_argument('--resource_files', type=str) + parser.add_argument('--total_frames', default=100, type=int) def add_optimization_params(parser): diff --git a/utils.py b/utils.py index 
from collections import deque

import gym


class FrameStack(gym.Wrapper):
    """Stacks the `k` most recent observations along axis 0.

    After `reset` the stack holds `k` copies of the first observation; every
    `step` pushes the newest observation and drops the oldest.
    """

    def __init__(self, env, k):
        """Initializes the wrapper.

        Args:
          env: Environment to wrap. Its `observation_space` must expose
            `shape` and `dtype`.
          k: Number of consecutive observations to stack.
        """
        gym.Wrapper.__init__(self, env)
        self._k = k
        self._frames = deque([], maxlen=k)
        space = env.observation_space
        shp = space.shape
        low = getattr(space, 'low', None)
        high = getattr(space, 'high', None)
        if low is not None and high is not None:
            # A stacked observation is a concatenation of wrapped
            # observations, so it obeys the wrapped limits repeated `k` times.
            # A fixed [0, 1] range would misdescribe it whenever the wrapped
            # space has other limits.
            self.observation_space = gym.spaces.Box(
                low=np.concatenate([low] * k, axis=0),
                high=np.concatenate([high] * k, axis=0),
                dtype=space.dtype
            )
        else:
            # No limits to inherit: keep the unit range.
            self.observation_space = gym.spaces.Box(
                low=0,
                high=1,
                shape=((shp[0] * k,) + shp[1:]),
                dtype=space.dtype
            )
        # Mirror the wrapped time limit when there is one; environments
        # without the attribute yield None instead of raising AttributeError.
        self._max_episode_steps = getattr(env, '_max_episode_steps', None)

    def reset(self):
        """Resets the env and fills the stack with the first observation."""
        obs = self.env.reset()
        for _ in range(self._k):
            self._frames.append(obs)
        return self._get_obs()

    def step(self, action):
        """Steps the env and returns the updated stack with the step results."""
        obs, reward, done, info = self.env.step(action)
        # `maxlen=k` on the deque discards the oldest frame automatically.
        self._frames.append(obs)
        return self._get_obs(), reward, done, info

    def _get_obs(self):
        """Concatenates the stored frames along axis 0."""
        assert len(self._frames) == self._k
        return np.concatenate(list(self._frames), axis=0)