# File listing metadata: 32 lines, 983 B, Python.
from ray.rllib.algorithms.ppo import PPOConfig
from ray.rllib.core.rl_module.default_model_config import DefaultModelConfig
from ray.rllib.utils.test_utils import add_rllib_example_script_args

# Build the example-script argument parser, overriding its default reward
# (450.0) and default timesteps (300000) for this example.
parser = add_rllib_example_script_args(
    default_reward=450.0,
    default_timesteps=300000,
)
# Make `enable_new_api_stack` default to True for this script.
parser.set_defaults(enable_new_api_stack=True)
# Custom command line options can be registered on `parser` here; their
# parsed values are then available on `args` when setting up `config` below.
args = parser.parse_args()

# Assemble the PPO configuration one step at a time. The return value of each
# call is carried forward, which is equivalent to a single chained expression.
config = PPOConfig()
config = config.environment("CartPole-v1")
config = config.training(
    lr=0.0003,
    num_epochs=6,
    vf_loss_coeff=0.01,
)
config = config.rl_module(
    model_config=DefaultModelConfig(
        fcnet_hiddens=[32],
        fcnet_activation="linear",
        vf_share_layers=True,
    ),
)


if __name__ == "__main__":
    # Imported only when run as a script; importing this module for its
    # `config` does not load the experiment runner.
    from ray.rllib.utils.test_utils import run_rllib_example_script_experiment

    run_rllib_example_script_experiment(config, args)