# HPCC2025/ray/cartpole_ppo.py
# weixin_46229132 3086413171 修改car_pos
# 2025-03-13 21:28:30 +08:00
#
# 32 lines
# 983 B
# Python

from ray.rllib.algorithms.ppo import PPOConfig
from ray.rllib.core.rl_module.default_model_config import DefaultModelConfig
from ray.rllib.utils.test_utils import add_rllib_example_script_args
# Build the shared RLlib example-script argument parser, overriding its
# default reward (450.0) and default timestep count (300000).
# NOTE(review): these presumably act as the run's stop criteria -- confirm
# against `add_rllib_example_script_args`.
parser = add_rllib_example_script_args(
    default_timesteps=300000,
    default_reward=450.0,
)
# Default this script to RLlib's new API stack.
parser.set_defaults(enable_new_api_stack=True)

# Register any script-specific command line options on `parser` before this
# point; the parsed values are then available for setting up `config` below.
args = parser.parse_args()
# PPO configuration for CartPole-v1, assembled one section at a time. Each
# builder call's return value is rebound to `config`, so the final object is
# the same one the equivalent single chained expression would produce.
config = PPOConfig()
config = config.environment("CartPole-v1")

# Training hyperparameters.
config = config.training(lr=0.0003, num_epochs=6, vf_loss_coeff=0.01)

# Model settings handed to the RLModule via `DefaultModelConfig`.
config = config.rl_module(
    model_config=DefaultModelConfig(
        fcnet_hiddens=[32],
        fcnet_activation="linear",
        vf_share_layers=True,
    ),
)
if __name__ == "__main__":
    # Script entry point. The runner is imported inside the guard, so it is
    # only loaded when this file is executed directly, not when the module is
    # imported for its `config`.
    from ray.rllib.utils.test_utils import run_rllib_example_script_experiment

    # Run the experiment with the module-level `config` and the parsed
    # command line `args`.
    run_rllib_example_script_experiment(config, args)