"""Train PPO on the custom ``PartitionMazeEnv`` using RLlib's example-script helpers.

The script registers the environment, parses the standard RLlib example
command-line arguments, builds a ``PPOConfig`` and, when executed as a script,
hands both to ``run_rllib_example_script_experiment``.
"""

import gymnasium as gym
from ray import tune
from ray.rllib.algorithms.ppo import PPOConfig
from ray.rllib.core.rl_module.default_model_config import DefaultModelConfig
from ray.rllib.utils.test_utils import add_rllib_example_script_args

from env import PartitionMazeEnv  # Import the custom environment.

ENV_ID = "PartitionMazeEnv-v0"


def _create_env(env_config: dict) -> PartitionMazeEnv:
    """Build a ``PartitionMazeEnv`` from an RLlib env config.

    The config entries are forwarded as keyword arguments, which is the same
    calling convention ``gym.make(ENV_ID, **env_config)`` would use.
    """
    return PartitionMazeEnv(**env_config)


# Register the custom environment with gymnasium so that plain
# ``gym.make(ENV_ID)`` calls in this process keep working.
gym.envs.register(
    id=ENV_ID,
    entry_point="env:PartitionMazeEnv",
)

# The gymnasium registry lives only in the process that ran the call above, so
# Ray worker processes would not be able to resolve ENV_ID through it.
# Registering a creator with Ray Tune makes the id resolvable on every worker.
tune.register_env(ENV_ID, _create_env)

parser = add_rllib_example_script_args(
    default_reward=450.0,
    default_timesteps=300000,
)
parser.set_defaults(enable_new_api_stack=True)
args = parser.parse_args()

config = (
    PPOConfig()
    .environment(ENV_ID)
    .training(
        lr=0.0003,
        num_epochs=6,
        vf_loss_coeff=0.01,
    )
    .rl_module(
        # One hidden layer of 32 units with a linear activation; the value
        # function shares layers with the policy (vf_share_layers=True).
        model_config=DefaultModelConfig(
            fcnet_hiddens=[32],
            fcnet_activation="linear",
            vf_share_layers=True,
        ),
    )
)

if __name__ == "__main__":
    from ray.rllib.utils.test_utils import run_rllib_example_script_experiment

    run_rllib_example_script_experiment(config, args=args)