diff --git a/PPO2/PPO.py b/PPO2/PPO.py index c071b9b..286203d 100644 --- a/PPO2/PPO.py +++ b/PPO2/PPO.py @@ -48,17 +48,17 @@ class ActorCritic(nn.Module): if has_continuous_action_space : self.actor = nn.Sequential( nn.Linear(state_dim, 64), - nn.Tanh(), + # nn.Tanh(), # nn.Sigmoid(), - # nn.ReLU(), + nn.ReLU(), nn.Linear(64, 64), - nn.Tanh(), + # nn.Tanh(), # nn.Sigmoid(), - # nn.ReLU(), + nn.ReLU(), nn.Linear(64, action_dim), - nn.Tanh() + # nn.Tanh() # nn.Sigmoid() - # nn.ReLU() + nn.ReLU() ) else: self.actor = nn.Sequential( @@ -72,13 +72,13 @@ class ActorCritic(nn.Module): # critic self.critic = nn.Sequential( nn.Linear(state_dim, 64), - nn.Tanh(), + # nn.Tanh(), # nn.Sigmoid(), - # nn.ReLU(), + nn.ReLU(), nn.Linear(64, 64), - nn.Tanh(), + # nn.Tanh(), # nn.Sigmoid(), - # nn.ReLU(), + nn.ReLU(), nn.Linear(64, 1) )