保存当前状态

2025-03-11 15:46:11 +08:00 · 2025-03-11 15:46:11 +08:00 · e7a4395340
commit e7a4395340
parent 01c6a71b4f
4 changed files with 43 additions and 0 deletions
--- a/GA/ga.py
+++ b/GA/ga.py
--- a/GA/hybrid_solver.py
+++ b/GA/hybrid_solver.py
--- a/GA/plot_util.py
+++ b/GA/plot_util.py
--- a/MDP/test_mdpsovler.py
+++ b/MDP/test_mdpsovler.py
@ -0,0 +1,43 @@
 import mdpsolver
 import random
 import sys
 import numpy as np
 from random import randint
 # TEST 1
 # A small MDP with 3 states and 2 actions available in each state.
 # ---------------------------------------
 # CONFIGURATION 1
 # ---------------------------------------
 # Rewards, indexed as rewards[current_state][action].
 rewards = [
     [5, -1],
     [1, -2],
     [50, 0],
 ]
 # Transition probabilities, indexed as
 # tranMatWithZeros[current_state][action][next_state].
 # Zero-probability transitions are listed explicitly, so every
 # (state, action) row has one entry per state and sums to 1.
 tranMatWithZeros = [
     [[0.9, 0.1, 0.0], [0.1, 0.9, 0.0]],
     [[0.4, 0.5, 0.1], [0.3, 0.5, 0.2]],
     [[0.2, 0.2, 0.6], [0.5, 0.5, 0.0]],
 ]
 # Initial policy: one randomly drawn action index (0 or 1) per state.
 # The generator is seeded first so the draw is reproducible across runs.
 random.seed(10)
 initPolicy = [randint(0, 1) for _ in rewards]
 # Model 1a: discounted reward, solved with parallel=True.
 mdl1a = mdpsolver.model()
 # Register the MDP from the reward table and the transition matrix
 # (the one that lists zero-probability entries explicitly).
 mdl1a.mdp(
     discount=0.95,
     rewards=rewards,
     tranMatWithZeros=tranMatWithZeros,
 )
 # Solve starting from the seeded random initial policy.
 # NOTE(review): "mpi" presumably selects modified policy iteration --
 # confirm against the mdpsolver documentation.
 mdl1a.solve(
     algorithm="mpi",
     update="standard",
     parallel=True,
     initPolicy=initPolicy,
 )
 # Print the policy reported by the solver.
 print(mdl1a.getPolicy())