Save current state

weixin_46229132 2025-03-11 15:46:11 +08:00
parent 01c6a71b4f
commit e7a4395340
4 changed files with 43 additions and 0 deletions

MDP/test_mdpsovler.py Normal file

@@ -0,0 +1,43 @@
import mdpsolver
import random
import sys
import numpy as np
from random import randint
#TEST 1
#Simple MDP with 3 states and 2 actions in each state.
#---------------------------------------
# CONFIGURATION 1
#---------------------------------------
#rewards
#1st index: from (current) states
#2nd index: actions
rewards = [[5, -1],
           [1, -2],
           [50, 0]]
#transition probabilities
#1st index: from (current) states
#2nd index: actions
#3rd index: to (next) states
tranMatWithZeros = [[[0.9, 0.1, 0.0], [0.1, 0.9, 0.0]],
                    [[0.4, 0.5, 0.1], [0.3, 0.5, 0.2]],
                    [[0.2, 0.2, 0.6], [0.5, 0.5, 0.0]]]
#initial policy
random.seed(10)
initPolicy = [randint(0, 1) for _ in range(3)]
#Model 1a (discounted reward, parallel)
mdl1a = mdpsolver.model()
mdl1a.mdp(discount=0.95,
          rewards=rewards,
          tranMatWithZeros=tranMatWithZeros)
mdl1a.solve(algorithm="mpi",
update="standard",
parallel=True,
initPolicy=initPolicy)
print(mdl1a.getPolicy())
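
As an optional cross-check (not part of this commit), the same 3-state, 2-action MDP can be solved with a plain value iteration in NumPy and the greedy policy compared against mdl1a.getPolicy(). The sketch below is only an illustration: it assumes NumPy is available, reuses the rewards and tranMatWithZeros lists defined in the test above, and the helper name value_iteration is not an mdpsolver API.

import numpy as np

def value_iteration(rewards, tran, discount=0.95, tol=1e-9, max_iter=10000):
    #rewards[s][a] and tran[s][a][s'] follow the same layout as the test above
    R = np.array(rewards, dtype=float)   #shape (states, actions)
    P = np.array(tran, dtype=float)      #shape (states, actions, states)
    V = np.zeros(R.shape[0])
    for _ in range(max_iter):
        Q = R + discount * (P @ V)       #Q[s,a] = r(s,a) + discount * sum_s' P[s,a,s'] * V[s']
        V_new = Q.max(axis=1)
        if np.max(np.abs(V_new - V)) < tol:
            break
        V = V_new
    return Q.argmax(axis=1).tolist()     #greedy action per state

print(value_iteration(rewards, tranMatWithZeros, discount=0.95))

For a finite discounted MDP, value iteration and the policy-iteration-style solver above should converge to the same optimal policy, so the printed list is expected to match mdl1a.getPolicy().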