From e7a439534074987e36bac46259ad08ec7dd46eab Mon Sep 17 00:00:00 2001
From: weixin_46229132 <weixin_46229132@noreply.gitcode.com>
Date: Tue, 11 Mar 2025 15:46:11 +0800
Subject: [PATCH] =?UTF-8?q?=E4=BF=9D=E5=AD=98=E5=BD=93=E5=89=8D=E7=8A=B6?=
 =?UTF-8?q?=E6=80=81?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 ga.py => GA/ga.py                       |  0
 hybrid_solver.py => GA/hybrid_solver.py |  0
 plot_util.py => GA/plot_util.py         |  0
 MDP/test_mdpsovler.py                   | 43 +++++++++++++++++++++++++
 4 files changed, 43 insertions(+)
 rename ga.py => GA/ga.py (100%)
 rename hybrid_solver.py => GA/hybrid_solver.py (100%)
 rename plot_util.py => GA/plot_util.py (100%)
 create mode 100644 MDP/test_mdpsovler.py

diff --git a/ga.py b/GA/ga.py
similarity index 100%
rename from ga.py
rename to GA/ga.py
diff --git a/hybrid_solver.py b/GA/hybrid_solver.py
similarity index 100%
rename from hybrid_solver.py
rename to GA/hybrid_solver.py
diff --git a/plot_util.py b/GA/plot_util.py
similarity index 100%
rename from plot_util.py
rename to GA/plot_util.py
diff --git a/MDP/test_mdpsovler.py b/MDP/test_mdpsovler.py
new file mode 100644
index 0000000..69360b8
--- /dev/null
+++ b/MDP/test_mdpsovler.py
@@ -0,0 +1,43 @@
+import mdpsolver
+import random
+import sys  # NOTE(review): not referenced anywhere in this script
+import numpy as np  # NOTE(review): not referenced anywhere in this script
+from random import randint
+
+# TEST 1
+# Builds a simple MDP with 3 states and 2 actions in each state, solves it, and prints the policy.
+
+#---------------------------------------
+# CONFIGURATION 1
+#---------------------------------------
+
+# Rewards, indexed as rewards[state][action].
+# 1st index: from (current) state
+# 2nd index: action
+rewards = [[5,-1],
+           [1,-2],
+           [50,0]]
+
+# Transition probabilities, indexed as tranMatWithZeros[state][action][next_state].
+# 1st index: from (current) state
+# 2nd index: action
+# 3rd index: to (next) state; every innermost list below sums to 1
+tranMatWithZeros = [[[0.9,0.1,0.0],[0.1,0.9,0.0]],
+                    [[0.4,0.5,0.1],[0.3,0.5,0.2]],
+                    [[0.2,0.2,0.6],[0.5,0.5,0.0]]]
+
+# Initial policy: one random action index (0 or 1) per state; the fixed seed makes the draw repeatable.
+random.seed(10)
+initPolicy = [randint(0, 1) for p in range(0, 3)]
+
+# Model 1a (discounted reward, parallel). NOTE(review): "mpi" is presumably modified policy iteration -- confirm in mdpsolver docs.
+mdl1a = mdpsolver.model()
+mdl1a.mdp(discount=0.95,
+        rewards=rewards,
+        tranMatWithZeros=tranMatWithZeros)
+mdl1a.solve(algorithm="mpi",
+          update="standard",
+          parallel=True,
+          initPolicy=initPolicy)
+
+print(mdl1a.getPolicy())
\ No newline at end of file