From e7a439534074987e36bac46259ad08ec7dd46eab Mon Sep 17 00:00:00 2001 From: weixin_46229132 Date: Tue, 11 Mar 2025 15:46:11 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=9D=E5=AD=98=E5=BD=93=E5=89=8D=E7=8A=B6?= =?UTF-8?q?=E6=80=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ga.py => GA/ga.py | 0 hybrid_solver.py => GA/hybrid_solver.py | 0 plot_util.py => GA/plot_util.py | 0 MDP/test_mdpsovler.py | 43 +++++++++++++++++++++++++ 4 files changed, 43 insertions(+) rename ga.py => GA/ga.py (100%) rename hybrid_solver.py => GA/hybrid_solver.py (100%) rename plot_util.py => GA/plot_util.py (100%) create mode 100644 MDP/test_mdpsovler.py diff --git a/ga.py b/GA/ga.py similarity index 100% rename from ga.py rename to GA/ga.py diff --git a/hybrid_solver.py b/GA/hybrid_solver.py similarity index 100% rename from hybrid_solver.py rename to GA/hybrid_solver.py diff --git a/plot_util.py b/GA/plot_util.py similarity index 100% rename from plot_util.py rename to GA/plot_util.py diff --git a/MDP/test_mdpsovler.py b/MDP/test_mdpsovler.py new file mode 100644 index 0000000..69360b8 --- /dev/null +++ b/MDP/test_mdpsovler.py @@ -0,0 +1,43 @@ +import mdpsolver +import random +import sys +import numpy as np +from random import randint + +#TEST 1 +#Simple MDP with 3 states and 2 actions in each state. 
+ +#--------------------------------------- +# CONFIGURATION 1 +#--------------------------------------- + +#rewards for each (state, action) pair +#1st index: from (current) states +#2nd index: actions +rewards = [[5,-1], + [1,-2], + [50,0]] + +#transition probabilities for each (state, action, next state) triple +#1st index: from (current) states +#2nd index: actions +#3rd index: to (next) states +tranMatWithZeros = [[[0.9,0.1,0.0],[0.1,0.9,0.0]], + [[0.4,0.5,0.1],[0.3,0.5,0.2]], + [[0.2,0.2,0.6],[0.5,0.5,0.0]]] + +#initial policy: one random integer, 0 or 1, for each of the 3 states; the generator is seeded first so the draw is repeatable +random.seed(10) +initPolicy = [randint(0, 1) for p in range(0, 3)] + +#Model 1a (discounted reward, discount=0.95; solved with algorithm="mpi", update="standard", parallel=True) +mdl1a = mdpsolver.model() +mdl1a.mdp(discount=0.95, + rewards=rewards, + tranMatWithZeros=tranMatWithZeros) +mdl1a.solve(algorithm="mpi", + update="standard", + parallel=True, + initPolicy=initPolicy) + +print(mdl1a.getPolicy()) \ No newline at end of file