43 lines
1022 B
Python
43 lines
1022 B
Python
import mdpsolver
|
|
import random
|
|
import sys
|
|
import numpy as np
|
|
from random import randint
|
|
|
|
#TEST 1
|
|
#Simple MDP with 3 states and 2 actions in each state.
|
|
|
|
#---------------------------------------
|
|
# CONFIGURATION 1
|
|
#---------------------------------------
|
|
|
|
#rewards
|
|
#1st index: from (current) states
|
|
#2nd index: actions
|
|
rewards = [[5,-1],
|
|
[1,-2],
|
|
[50,0]]
|
|
|
|
#transition probabilities
|
|
#1st index: from (current) states
|
|
#2nd index: actions
|
|
#3rd index: to (next) states
|
|
tranMatWithZeros = [[[0.9,0.1,0.0],[0.1,0.9,0.0]],
|
|
[[0.4,0.5,0.1],[0.3,0.5,0.2]],
|
|
[[0.2,0.2,0.6],[0.5,0.5,0.0]]]
|
|
|
|
#initial policy
|
|
random.seed(10)
|
|
initPolicy = [randint(0, 1) for p in range(0, 3)]
|
|
|
|
#Model 1a (discounted reward, parallel)
|
|
mdl1a = mdpsolver.model()
|
|
mdl1a.mdp(discount=0.95,
|
|
rewards=rewards,
|
|
tranMatWithZeros=tranMatWithZeros)
|
|
mdl1a.solve(algorithm="mpi",
|
|
update="standard",
|
|
parallel=True,
|
|
initPolicy=initPolicy)
|
|
|
|
print(mdl1a.getPolicy()) |