% MS-E2191 Graduate seminar on operations research
% Homework 5 model solution
% Emil af Björkesten, emil.afbjorkesten@aalto.fi

clear all;
%% a)
% Starting at 1, what is the distribution at t=10?

% Define the transition matrix
P=[.15, .15,    0,    .7,   0,  0,  0,  0,  0;
 .1,  .1,  .1,  0,    .7,   0,  0,  0,  0;
 0,   .15, .15, 0,    0,    .7, 0,  0,  0;
 .1,  0,    0,  .1,   .1,   0,  .7, 0,  0;
 0,   .3/4, 0,  .3/4, .3/4, .3/4,0, .7, 0;
 0,   0,    .1, 0,    .1,   .1,  0,  0, .7;
 0,   0,    0,  1/3,   0,   0,   1/3,1/3,0;
 0,   0,    0,  0,     .25, 0,  .25,0.25,0.25;
 0,   0,    0,  0,      0,  1/3,  0, 1/3,1/3 ];

% calculate the distribution after 10 time steps, when starting from 1
P10 = P^10;
mu0 = [1 0 0 0 0 0 0 0 0];
mu10 = mu0*P10;

% gives mu_10 =
% 0.0150    0.0126    0.0100    0.1066    0.1049    0.0765    0.2256    0.2727    0.1762


%% b)
% Is there an invariant distribution, 
% and does it depend on the initial state?

% to calculate the invariant distribution, we solve pi = pi*P
pi = [P'-eye(9); ones(1,9)] \ [zeros(9,1); 1];

% gives
%     0.0124
%     0.0128
%     0.0124
%     0.0923
%     0.1038
%     0.0923
%     0.1998
%     0.2745
%     0.1998

% since the equation has a solution, the solution is an invariant
% distribution.

% we can also try out what happens when increasing the number of time
% steps:
P100 = P^100;
mu100 = mu0*P100;
mu101 = mu0*P^101;
diff100 = mu101 - mu100;
%    1.0e-12 *
%    -0.0120   -0.0000    0.0120   -0.0732         0    0.0732   -0.1200   -0.0001    0.1198
% the difference is so small that we can be fairly sure this is a limiting
% distribution.

% To see if it depends on the initial state, we check if mu_100 differs
% depending on initial state
mub = [0 1 0 0 0 0 0 0 0];
muc = [0 0 1 0 0 0 0 0 0];
mud = [0 0 0 1 0 0 0 0 0];
mue = [0 0 0 0 1 0 0 0 0];
muf = [0 0 0 0 0 1 0 0 0];
mug = [0 0 0 0 0 0 1 0 0];
muh = [0 0 0 0 0 0 0 1 0];
mui = [0 0 0 0 0 0 0 0 1];
mus = [mu0; mub; muc; mud; mue; muf; mug; muh; mui];

for i = 1:9
    s = mus(i,:)*P100-mu100;
    if (abs(s) > 0.00001) 
        fprintf("Not the same\n");
    end
end
% all are within the error margin


%% 3.
% The reward for a transition to 9 is 100, 
% all other transitions have reward 0. 
% Use the equation on slide 17 with discount factor 0.5. 
% What is the value of each state?

% This can be done either directly using matrix notation or recursively.


% Recursive solution
v0 = ones(9,1);
vf = zeros(9,1);
err = ones(9,1)*.000001;
d = 0.5;
R=100;
while any(abs(vf-v0) > err)
    v0 = vf;
    vf(1) = P(1,1)*d*v0(1)+P(1,2)*d*v0(2)+P(1,4)*d*v0(4);
    vf(2) = P(2,1)*d*v0(1)+P(2,2)*d*v0(2)+P(2,3)*d*v0(3)+P(2,5)*d*v0(5);
    vf(3) = P(3,6)*d*v0(6)+P(3,2)*d*v0(2)+P(3,3)*d*v0(3);
    vf(4) = P(4,1)*d*v0(1)+P(4,5)*d*v0(5)+P(4,4)*d*v0(4)+P(4,7)*d*v0(7);
    vf(5) = P(5,4)*d*v0(4)+P(5,2)*d*v0(2)+P(5,6)*d*v0(6)+P(5,5)*d*v0(5)+P(5,8)*d*v0(8);
    vf(6) = P(6,6)*d*v0(6)+P(6,9)*(R+d*v0(9))+P(6,5)*d*v0(5)+P(6,3)*d*v0(3);
    vf(7) = P(7,7)*d*v0(7)+P(7,8)*d*v0(8)+P(7,4)*d*v0(4);
    vf(8) = P(8,8)*d*v0(8)+P(8,7)*d*v0(7)+P(8,5)*d*v0(5)+P(8,9)*(R+d*v0(9));
    vf(9) = P(9,6)*d*v0(6)+P(9,8)*d*v0(8)+P(9,9)*(R+d*v0(9));
end
% vf =
%     2.4825
%     9.4128
%    39.3940
%     4.5439
%    19.5666
%   102.0957
%     9.1836
%    41.3739
%    68.6939


% Matrix method
% to calculate the values of each state, we define a reward matrix
rewards = [0 0 0 0 0 0 0 0 100;
    0 0 0 0 0 0 0 0 100;
    0 0 0 0 0 0 0 0 100;
    0 0 0 0 0 0 0 0 100;
    0 0 0 0 0 0 0 0 100;
    0 0 0 0 0 0 0 0 100;
    0 0 0 0 0 0 0 0 100;
    0 0 0 0 0 0 0 0 100;
    0 0 0 0 0 0 0 0 100];
disc = 0.5;
% and rewrite the given equation to matrix form: 
% I*V = diag(P*rewards')+disc*P*V
% From this we can solve values:
values = (eye(9) - disc * P) \ diag(P*rewards');
% values =
%     2.4825
%     9.4128
%    39.3940
%     4.5439
%    19.5666
%   102.0957
%     9.1836
%    41.3739
%    68.6939


% In case 9 -> 9 would not give any reward:
%     2.0688
%     7.8440
%    32.8283
%     3.7866
%    16.3055
%    85.0797
%     7.6530
%    34.4783
%    23.9116
% Due to unclear instructions, this answer is also accepted

% this can also be done e.g. in Excel by formulating the recursive value 
% equations for each state. In that case, make sure the settings allow 
% iterative calculations and accept circular references.