0001 function [V Value Alpha t]=iPerseus(P,B,stopCriteria)
0002 % iPerseus  Iterative point-based policy update over a fixed set of beliefs.
0003 %
0004 % NOTE(review): the name suggests a variant of the Perseus point-based
0005 % value-iteration algorithm for POMDPs -- confirm against the project docs.
0006 %
0007 % Starting from a policy holding one initial element (Alpha0(P) paired with
0008 % RandomAction(P)), each outer iteration builds a new policy V{n+1} from V{n}.
0009 % Beliefs are picked at random among those still pending; each pick is backed
0010 % up and the resulting element (or the previous one, when the backup yields a
0011 % lower value) is added to V{n+1} and assigned to the pending beliefs it covers.
0012 % The outer loop ends when stopCriteria returns true.
0013 %
0014 % Inputs:
0015 %   P            - Model object; it is passed to Alpha0, RandomAction,
0016 %                  ComputeAlphas_j_a_o and DiscretizeActionModel.
0017 %   B            - Cell array of beliefs; size(B,2) is used as the number
0018 %                  of beliefs (presumably a 1-by-nb row cell -- verify).
0019 %   stopCriteria - Function called as stopCriteria(n,t(n),mvd) at the end of
0020 %                  every iteration; the loop ends when it returns true.
0021 %
0022 % Outputs:
0023 %   V     - Cell array; V{n} is the policy at step n (V{1} is the initial one).
0024 %   Value - Cell array; Value{n}(i) is the value stored for belief i at step n.
0025 %   Alpha - Cell array; Alpha{n}(i) is used as the index into V{n} of the
0026 %           element assigned to belief i.
0027 %   t     - Vector; t(n) is the CPU time (cputime) elapsed since just before
0028 %           the main loop when step n was completed. t(1) is never assigned
0029 %           explicitly (MATLAB zero-fills it when t(2) is first written).
0030 %
0031 % Progress information is printed with fprintf while the function runs.
0032
0033 n=1;
0034 V{n}=Policy;
0035 alpha0=Alpha0(P);
0036 V{n}=AddAlpha(V{n},alpha0,RandomAction(P));
0037 % For every belief, query the initial policy with MaxAlpha (two outputs per belief).
0038 [Alpha{n} Value{n}]=cellfun(@(b)(MaxAlpha(V{n},b)),B);
0039
0040 % Number of beliefs.
0041 nb=size(B,2);
0042
0043 % Reference time for the timings stored in t, and the loop-exit flag.
0044 t_init=cputime;
0045 stop=false;
0046
0047 while ~stop
0048 % Index of the policy built in this iteration.
0049 n1=n+1;
0050 fprintf(' Iteration %u: ',n1);
0051
0052 % Compute Alphas_j_a_o from the current policy once per iteration; the
0053 % result is reused by every Backup call below.
0054 % NOTE(review): this is computed with the original model P, whereas Backup
0055 % receives the discretized model P1 -- confirm this is intended.
0056 fprintf('[P')
0057 Alphas_j_a_o=ComputeAlphas_j_a_o(P,V{n});
0058 fprintf('] ');
0059
0060 % The new policy starts as a fresh Policy object (presumably empty).
0061 V{n1}=Policy;
0062
0063 % Per-belief bookkeeping: pending(i) stays true until belief i is assigned an element of V{n1}.
0064 Alpha{n1}=zeros(1,nb);
0065 Value{n1}=zeros(1,nb);
0066 pending=true(1,nb);
0067 nPending=nb;
0068 it=1;
0069 while nPending>0
0070 fprintf('%u(%u)',it,nPending);
0071 it=it+1;
0072
0073 % Pick one of the still-pending beliefs at random.
0074 ndx=find(pending);
0075 cb=ndx(ceil(rand*nPending));
0076
0077 % Fetch the element of V{n} currently assigned to the chosen belief. Only
0078 % its action is used here: alpha is overwritten by Backup below. The action
0079 % model is then discretized for that action.
0080 % NOTE(review): the meaning of the third argument (n) is not visible here.
0081 [alpha action]=V{n}{Alpha{n}(cb)};
0082 P1=DiscretizeActionModel(P,action,n);
0083
0084 % Back up the chosen belief against V{n}; v is compared below with the
0085 % value previously stored for this belief.
0086 fprintf('b[');
0087 [alpha optimalAction v]=Backup(P1,B{cb},V{n},Alphas_j_a_o);
0088 fprintf(']');
0089
0090
0091 % The backup gave a lower value than the stored one: keep the previous element.
0092 if v<Value{n}(cb)
0093
0094 fprintf('p ');
0095 % Re-read the previous element and its action from V{n}.
0096 [alpha optimalAction]=V{n}{Alpha{n}(cb)};
0097
0098 % Add it to the new policy; l is stored as its index in V{n1}.
0099 [V{n1} l]=AddAlpha(V{n1},alpha,optimalAction);
0100
0101 pending(cb)=false;
0102 Alpha{n1}(cb)=l;
0103 Value{n1}(cb)=Value{n}(cb);
0104 nPending=nPending-1;
0105
0106 % Every pending belief that shared this element in V{n} keeps it as well,
0107 % together with its previous value.
0108 for i=1:nb
0109 if pending(i) && Alpha{n}(i)==Alpha{n}(cb)
0110 pending(i)=false;
0111 Alpha{n1}(i)=l;
0112 Value{n1}(i)=Value{n}(i);
0113 nPending=nPending-1;
0114 end
0115 end
0116
0117 else
0118 % The backup value is not lower than the stored one: use the new element.
0119 fprintf('n ');
0120
0121 % Add the backed-up element to the new policy; l is stored as its index in V{n1}.
0122 [V{n1} l]=AddAlpha(V{n1},alpha,optimalAction);
0123
0124 pending(cb)=false;
0125 Alpha{n1}(cb)=l;
0126 Value{n1}(cb)=v;
0127 nPending=nPending-1;
0128 % Assign the new element to every pending belief whose Expectation under it is not below its previous value.
0129 for i=1:nb
0130 if pending(i)
0131 nv=Expectation(B{i},alpha);
0132 if nv>=Value{n}(i)
0133 pending(i)=false;
0134 Alpha{n1}(i)=l;
0135 Value{n1}(i)=nv;
0136 nPending=nPending-1;
0137 end
0138 end
0139 end
0140
0141 end
0142
0143 end
0144 % Record the elapsed CPU time and the per-belief value change (max and mean).
0145 t(n1)=cputime-t_init;
0146 vd=Value{n1}-Value{n};
0147 mvd=max(vd);
0148 avd=sum(vd)/nb;
0149
0150 fprintf('\n mvd:%g avd: %g t: %f\n',mvd,avd,t(n1));
0151 % The new policy becomes the current one.
0152 n=n1;
0153 % Ask the caller-supplied criterion whether to stop.
0154 stop=stopCriteria(n,t(n),mvd);
0155 end
0156
0157