0001 function [V Value Alpha t]=Perseus(P,B,stopCriteria)
0002 % PERSEUS  Iteratively builds a sequence of policies over a fixed belief set.
0003 %
0004 %   [V Value Alpha t]=Perseus(P,B,stopCriteria)
0005 %
0006 %   NOTE(review): the name and the loop structure suggest the Perseus
0007 %   randomized point-based value iteration scheme -- confirm with the authors.
0008 %
0009 %   Inputs:
0010 %     P            - problem description. It is only forwarded to Alpha0,
0011 %                    RandomAction, ComputeAlphas_j_a_o and DiscretizeActionModel.
0012 %     B            - cell array of beliefs; size(B,2) beliefs are processed.
0013 %     stopCriteria - function handle called as stopCriteria(n,t(n),mvd) at the
0014 %                    end of every iteration; the main loop ends when it is true.
0015 %
0016 %   Outputs:
0017 %     V     - cell array of policies; V{1} is the initial policy and V{n+1}
0018 %             is the policy built during iteration n.
0019 %     Value - cell array; Value{n}(i) is the value recorded for B{i} with V{n}.
0020 %     Alpha - cell array; Alpha{n}(i) is the index used to fetch from V{n} the
0021 %             element associated with B{i}.
0022 %     t     - t(n) is the CPU time (cputime) spent from the start of the main
0023 %             loop up to the moment V{n} was completed. t(1) is never assigned
0024 %             explicitly, so it is the zero MATLAB fills in when t(2) is set.
0025 %
0026 %   NOTE(review): if B is empty the inner loop never runs, Value{n+1} is never
0027 %   created and the Value{n1}-Value{n} difference below would fail -- confirm
0028 %   whether callers can pass an empty belief set.
0029 nb=size(B,2);
0030
0031 % Initial policy: a single element built from Alpha0(P) and RandomAction(P).
0032 n=1;
0033 V{n}=Policy;
0034 alpha0=Alpha0(P);
0035 V{n}=AddAlpha(V{n},alpha0,RandomAction(P));
0036 % For every belief store the two outputs of MaxAlpha (both must be scalars, since cellfun is used with uniform output).
0037 [Alpha{n} Value{n}]=cellfun(@(b)(MaxAlpha(V{n},b)),B);
0038
0039 % Reference time for the timings reported in t.
0040 t_init=cputime;
0041 stop=false;
0042
0043 while ~stop
0044
0045 n1=n+1;
0046 fprintf(' Iteration %u: ',n1);
0047
0048
0049 % Computed once per iteration from the current policy and passed to every
0050 % Backup call below.
0051 fprintf('[P')
0052 Alphas_j_a_o=ComputeAlphas_j_a_o(P,V{n});
0053 fprintf('] ');
0054
0055 % The next policy starts from a fresh Policy object and is filled in below.
0056 V{n1}=Policy;
0057
0058 % pending(i) is true while no element of V{n1} has been accepted for B{i}.
0059 pending=true(1,nb);
0060 nPending=nb;
0061 it=1;
0062 while nPending>0
0063 fprintf('%u(%u)',it,nPending);
0064 it=it+1;
0065
0066 % Pick one of the still-pending beliefs at random.
0067 ndx=find(pending);
0068 nPending=size(ndx,2);
0069 cb=ndx(ceil(rand*nPending));
0070
0071
0072 % Fetch the element of the current policy associated with the chosen belief.
0073 % Only its action is used here (to build P1); alpha is overwritten by the
0074 % Backup call below.
0075 [alpha action]=V{n}{Alpha{n}(cb)};
0076 P1=DiscretizeActionModel(P,action,n);
0077
0078
0079 % Backup for the chosen belief; v is compared with the previous value below.
0080 fprintf('b[');
0081 [alpha optimalAction v]=Backup(P1,B{cb},V{n},Alphas_j_a_o);
0082 fprintf(']');
0083
0084 % If the backup gives a lower value than the one stored for this belief,
0085 % reuse the previous element ('p' is printed); otherwise keep the new one ('n').
0086 if v<Value{n}(cb)
0087 [alpha optimalAction]=V{n}{Alpha{n}(cb)};
0088 fprintf('p ');
0089 else
0090 fprintf('n ');
0091 end
0092
0093 % Add the selected element to the new policy; l is stored in Alpha{n1} below.
0094 [V{n1} l]=AddAlpha(V{n1},alpha,optimalAction);
0095 % Every pending belief whose value with the new element is not lower than its previous value is marked as done.
0096 for i=1:nb
0097 if pending(i)
0098 nv=Expectation(B{i},alpha);
0099 if nv>=Value{n}(i)
0100 pending(i)=false;
0101 nPending=nPending-1;
0102 Value{n1}(i)=nv;
0103 Alpha{n1}(i)=l;
0104 end
0105 end
0106 end
0107
0108 end
0109
0110 % Iteration statistics: elapsed CPU time, and the maximum (mvd) and average
0111 % (avd) change in the recorded values with respect to the previous policy.
0112 t(n1)=cputime-t_init;
0113 vd=Value{n1}-Value{n};
0114 mvd=max(vd);
0115 avd=sum(vd)/nb;
0116
0117 fprintf('\n mvd:%g avd: %g t: %f\n',mvd,avd,t(n1));
0118
0119 n=n1;
0120 % Ask the caller-provided criterion whether to stop.
0121 stop=stopCriteria(n,t(n),mvd);
0122 end
0123
0124