/* lrn_directq.c */
#ifdef PGRL_NO_FA
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <float.h>
#include "lrn_DirectQ.h"
#include "gaussian.h"
#include "misc.h"
// Threshold that Update_Policy_Parameters must reach before the gradient update
// runs. Read from PGRL_DirectQ.ini by Initialize_PGRL_DirectQ, then overwritten
// with (steps - 1) each time PGRL_DirectQ starts a new gradient estimate.
int Episodes_Per_Parameter_Update;
// Limit on completed gradient calculations, read from PGRL_DirectQ.ini;
// PGRL_DirectQ calls exit() once Num_of_Grad_Calculations exceeds it.
int Max_Num_Grad_Calc;
// Progress counter for the current gradient estimate. 0 means the next call to
// PGRL_DirectQ starts a fresh estimate; it is reset to 0 after every update.
int Update_Policy_Parameters = 0;
// modes_visited[step][mode] is set to 1 once `mode` has been evaluated at `step`.
// Rows 1 .. Episodes_Per_Parameter_Update-1 are allocated and freed by
// PGRL_DirectQ; the variable itself is defined in another file.
extern int **modes_visited;
// Mode chosen by PGRL_DirectQ to be tried next, and the step it applies to.
// Both are reset to -1 after a gradient update.
// NOTE(review): presumably read by the episode runner defined elsewhere -- confirm.
extern int Mode_Execute;
extern int Step_To_Execute_Mode;
// Scratch buffers handed to avaluate_total_gradient: dpdc_t and dpdv_t are
// dim x num_of_gaussians, wrk has dim entries (see Initialize_PGRL_DirectQ).
double **dpdc_t, **dpdv_t, *wrk;
// Accumulated gradient, num_of_gaussians x dim. drhodc is added to cen and
// drhodv to var (scaled by alpha) when UPDATE_POLICY_PARAMETERS is defined.
double **drhodc, **drhodv;
// Per-estimate tables allocated by PGRL_DirectQ:
//   Q[step][mode]            discounted sum of rewards from `step` onward
//   dpdc/dpdv[step][k][d][q] copies of dpdc_t/dpdv_t[d][q] obtained for gaussian k
double **Q,****dpdc, ****dpdv;
// Q[1][mode[1]] of the episode that started the current estimate; appended to
// r.txt (rb.txt when BIAS is defined).
double V_for_Policy = 0.0;
// Sizes defined elsewhere: length of each cen[k]/var[k] row, and number of rows.
extern int dim;
extern int num_of_gaussians;
#ifdef GRAPHICS
// Set to 1 by PGRL_DirectQ after cen/var have been modified.
extern int Update_Boundaries;
#endif
// Number of gradient calculations completed so far.
int Num_of_Grad_Calculations = 0;
/* Creates `file_name`, or truncates it to empty, so that later appends start clean. */
static void Reset_Log_File(const char *file_name)
{
    char error_text[256];
    FILE *fp = fopen(file_name, "w");

    if (fp == NULL)
    {
        // The name is length-limited so the message always fits in error_text.
        sprintf(error_text, "Couldn't open \"%.200s\"\n", file_name);
        My_Error(error_text);
        return; // never fclose(NULL) should My_Error return
    }
    fclose(fp);
}

/*
 * Reads one integer setting from `fp` into *value and skips what skiptoend()
 * skips after it. On a parse failure the problem is reported through My_Error
 * and *value is left untouched.
 */
static void Read_Int_Setting(FILE *fp, const char *setting_name, int *value)
{
    char error_text[256];
    int parsed = 0;

    if (fscanf(fp, "%d", &parsed) == 1)
    {
        *value = parsed;
    }
    else
    {
        sprintf(error_text,
                "Initialize_Learning_parameters: cannot read %.200s\n",
                setting_name);
        My_Error(error_text);
    }
    skiptoend(fp);
}

/*
 * One-time set-up for the DirectQ policy-gradient learner.
 *
 *  - Reads Episodes_Per_Parameter_Update and Max_Num_Grad_Calc (in that order)
 *    from "PGRL_DirectQ.ini".
 *  - Empties the reward and gradient log files ("rb.txt"/"gb.txt" when BIAS is
 *    defined, "r.txt"/"g.txt" otherwise).
 *  - Allocates the scratch buffers dpdc_t, dpdv_t (dim x num_of_gaussians) and
 *    wrk (dim), and the zero-filled gradient accumulators drhodc, drhodv
 *    (num_of_gaussians x dim).
 *
 * Errors are reported through My_Error. If the settings file cannot be opened
 * and My_Error returns, the function returns without allocating anything.
 */
void Initialize_PGRL_DirectQ(void)
{
    char error_text[256];
    FILE *fp;
    int i, j;

    fp = fopen("PGRL_DirectQ.ini", "r");
    if (fp == NULL)
    {
        sprintf(error_text, "Couldn't open \"%s\"\n", "PGRL_DirectQ.ini");
        My_Error(error_text);
        return; // nothing can be read from a NULL stream
    }
    Read_Int_Setting(fp, "Episodes_Per_Parameter_Update",
                     &Episodes_Per_Parameter_Update);
    Read_Int_Setting(fp, "Max_Num_Grad_Calc", &Max_Num_Grad_Calc);
    fclose(fp);

    // Start every run with empty reward / gradient logs.
#ifdef BIAS
    Reset_Log_File("rb.txt");
    Reset_Log_File("gb.txt");
#else
    Reset_Log_File("r.txt");
    Reset_Log_File("g.txt");
#endif

    // Scratch output buffers for avaluate_total_gradient.
    dpdc_t = (double **)My_Malloc((long)dim * sizeof(double*));
    dpdv_t = (double **)My_Malloc((long)dim * sizeof(double*));
    for ( i = 0; i < dim; i++ )
    {
        dpdc_t[i] = (double *)My_Malloc((long)num_of_gaussians * sizeof(double));
        dpdv_t[i] = (double *)My_Malloc((long)num_of_gaussians * sizeof(double));
    }
    wrk = (double *)My_Malloc((long)dim * sizeof(double));

    // Gradient accumulators, zero-filled.
    drhodc = (double **)My_Malloc((long)num_of_gaussians * sizeof(double*));
    drhodv = (double **)My_Malloc((long)num_of_gaussians * sizeof(double*));
    for ( i = 0; i < num_of_gaussians; i++ )
    {
        drhodc[i] = (double *)My_Malloc((long)dim * sizeof(double));
        drhodv[i] = (double *)My_Malloc((long)dim * sizeof(double));
        for ( j = 0; j < dim; j++ )
        {
            drhodc[i][j] = 0.0;
            drhodv[i][j] = 0.0;
        }
    }
}
/* Opens `file_name` for appending; on failure reports through My_Error and returns NULL. */
static FILE *Open_Log_For_Append(const char *file_name)
{
    char error_text[256];
    FILE *fp = fopen(file_name, "a");

    if (fp == NULL)
    {
        // The name is length-limited so the message always fits in error_text.
        sprintf(error_text, "Couldn't open \"%.200s\"\n", file_name);
        My_Error(error_text);
    }
    return fp;
}

/* Appends V_for_Policy to the reward log and echoes it on stdout. */
static void Log_Policy_Value(void)
{
#ifdef BIAS
    FILE *fp = Open_Log_For_Append("rb.txt");
#else
    FILE *fp = Open_Log_For_Append("r.txt");
#endif

    if (fp != NULL)
    {
        fprintf(fp,"%g\n",V_for_Policy);
        fclose(fp);
    }
    printf("%d: %g\n",Num_of_Grad_Calculations,V_for_Policy);
}

/*
 * Appends the current drhodc/drhodv to the gradient log. With
 * UPDATE_POLICY_PARAMETERS every value goes on its own line; otherwise the
 * whole gradient is written as "dc dv " pairs on a single line.
 */
static void Log_Gradient(void)
{
    int i, j;
#ifdef BIAS
    FILE *fp = Open_Log_For_Append("gb.txt");
#else
    FILE *fp = Open_Log_For_Append("g.txt");
#endif

    if (fp == NULL)
    {
        return;
    }
    for ( j = 0; j < num_of_gaussians; j++ )
    {
        for ( i = 0; i < dim; i++ )
        {
#ifdef UPDATE_POLICY_PARAMETERS
            fprintf(fp,"%g\n",drhodc[j][i]);
            fprintf(fp,"%g\n",drhodv[j][i]);
#else
            fprintf(fp,"%g %g ",drhodc[j][i],drhodv[j][i]);
#endif
        }
    }
#ifndef UPDATE_POLICY_PARAMETERS
    fprintf(fp,"\n");
#endif
    fclose(fp);
}

/* Returns sum_{j=first}^{steps-1} r[j] * gam^(j-first). */
static double Discounted_Return(const double *r, int first, int steps, double gam)
{
    double total = 0.0;
    int j;

    for ( j = first; j < steps; j++ )
    {
        total = total + r[j] * pow(gam,(double)(j-first));
    }
    return total;
}

/* Returns row[0] + ... + row[n-1]. */
static double Sum_Row(const double *row, int n)
{
    double total = 0.0;
    int j;

    for ( j = 0; j < n; j++ )
    {
        total = total + row[j];
    }
    return total;
}

/*
 * Calls avaluate_total_gradient for gaussian `k` at `step` and copies the
 * resulting dpdc_t/dpdv_t into dpdc[step][k] / dpdv[step][k].
 */
static void Store_Policy_Gradient(int step, int k,
                                  double **s, double **g,
                                  double **cen, double **var,
                                  double g_tot)
{
    int d, q;

    avaluate_total_gradient(dim, s[step], cen[k], var[k],
                            dpdc_t, dpdv_t, wrk, k, g[step], num_of_gaussians, g_tot);
    for ( d = 0; d < dim; d++ )
    {
        for ( q = 0; q < num_of_gaussians; q++ )
        {
            dpdc[step][k][d][q] = dpdc_t[d][q];
            dpdv[step][k][d][q] = dpdv_t[d][q];
        }
    }
}

/*
 * Points Mode_Execute at the first mode not yet visited at `step`.
 * Returns 1 if such a mode exists, 0 if every mode was already visited (in
 * which case Mode_Execute is left unchanged). `step` must be a row of
 * modes_visited that has been allocated.
 */
static int Pick_Unvisited_Mode(int step)
{
    int m;

    for ( m = 0; m < num_of_gaussians; m++ )
    {
        if ( modes_visited[step][m] == 0 )
        {
            Mode_Execute = m;
            return 1;
        }
    }
    return 0;
}

/* Allocates zero-filled Q, dpdc and dpdv tables for `steps` steps. */
static void Allocate_Episode_Tables(int steps)
{
    int k, i, j, q;

    Q = (double **)My_Malloc((long)steps * sizeof(double*));
    dpdc = (double ****)My_Malloc((long)steps * sizeof(double***));
    dpdv = (double ****)My_Malloc((long)steps * sizeof(double***));
    for ( k = 0; k < steps; k++ )
    {
        Q[k] = (double *)My_Malloc((long)num_of_gaussians * sizeof(double));
        dpdc[k] = (double ***)My_Malloc((long)num_of_gaussians * sizeof(double**));
        dpdv[k] = (double ***)My_Malloc((long)num_of_gaussians * sizeof(double**));
        for ( i = 0; i < num_of_gaussians; i++ )
        {
            Q[k][i] = 0.0;
            dpdc[k][i] = (double **)My_Malloc((long)dim * sizeof(double*));
            dpdv[k][i] = (double **)My_Malloc((long)dim * sizeof(double*));
            for ( j = 0; j < dim; j++ )
            {
                dpdc[k][i][j] = (double *)My_Malloc((long)num_of_gaussians * sizeof(double));
                dpdv[k][i][j] = (double *)My_Malloc((long)num_of_gaussians * sizeof(double));
                for ( q = 0; q < num_of_gaussians; q++ )
                {
                    dpdc[k][i][j][q] = 0.0;
                    dpdv[k][i][j][q] = 0.0;
                }
            }
        }
    }
}

/* Releases the first `num_steps` rows of Q, dpdc and dpdv, then the tables themselves. */
static void Free_Episode_Tables(int num_steps)
{
    int k, i, j;

    for ( k = 0; k < num_steps; k++ )
    {
        free(Q[k]);
        for ( i = 0; i < num_of_gaussians; i++ )
        {
            for ( j = 0; j < dim; j++ )
            {
                free(dpdc[k][i][j]);
                free(dpdv[k][i][j]);
            }
            free(dpdc[k][i]);
            free(dpdv[k][i]);
        }
        free(dpdc[k]);
        free(dpdv[k]);
    }
    free(Q);
    free(dpdc);
    free(dpdv);
    Q = NULL;
    dpdc = NULL;
    dpdv = NULL;
}

#ifdef UPDATE_POLICY_PARAMETERS
/*
 * Scales drhodc/drhodv so the largest absolute entry becomes 1. NaN entries
 * are reported through My_Error. An all-zero gradient is left untouched
 * instead of being divided by zero.
 */
static void Normalize_Gradient(void)
{
    double tdc = 0.0, tdv = 0.0, tmax;
    int i, j;

    for ( j = 0; j < num_of_gaussians; j++ )
    {
        for ( i = 0; i < dim; i++ )
        {
            // _isnan comes from <float.h> on the toolchain this file targets.
            if ( _isnan(drhodc[j][i]))
            {
                My_Error("Not a Number!");
            }
            if ( _isnan(drhodv[j][i]))
            {
                My_Error("Not a Number!");
            }
            if ( tdc < fabs(drhodc[j][i]) )
            {
                tdc = fabs(drhodc[j][i]);
            }
            if ( tdv < fabs(drhodv[j][i]) )
            {
                tdv = fabs(drhodv[j][i]);
            }
        }
    }
    tmax = ( tdc > tdv ) ? tdc : tdv;
    if ( _isnan(tmax))
    {
        My_Error("Not a Number!");
    }
    if ( tmax <= 0.0 )
    {
        // Every component is zero: 0/0 would turn a harmless "no update" into NaN.
        return;
    }
    for ( j = 0; j < num_of_gaussians; j++ )
    {
        for ( i = 0; i < dim; i++ )
        {
            drhodc[j][i] = drhodc[j][i] / tmax;
            if ( _isnan(drhodc[j][i]))
            {
                My_Error("Not a Number!");
            }
            drhodv[j][i] = drhodv[j][i] / tmax;
            if ( _isnan(drhodv[j][i]))
            {
                My_Error("Not a Number!");
            }
        }
    }
}
#endif

/*
 * Processes one finished episode for the DirectQ policy-gradient estimate.
 *
 * The function is a small state machine driven by Update_Policy_Parameters:
 *
 *  1. Update_Policy_Parameters == 0: this episode starts a new estimate. The
 *     Q/dpdc/dpdv tables are allocated, Q[i][mode[i]] is filled with the
 *     discounted return from step i, the gradient terms of every gaussian are
 *     stored for every step, modes_visited is built, and the first mode still
 *     to be tried at step 1 is put in Mode_Execute.
 *  2. steps <= Step_To_Execute_Mode: the episode ended before the step under
 *     evaluation, so the gradient update is forced.
 *  3. Otherwise: the episode evaluated mode[Step_To_Execute_Mode] at that step;
 *     its return and gradient terms are recorded and the next unvisited mode
 *     (or the next step) is selected.
 *
 * Once Update_Policy_Parameters reaches Episodes_Per_Parameter_Update the
 * gradient is accumulated over the leading steps whose modes were all visited,
 * averaged, logged, optionally applied to cen/var (UPDATE_POLICY_PARAMETERS),
 * and all per-estimate tables are freed. The process exits once
 * Num_of_Grad_Calculations exceeds Max_Num_Grad_Calc.
 *
 * Parameters:
 *   steps  number of steps in the episode; indices 1 .. steps-1 are used.
 *          Must be at least 2 when a new estimate starts, because Q[1] and
 *          mode[1] are read.
 *   s      s[i] is forwarded to avaluate_total_gradient for step i.
 *   g      g[i][0 .. num_of_gaussians-1] is summed and forwarded for step i.
 *   cen    cen[k][0 .. dim-1]; incremented by alpha * drhodc when updating.
 *   var    var[k][0 .. dim-1]; incremented by alpha * drhodv and clamped to
 *          at least 0.01 when updating.
 *   mode   mode[i] is the index of the mode taken at step i.
 *   alpha  step size of the parameter update.
 *   gam    discount factor applied to the rewards.
 *   r      r[j] is the reward at step j.
 */
void PGRL_DirectQ(int steps,
                  double **s, double **g,
                  double **cen, double **var,
                  int *mode, double alpha, double gam,
                  double *r)
{
    double g_tot;
    int i,j,k,n;

    if ( Update_Policy_Parameters == 0 )
    {
        Allocate_Episode_Tables(steps);

        for ( i = 1; i < steps; i++ )
        {
            Q[i][mode[i]] = Discounted_Return(r, i, steps, gam);
        }
        V_for_Policy = Q[1][mode[1]];
        Log_Policy_Value();

        for ( i = 1; i < steps; i++ )
        {
            // g_tot is computed once per step, before any gradient call for it.
            g_tot = Sum_Row(g[i], num_of_gaussians);
            for ( k = 0; k < num_of_gaussians; k++ )
            {
                Store_Policy_Gradient(i, k, s, g, cen, var, g_tot);
            }
        }

        Episodes_Per_Parameter_Update = steps - 1;
        // Row 0 is never allocated or used; rows start at step 1.
        modes_visited = (int **)My_Malloc((long)Episodes_Per_Parameter_Update * sizeof(int*));
        for ( i = 1; i < Episodes_Per_Parameter_Update; i++ )
        {
            modes_visited[i] = (int *)My_Malloc((long)num_of_gaussians * sizeof(int));
            for ( j = 0; j < num_of_gaussians; j++ )
            {
                modes_visited[i][j] = 0;
            }
            modes_visited[i][mode[i]] = 1;
        }

        Update_Policy_Parameters++;
        Step_To_Execute_Mode = 1;
        // Row 1 exists only when the table has more than one row (steps > 2);
        // reading it otherwise would be out of bounds.
        if ( Step_To_Execute_Mode < Episodes_Per_Parameter_Update )
        {
            Pick_Unvisited_Mode(Step_To_Execute_Mode);
        }
    }
    else if ( steps <= Step_To_Execute_Mode )
    { // episode too short to reach the evaluated step: update the gradient now
        Update_Policy_Parameters = Episodes_Per_Parameter_Update;
    }
    else
    {
        int step = Step_To_Execute_Mode;
        int tried = mode[step];

        Q[step][tried] = Discounted_Return(r, step, steps, gam);
        if ( Q[step][tried] == 0.0 )
        {
            printf("Q_tmp[%d][%d] = %g\n",step,tried,Q[step][tried]);
        }

        g_tot = Sum_Row(g[step], num_of_gaussians);
        Store_Policy_Gradient(step, tried, s, g, cen, var, g_tot);
        modes_visited[step][tried] = 1;

        if ( !Pick_Unvisited_Mode(step) )
        {
            // Every mode was tried at this step: move on to the next one.
            Step_To_Execute_Mode++;
            Update_Policy_Parameters++;
            if ( Update_Policy_Parameters < Episodes_Per_Parameter_Update )
            {
                Pick_Unvisited_Mode(Step_To_Execute_Mode);
            }
        }
    }

    if ( Episodes_Per_Parameter_Update <= Update_Policy_Parameters )
    {
        int cont_grad,cnt;

        for ( j = 0; j < num_of_gaussians; j++ )
        {
            for ( i = 0; i < dim; i++ )
            {
                drhodc[j][i] = 0.0;
                drhodv[j][i] = 0.0;
            }
        }
#ifdef BIAS
        {
            // Subtract the per-step mean of Q over all modes.
            double t1;
            for ( k = 1; k < Episodes_Per_Parameter_Update; k++ )
            {
                t1 = 0.0;
                for ( n = 0; n < num_of_gaussians; n++ )
                {
                    t1 = t1 + Q[k][n];
                }
                t1 = t1 / (double)num_of_gaussians;
                for ( n = 0; n < num_of_gaussians; n++ )
                {
                    Q[k][n] = Q[k][n] - t1;
                }
            }
        }
#endif
        // Accumulate over the leading steps whose modes were all visited; the
        // scan stops at the first step with an unvisited mode.
        // NOTE(review): cnt also counts that stopping step although it adds
        // nothing to the sum -- confirm this is the intended divisor.
        cnt = 0;
        cont_grad = 1;
        for ( k = 1; k < Episodes_Per_Parameter_Update && (cont_grad == 1); k++ )
        {
            for ( j = 0; j < num_of_gaussians && (cont_grad == 1); j++ )
            {
                cont_grad = modes_visited[k][j];
            }
            cnt++;
            if ( cont_grad == 1 )
            {
                for ( i = 0; i < dim; i++ )
                {
                    for ( j = 0; j < num_of_gaussians; j++ )
                    {
                        for ( n = 0; n < num_of_gaussians; n++ )
                        {
                            drhodc[j][i] = drhodc[j][i] + Q[k][n] * dpdc[k][j][i][n];
                            drhodv[j][i] = drhodv[j][i] + Q[k][n] * dpdv[k][j][i][n];
                        }
                    }
                }
            }
        }
        // cnt is 0 when there was no step to scan; the sums are then all zero
        // and dividing would only produce NaN.
        if ( cnt > 0 )
        {
            for ( j = 0; j < num_of_gaussians; j++ )
            {
                for ( i = 0; i < dim; i++ )
                {
                    drhodc[j][i] = drhodc[j][i] / cnt;
                    drhodv[j][i] = drhodv[j][i] / cnt;
                }
            }
        }

        // The averaged gradient is logged before any normalisation.
        Log_Gradient();
#ifdef UPDATE_POLICY_PARAMETERS
        Normalize_Gradient();
#ifdef GRAPHICS
        Update_Boundaries = 1;
#endif
        for ( j = 0; j < num_of_gaussians; j++ )
        {
            for ( i = 0; i < dim; i++ )
            {
                cen[j][i] = cen[j][i] + alpha * drhodc[j][i];
                var[j][i] = var[j][i] + alpha * drhodv[j][i];
                if ( var[j][i] < 0.01)
                {
                    var[j][i] = 0.01;
                }
            }
        }
#endif
        // The tables were allocated with (Episodes_Per_Parameter_Update + 1) rows.
        Free_Episode_Tables(Episodes_Per_Parameter_Update + 1);
        for ( i = 1; i < Episodes_Per_Parameter_Update; i++ )
        {
            free(modes_visited[i]);
        }
        free(modes_visited);
        modes_visited = NULL;

        Update_Policy_Parameters = 0;
        Step_To_Execute_Mode = -1;
        Mode_Execute = -1;
#ifdef UPDATE_POLICY_PARAMETERS
        Reset_Random_Seed_For_Paths();
#endif
        Num_of_Grad_Calculations++;
        if ( Num_of_Grad_Calculations > Max_Num_Grad_Calc )
        {
            // NOTE(review): status 1 is used for what looks like the normal
            // stop condition; kept as-is for callers that may rely on it.
            exit(1);
        }
    }
}
#endif
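/*
 * Code-viewer keyboard shortcuts (web page residue, not part of the program):
 *   Copy code:          Ctrl + C
 *   Search code:        Ctrl + F
 *   Full-screen mode:   F11
 *   Toggle theme:       Ctrl + Shift + D
 *   Show shortcuts:     ?
 *   Increase font size: Ctrl + =
 *   Decrease font size: Ctrl + -
 */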