path.c

来自「这个代码是policy iteration算法关于强化学习的. 请您用winzi」· C语言代码 · 共 441 行
441 行


#include <stdlib.h>
#include <stdio.h>
#include <math.h>


#include "path.h"
#include "misc.h"
#include "gaussian.h"


/* path global variables (for speed) */

/* Policy switch read from sim.ini: when it is 1, update_state() picks the
   mode with the largest activation instead of sampling one. */
int deterministic;

/* Number of state dimensions; length of start_s, goal_s, next_des_state and
   of every row of s_hist, gauss_center and gauss_var. */
int dim;
/* Divisor used by init_path() when deriving step_size. */
int straight_line_steps;
/* Length of the per-step arrays (r, s_hist, g_hist, mode_hist) and the upper
   bound on step inside path(). */
int max_steps_to_goal;
double *r; // the reward recorded at each step (indexed by step)

int step = 0;    /* index of the step currently being simulated */
int episode = 0; /* not referenced by the code visible in this file */

/* Read from sim.ini; not otherwise used by the code visible in this file. */
int max_num_episodes;

/* step_size: length of one move, derived in init_path() from the start and
   goal positions and straight_line_steps.
   step_size_sq: step_size squared as computed in init_path(); Goal_Reached()
   compares the squared distance to the goal against it. */
double step_size,step_size_sq;

double **s_hist;  /* s_hist[step][d]: state at each step */
double *start_s;  /* start state, read from sim.ini */
double *goal_s;   /* goal state, read from sim.ini */

/* Output of Desired_Move_Position(): the noise-free target for the next
   state. update_state() perturbs it before storing it in s_hist. */
double *next_des_state;

int num_of_gaussians;
/* gauss_center[i][d]: centre of Gaussian i in dimension d.
   gauss_var[i][d]:    square of the second value read per dimension.
   g[i]:               activation of Gaussian i at the previous state; the
                       sampling branch of update_state() overwrites entries
                       with running sums.
   g_hist[step][i]:    copy of the activations computed for that step. */
double **gauss_center, **gauss_var, *g, **g_hist;
int *mode_hist; /* mode_hist[step]: index of the mode selected at that step */

/* Scales the random perturbation added to each new state in update_state(). */
double noise_mag;

int **modes_visited; /* not allocated or referenced by the code visible here */
/* Override hook: at the step equal to Step_To_Execute_Mode, update_state()
   uses Mode_Execute as the mode, provided it is a valid Gaussian index.
   The default of -1 disables the override. */
int Mode_Execute = -1;
int Step_To_Execute_Mode = -1;

/******************************/


/*
 * Simulate one episode.
 *
 * The state history starts at start_s, r[0] is set to 0, and update_state()
 * is called for step = 1, 2, ... until it reports that the goal was reached
 * (returns 0) or the per-step arrays are exhausted.
 *
 * If the episode is still running when the last slot has been filled, the
 * final reward is replaced by -1.  A goal reached exactly on the last slot
 * keeps the reward written by update_state().
 *
 * On return, step is one past the terminal index when the goal was reached
 * before the last slot, and max_steps_to_goal - 1 otherwise.
 */
void path(void)
{
	int cont = 1;
	int i;

	/* Presumably reseeds the action-selection random stream (defined elsewhere). */
	My_Seed_Rand_Action_Selection();

	for (i = 0; i < dim; i++)
	{
		s_hist[0][i] = start_s[i];
	}

	r[0] = 0.0;

	/* step is a file-level variable: update_state() reads it to know which
	   history slot to fill. */
	for (step = 1; (step < max_steps_to_goal) && (cont == 1); step++)
	{
		cont = update_state();
	}

	if (step == max_steps_to_goal)
	{
		step = max_steps_to_goal - 1;

		/* Only penalise a genuine timeout.  When the goal was reached on
		   the final slot, cont is 0 and the success reward must survive. */
		if (cont == 1)
		{
			r[step] = -1;
		}
	}
}


int update_state(void)
{
	int pstep;
	int i,mode;

	double g_tot,g_rand,t1,dist_to_center;

	pstep = step - 1;

	g_tot = 0.0;
	for ( i = 0; i < num_of_gaussians; i++ )
	{
		g[i] = evaluate_gauss(dim, s_hist[pstep], gauss_center[i], gauss_var[i]);
		g_hist[step][i] = g[i];
		g_tot = g_tot + g[i];
	}

	if ( step == Step_To_Execute_Mode && (Mode_Execute >= 0) &&
		(Mode_Execute < num_of_gaussians))
	{
		mode = Mode_Execute;
	}
	else if ( deterministic == 1 )
	{
		double g_max;

		mode = 0;
		g_max = g[0];
		for ( i = 1; i < num_of_gaussians; i++ )
		{
			if ( g[i] > g_max )
			{
				mode = i;
				g_max = g[i];
			}
		}
	}
	else
	{
		g_rand = g_tot * My_Rand_Action_Selection();
		g[0] = g[0];
		if ( g_rand < g[0])
		{
			mode = 0;
		}
		else
		{
			int cont_find_mode = 1;
			for ( i = 1; (i < num_of_gaussians) && (cont_find_mode == 1); i++ )
			{
				g[i] = g[i-1] + g[i];
				
				if ( g_rand < g[i] )
				{
					cont_find_mode = 0;
					mode = i;
				}
			}
			if ( cont_find_mode == 1 )
			{
				mode = num_of_gaussians - 1;
			}
		}
	}

	mode_hist[step] = mode;

	if ( mode == 0 )
	{
		Desired_Move_Position(goal_s,1);
	}
	else
	{
		Desired_Move_Position(gauss_center[mode],-1);
	}

	for ( i = 0; i < dim; i++ )
	{
		t1 = fabs(s_hist[pstep][i]-next_des_state[i]);
		g_rand = (My_Rand() - 0.5) * t1;
		s_hist[step][i]  = (g_rand * noise_mag) + next_des_state[i];
	}

	if ( Goal_Reached() )
	{
		r[step] = 1;
		return(0);
	}
	else
	{
		dist_to_center = 0.0;
		for ( i = 0; i < dim; i++ )
		{
			t1 = (s_hist[step][i] + 0.3);
			dist_to_center = dist_to_center +  (t1 * t1);
		}
		r[step] = -0.01 * exp(-dist_to_center/4.0);
		return(1);
	}
}

/*
 * Compute the noise-free position for the current step and store it in
 * next_des_state.
 *
 *   ns  - reference point (dim entries).
 *   dir - +1 to move from the previous state toward ns, -1 to move directly
 *         away from it.
 *
 * The move has length step_size, except that it is limited to the previous
 * state's distance from goal_s when that distance is smaller.  The limit is
 * applied to this call only; the global step_size is never modified.
 *
 * My_Error() is called when ns coincides with the previous state, because
 * the direction is then undefined.
 */
void Desired_Move_Position(double *ns, int dir)
{
	int i,pstep=step-1;
	double tot=0.0,t1,t2;
	double move_len;

	/* Direction vector from the previous state to ns, and its length. */
	for ( i = 0; i < dim; i++ )
	{
		next_des_state[i] = ns[i] - s_hist[pstep][i];
		tot = tot + next_des_state[i] * next_des_state[i];
	}
	tot = sqrt(tot);

	if ( tot <= 0.0 )
	{
		/* NOTE(review): if My_Error() returns, the division below is by
		   zero - confirm that it terminates the program. */
		My_Error("Desired_Move_Position Error: tot dist <= 0.0");
	}

	/* Distance from the previous state to the goal. */
	t2 = 0.0;
	for ( i = 0; i < dim; i++ )
	{
		t1 = s_hist[pstep][i] - goal_s[i];
		t2 = t2 + (t1 * t1);
	}
	t2 = sqrt(t2);

	/* Clamp locally.  Writing the clamped value back to step_size would
	   shorten every later move and leave step_size_sq out of date. */
	move_len = ( t2 < step_size ) ? t2 : step_size;

	for ( i = 0; i < dim; i++ )
	{
		next_des_state[i] = s_hist[pstep][i] + 
			(double)dir * (next_des_state[i] * move_len / tot);
	}
}

int Goal_Reached(void)
{
	double t1,t2 = 0;
	int i;
	

	for ( i = 0; i < dim; i++ )
	{
		t1 = s_hist[step][i] - goal_s[i];
		t2 = t2 + (t1 * t1);
	}
	
	if ( t2 <= step_size_sq)
	{
		return(1);
	}
	else
	{
		return(0);
	}
}


/* Report that a sim.ini field could not be parsed.  The buffer cannot
   overflow: every caller passes a short literal field name. */
static void path_report_unreadable(const char *field)
{
	char error_text[256];

	sprintf(error_text,
		"Initialize_Learning_parameters: cannot read %s\n", field);
	My_Error(error_text);
}

/* Report a sim.ini integer that must be positive but is not. */
static void path_require_positive(int value, const char *field)
{
	char error_text[256];

	if (value <= 0)
	{
		sprintf(error_text,
			"Initialize_Learning_parameters: %s must be positive\n", field);
		My_Error(error_text);
	}
}

/* Read one integer from fp, reporting `field` on failure. */
static int path_read_int(FILE *fp, const char *field)
{
	int value = 0;

	if (fscanf(fp, "%d", &value) != 1)
	{
		path_report_unreadable(field);
	}
	return value;
}

/* Read one single-precision value from fp, reporting `field` on failure.
   Values are deliberately parsed as float, as before, so that the numbers
   stored in the double globals are unchanged. */
static float path_read_float(FILE *fp, const char *field)
{
	float value = 0.0f;

	if (fscanf(fp, "%f", &value) != 1)
	{
		path_report_unreadable(field);
	}
	return value;
}

/* Read dim values from fp into dst, reporting `field` on failure. */
static void path_read_vector(FILE *fp, double *dst, const char *field)
{
	int i;

	for (i = 0; i < dim; i++)
	{
		dst[i] = path_read_float(fp, field);
	}
}

/*
 * Load the simulation settings from "sim.ini", allocate every file-level
 * array and seed the random number generator.
 *
 * Fields are read in this order, with skiptoend() called after each one:
 *   dim, deterministic, straight_line_steps, max_steps_to_goal,
 *   max_num_episodes, noise_mag, start state (dim values),
 *   goal state (dim values), num_of_gaussians, then for each Gaussian
 *   dim pairs "centre spread".  The spread is squared before being stored
 *   in gauss_var.
 *
 * step_size is the start-to-goal distance divided by straight_line_steps.
 *
 * Problems are reported through My_Error().  The memory allocated here is
 * released by clean_up_path_variables().
 */
void init_path(void)
{
	char error_text[256];
	FILE *fp;
	float f_center = 0.0f, f_spread = 0.0f;
	int i,j;
	double diff, dist_sq;

	/** read the sim.ini file ***/
	if ((fp = fopen("sim.ini", "r")) == NULL)
	{
		sprintf(error_text, "Couldn't open \"%s\"\n", "sim.ini");
		My_Error(error_text);
		/* Nothing can be read without the file, in case My_Error() returns. */
		return;
	}

	dim = path_read_int(fp, "dim");
	skiptoend(fp);
	path_require_positive(dim, "dim");

	deterministic = path_read_int(fp, "deterministic");
	skiptoend(fp);

	straight_line_steps = path_read_int(fp, "straight_line_steps");
	skiptoend(fp);
	/* Used as a divisor when computing step_size below. */
	path_require_positive(straight_line_steps, "straight_line_steps");

	max_steps_to_goal = path_read_int(fp, "max_steps_to_goal");
	skiptoend(fp);
	path_require_positive(max_steps_to_goal, "max_steps_to_goal");

	r = (double *)My_Malloc((long)max_steps_to_goal  * sizeof(double));

	max_num_episodes = path_read_int(fp, "max_num_episodes");
	skiptoend(fp);

	noise_mag = path_read_float(fp, "noise_mag");
	skiptoend(fp);

	start_s = (double *)My_Malloc((long)dim  * sizeof(double));
	goal_s = (double *)My_Malloc((long)dim  * sizeof(double));

	next_des_state = (double *)My_Malloc((long)dim  * sizeof(double));

	// read the start position
	path_read_vector(fp, start_s, "start_s");
	skiptoend(fp);

	// read the goal position
	path_read_vector(fp, goal_s, "goal_s");
	skiptoend(fp);

	// calculate the step size from the start-to-goal distance
	dist_sq = 0.0;
	for ( i = 0; i < dim; i++ )
	{
		diff = start_s[i] - goal_s[i];
		dist_sq = dist_sq + diff * diff;
	}
	step_size = sqrt(dist_sq) / straight_line_steps;
	step_size_sq = step_size * step_size;

	// read the gaussian positions
	num_of_gaussians = path_read_int(fp, "num_of_gaussians");
	skiptoend(fp);
	/* update_state() always reads g[0], so at least one Gaussian is needed. */
	path_require_positive(num_of_gaussians, "num_of_gaussians");

	g = (double *)My_Malloc((long)num_of_gaussians  * sizeof(double));
	gauss_center = (double **)My_Malloc((long)num_of_gaussians  * sizeof(double*));
	gauss_var = (double **)My_Malloc((long)num_of_gaussians  * sizeof(double*));
	for ( i = 0; i < num_of_gaussians; i++ )
	{
		gauss_center[i] = (double *)My_Malloc((long)dim  * sizeof(double));
		gauss_var[i] = (double *)My_Malloc((long)dim  * sizeof(double));

		for ( j = 0; j < dim; j++ )
		{
			if (fscanf(fp, "%f %f", &f_center, &f_spread) != 2)
			{
				path_report_unreadable("gauss_var");
			}
			gauss_center[i][j] = f_center; // position
			gauss_var[i][j] = f_spread * f_spread; // variance
		}
		skiptoend(fp);
	}

	s_hist = (double **)My_Malloc((long)max_steps_to_goal  * sizeof(double*));
	for ( i = 0; i < max_steps_to_goal; i++ )
	{
		s_hist[i] = (double *)My_Malloc((long)dim  * sizeof(double));
	}

	g_hist = (double **)My_Malloc((long)max_steps_to_goal  * sizeof(double*));
	for ( i = 0; i < max_steps_to_goal; i++ )
	{
		g_hist[i] = (double *)My_Malloc((long)num_of_gaussians  * sizeof(double));
	}

	mode_hist = (int *)My_Malloc((long)max_steps_to_goal  * sizeof(int));

	fclose(fp);

#ifdef UPDATE_POLICY_PARAMETERS
	My_Seed_Rand(); // initialize the random number generator
#else
	My_Seed_Rand_Fixed(); // initialize the random number generator
#endif

}

/*
 * Release every array allocated by init_path().
 *
 * Each pointer is reset to NULL after it is freed, so the exported globals
 * never dangle and calling this function a second time is harmless.
 */
void clean_up_path_variables(void)
{
	int i;

	free(start_s);
	start_s = NULL;
	free(goal_s);
	goal_s = NULL;

	/* The row tables may already be NULL (never initialised or already
	   cleaned up), so only walk them when they exist. */
	if ( s_hist != NULL )
	{
		for ( i = 0; i < max_steps_to_goal; i++ )
		{
			free(s_hist[i]);
		}
	}
	free(s_hist);
	s_hist = NULL;

	if ( g_hist != NULL )
	{
		for ( i = 0; i < max_steps_to_goal; i++ )
		{
			free(g_hist[i]);
		}
	}
	free(g_hist);
	g_hist = NULL;

	if ( gauss_center != NULL )
	{
		for ( i = 0; i < num_of_gaussians; i++ )
		{
			free(gauss_center[i]);
		}
	}
	free(gauss_center);
	gauss_center = NULL;

	if ( gauss_var != NULL )
	{
		for ( i = 0; i < num_of_gaussians; i++ )
		{
			free(gauss_var[i]);
		}
	}
	free(gauss_var);
	gauss_var = NULL;

	free(g);
	g = NULL;

	free(r);
	r = NULL;

	free(next_des_state);
	next_des_state = NULL;

	free(mode_hist);
	mode_hist = NULL;

}
path.c - 源码说明

本页面展示了「这个代码是policy iteration算法关于强化学习的. 请您用winzip 解压缩」中的 path.c 源码文件，采用 C语言编程语言编写，共 441 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与iteration相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?