📄 cmac.cpp
字号:
/* Reinforcement Learning
Implementation of CMAC function approximation
In this implementation, function CMAC::learn(...) implements
learning using eligibility traces. There are functions
implemented to replace, clear and accumulate traces.
There are two ways to use this implementation for learning without traces.
One: first decay all traces to 0 and then use replace trace for active tiles to 1.
Two: use functions computeGradient and updateParameters.
File: fa_cmac.cpp
Author: Bohdana Ratitch
Version: February 2001
*/
#include <stdlib.h>
#include <string.h>
#include<stdio.h>
#include<math.h>
#include<sys/types.h>
#include<iostream.h>
#include<fstream.h>
#ifndef FA_CMAC
#define FA_CMAC
#include "cmac.h"
#endif
/* Definitions of the static data members of CMAC::Tiling.
   Being static, they are shared by all tilings:
   BoundsSet   : true once setBounds() has stored the variables' bounds
   decay       : trace decay factor, changed through setTraceDecay()
   LeftBounds  : per-dimension lower bounds of the input variables
   RightBounds : per-dimension upper bounds of the input variables
*/
bool    CMAC::Tiling::BoundsSet   = false;
double  CMAC::Tiling::decay       = 0;
double* CMAC::Tiling::LeftBounds  = NULL;
double* CMAC::Tiling::RightBounds = NULL;
/* Default constructor of a tile.
   A new tile starts with weight 0, eligibility trace 1,
   learning step 1 and its "changed" flag cleared.
*/
CMAC::Tile::Tile()
{
    // Alternative (disabled): random initial weight in [0,1]
    // w = (double)rand() / (double)RAND_MAX;
    changed = false;
    alpha   = 1;
    e       = 1;
    w       = 0;
}
void CMAC::Tiling::createTiles()
/* Builds this tiling as a flat array of tiles.
   Dimensions are processed from the last one down to the first. For each:
     - the origin is placed to the left of the variable's left bound by a
       random amount of at most one tile width h[i];
     - the tile count n[i] is increased by one
       (NOTE(review): presumably so that the shifted tiling still reaches
       the right bound - confirm);
     - IndCoef[i] becomes the product of the (already increased) tile
       counts of all later dimensions, i.e. the stride used to flatten a
       multi-dimensional tile position into a single index.
   N ends up as the product of all n[i] and N tiles are allocated.
*/
{
    int i;
    const int last = State::dimensionality - 1;

    N = 1;
    IndCoef[last] = 1;

    i = last;
    while (i >= 0) {
        // one rand() draw per dimension, last dimension first
        const double offset = (double)rand() / (double)RAND_MAX * h[i];
        origin[i] = LeftBounds[i] - offset;

        n[i] += 1;
        N *= n[i];

        // n[i+1] has already been increased at this point
        if (i < last)
            IndCoef[i] = IndCoef[i + 1] * n[i + 1];

        --i;
    }

    tiles = new Tile[N];

#ifdef DEBUG
    cout << "Total number of tiles: " << N << endl;
    cout << "Left bounds \t Origin \t IndCoef " << endl;
    for (i = 0; i < State::dimensionality; i++)
        cout << LeftBounds[i] << "\t" << origin[i] << "\t" << IndCoef[i] << endl;
#endif
}
CMAC::Tiling::Tiling()
/* Default constructor.
   Allocates the per-dimension arrays (tile counts, tile widths, origin
   and index coefficients) but creates no tiles: N is 0 and tiles is NULL.
   The visitation factor used by the learning-rate schedules starts at 1.
*/
{
    const int dims = State::dimensionality;

    n       = new int[dims];
    h       = new double[dims];
    origin  = new double[dims];
    IndCoef = new int[dims];

    tiles = NULL;
    N = 0;
    ALPHA_VISITATION_FACTOR = 1;
}
CMAC::Tiling::Tiling(const int* nn){
/* General constructor.
   Parameters:
   nn : pointer to an array containing the number of tiles
        along each input dimension (State::dimensionality entries,
        each of which must be at least 1)
   The tile width along each dimension is the width of the variable's
   range divided by the requested number of tiles; createTiles() then
   offsets the tiling and allocates the tiles.
   Terminates the program with an error message if the variables' bounds
   have not been set, if nn is NULL, or if any requested tile count is
   smaller than 1 (a zero count would otherwise divide by zero when the
   tile width is computed).
*/
    int i;

    if (BoundsSet==false) {
        cout << "Error (tiling): variables' bounds are not set" << endl;
        exit(EXIT_FAILURE);
    }
    if (nn==NULL) {
        cout << "Error (tiling): numbers of tiles are not provided" << endl;
        exit(EXIT_FAILURE);
    }
    for(i=0; i<State::dimensionality; i++){
        if (nn[i]<1) {
            cout << "Error (tiling): number of tiles along dimension " << i
                 << " must be at least 1" << endl;
            exit(EXIT_FAILURE);
        }
    }

    n=new int[State::dimensionality];
    h=new double[State::dimensionality];
    origin = new double[State::dimensionality];
    IndCoef = new int[State::dimensionality];
    for(i=0; i<State::dimensionality; i++){
        n[i]=nn[i];
        h[i]=(RightBounds[i]-LeftBounds[i])/n[i];
    }
    createTiles();
    ALPHA_VISITATION_FACTOR=1;
#ifdef DEBUG
    // n[i] printed here already includes the extra tile added by createTiles()
    cout << "Number of tiles \t Tile width" << endl;
    for(i=0; i<State::dimensionality; i++)
        cout << n[i] << " " << h[i] << endl;
#endif
}
void CMAC::Tiling::operator = (Tiling& t){
/* Assignment: makes this tiling a deep copy of t.
   Copies the number of tiles, the per-dimension tile counts, widths,
   origin and index coefficients, and every tile of t.
   Parameters:
   t : tiling to copy from
   Assigning a tiling to itself is a no-op. The tile array previously
   owned by this tiling is released once the copy has been made.
   NOTE(review): ALPHA_VISITATION_FACTOR is not copied (as in the
   original implementation) - confirm whether that is intended.
*/
    int i;

    // Self-assignment would otherwise replace the tiles with fresh,
    // default-constructed ones and lose all learned weights.
    if (this == &t)
        return;

    // Build the copy first so that this object is still intact
    // if the allocation fails.
    Tile* copied = new Tile[t.N];
    for(i=0; i<t.N; i++)
        copied[i]=t.tiles[i];

    for(i=0; i<State::dimensionality; i++){
        n[i]=t.n[i];
        h[i]=t.h[i];
        origin[i]=t.origin[i];
        IndCoef[i]=t.IndCoef[i];
    }

    // Release the old tiles (NULL after default construction, which
    // delete [] accepts) instead of leaking them.
    delete [] tiles;
    tiles=copied;
    N=t.N;
}
int CMAC::Tiling::getSize(){
/* Returns number of tiles (parameters) in this tiling
*/
return N;
}
void CMAC::Tiling::setBounds(const double* left, const double* right){
/* Sets bounds for input variables (shared by all tilings).
   Parameters:
   left  : lower bound of each input variable (State::dimensionality entries)
   right : upper bound of each input variable (State::dimensionality entries)
   The values are copied into freshly allocated arrays. If bounds were
   already set, the previous arrays are released afterwards instead of
   being leaked.
*/
    int i;

    // Copy into new storage first: this stays correct even if the caller
    // passes the currently stored bounds back in.
    double* newLeft = new double[State::dimensionality];
    double* newRight = new double[State::dimensionality];
    for(i=0; i<State::dimensionality; i++){
        newLeft[i]=left[i];
        newRight[i]=right[i];
    }

    // BoundsSet is true exactly while the arrays from an earlier call
    // are still allocated (deleteBounds() clears it).
    if (BoundsSet){
        delete [] LeftBounds;
        delete [] RightBounds;
    }
    LeftBounds = newLeft;
    RightBounds = newRight;
    BoundsSet=true;
#ifdef DEBUG
    cout << "Bounds are set to: " << endl;
    cout << "Left bounds \t Right bounds" << endl;
    for(i=0; i<State::dimensionality; i++)
        cout << LeftBounds[i] << "\t" << RightBounds[i] << endl;
#endif
}
void CMAC::Tiling::deleteBounds(){
/* Deletes bounds for input variables.
   Releases both bounds arrays, resets the pointers to NULL and clears
   BoundsSet. Resetting the pointers makes a repeated call harmless
   (delete [] on NULL does nothing) instead of freeing the same memory twice.
*/
    delete [] LeftBounds;
    delete [] RightBounds;
    LeftBounds = NULL;
    RightBounds = NULL;
    BoundsSet=false;
}
/* Stores the trace decay factor shared by all tilings.
   Parameters:
   d : new decay factor; a value below 0 or above 1 is reported as an
       error and terminates the program
*/
void CMAC::Tiling::setTraceDecay(double d)
{
    const bool outOfRange = (d > 1) || (d < 0);
    if (outOfRange) {
        cout << "Error (tiling): trace decay must be in [0,1]" << endl;
        exit(EXIT_FAILURE);
    }
    decay = d;
}
/* Gives every tile of this tiling the same learning step.
   Parameters:
   alpha : learning step to store in each tile
*/
void CMAC::Tiling::setAlpha(double alpha)
{
    int k = 0;
    while (k < N) {
        tiles[k].alpha = alpha;
        ++k;
    }
}
/* Finds the tile of this tiling that is activated by a state.
   Parameters:
   s      : input state
   weight : receives the weight of the active tile
   index  : receives the index of the active tile in the
            1-dimensional array of tiles
   Along each dimension the tile position is (s.x[i]-origin[i])/h[i],
   narrowed to float and then truncated to int; the positions are
   combined into one index with the IndCoef strides.
   If the resulting index falls outside [0, N), diagnostics are printed
   and the program terminates.
*/
void CMAC::Tiling::getActiveParameter(const State& s, double& weight, int& index)
{
    int dim;

    index = 0;
    for (dim = 0; dim < State::dimensionality; ++dim) {
        const int cell = static_cast<int>(static_cast<float>((s.x[dim] - origin[dim]) / h[dim]));
        index += IndCoef[dim] * cell;
    }

    if (index >= 0 && index < N) {
        weight = tiles[index].w;
        return;
    }

    // Out of range: repeat the computation, printing every step, then abort.
    cout << "Error (tiling): tile index out of limits" << endl;
    cout << "state: " << s << " index=" << index << " N=" << N << endl;
    index = 0;
    for (dim = 0; dim < State::dimensionality; ++dim) {
        const int cell = static_cast<int>(static_cast<float>((s.x[dim] - origin[dim]) / h[dim]));
        cout << "ind[" << dim << "]=" << cell << " IndCoef[" << dim << "]=" << IndCoef[dim];
        index += IndCoef[dim] * cell;
        cout << " index=" << index << endl;
    }
    exit(EXIT_FAILURE);
}
void CMAC::Tiling::updateParameters(double* delta, double& MaxParameterChange, int& NumberParametersChanged)
/* Increment parameters by the amounts
in array delta, multiplied by the appropriate
learning step of each tile
*/
{
int i;
double change;
for (i=0; i<N; i++){
change = tiles[i].alpha * delta[i];
tiles[i].w=tiles[i].w + change;
if (fabs(change)>MaxParameterChange) MaxParameterChange=fabs(change);
if (tiles[i].changed==false){
if (change!=0){
NumberParametersChanged++;
tiles[i].changed=true;
}
}
}
}
void CMAC::Tiling::updateParameters(const State& s, double w_update, double& MaxParameterChange, int& NumberParametersChanged){
/* Updates all parameters of the tiling proportionally to
the amount w_update and corresponding eligibility traces of
each tile. Update eligibility traces as well.
Parameters:
s : input which determines the active tile
w_update : the ammount proportionally to which parameters are updated
*/
int i;
double change;
for(i=0; i<N; i++){
change = tiles[i].alpha*w_update * tiles[i].e;
tiles[i].w = tiles[i].w + change;
if (fabs(change)>MaxParameterChange) MaxParameterChange=fabs(change);
if (tiles[i].changed==false){
if (change!=0){
NumberParametersChanged++;
tiles[i].changed=true;
}
}
}
}
/* Divides the learning step of every tile by factor
   (used with a decreasing schedule).
   Parameters:
   factor : divisor applied to each tile's learning step
*/
void CMAC::Tiling::decreaseAlpha(double factor)
{
    int k = 0;
    while (k < N) {
        tiles[k].alpha /= factor;
        ++k;
    }
}
void CMAC::Tiling::decreaseAllAlpha(){
/* Moves the learning step of ALL tiles one step down the
   visitation-based schedule.
   The new step is computed from the current step of tile 0 only:
       F / (floor(F / alpha_0) + 1),   F = ALPHA_VISITATION_FACTOR
   and is then assigned to every tile.
   Does nothing when the tiling holds no tiles (for example after
   default construction, where tiles is NULL), since there is then no
   tile 0 to read.
*/
    int i;
    double newAlpha;

    if ((N<=0) || (tiles==NULL))
        return;

    newAlpha=ALPHA_VISITATION_FACTOR/(floor(ALPHA_VISITATION_FACTOR/tiles[0].alpha)+1);
    for(i=0; i<N; i++)
        tiles[i].alpha=newAlpha;
}
void CMAC::Tiling::decreaseAlpha(const State& s){
/* Update learning rate of active tile
(used with decreasing schedule where learning rate
is eqaul to 1/#of visitations to the tile)
*/
int ActiveIndex;
double temp;
getActiveParameter(s, temp, ActiveIndex);
tiles[ActiveIndex].alpha=ALPHA_VISITATION_FACTOR/(ALPHA_VISITATION_FACTOR/tiles[ActiveIndex].alpha+1);
}
void CMAC::Tiling::setVisitationFactor(double factor){
ALPHA_VISITATION_FACTOR=factor;
}
void CMAC::Tiling::replaceTrace(const State& s, double replace){
/* Replace traces of parameters,
activated by input state s to value replace
*/
int ActiveIndex;
double temp;
getActiveParameter(s, temp, ActiveIndex);
tiles[ActiveIndex].e=replace;
}
/* Multiplies the eligibility trace of every tile by factor.
   Parameters:
   factor : multiplier applied to each trace
*/
void CMAC::Tiling::decayTraces(double factor)
{
    int k = 0;
    while (k < N) {
        tiles[k].e *= factor;
        ++k;
    }
}
void CMAC::Tiling::accumulateTraces(const State& s, double amount){
int ActiveIndex;
double temp;
getActiveParameter(s, temp, ActiveIndex);
tiles[ActiveIndex].e+=amount;
}
/* Finds the tiles whose weights have the smallest and the largest
   absolute value.
   Parameters:
   min    : receives the smallest absolute weight
   max    : receives the largest absolute weight
   minInd : receives the index of the tile holding min
   maxInd : receives the index of the tile holding max
   Tile 0 seeds both results, so the tiling must hold at least one tile.
*/
void CMAC::Tiling::getMinMax(double& min, double& max, int& minInd, int& maxInd)
{
    int k;

    minInd = 0;
    maxInd = 0;
    min = fabs(tiles[0].w);
    max = fabs(tiles[0].w);

    for (k = 1; k < N; ++k) {
        const double magnitude = fabs(tiles[k].w);
        if (magnitude < min) {
            min = magnitude;
            minInd = k;
        }
        else if (magnitude > max) {
            max = magnitude;
            maxInd = k;
        }
    }
}
void CMAC::Tiling::saveParameters(ofstream& file){
/* Save parameters of the tiling to the file.
Parameters:
file : output file obeject
*/
int i;
file << "Number of tiles: " << N << endl;
if (file.fail()){
cout << "Error (tiling): disk is full " << endl;
exit(EXIT_FAILURE);
}
file << "Number of tiles along each dimension: " << endl;
if (file.fail()){
cout << "Error (tiling): disk is full " << endl;
exit(EXIT_FAILURE);
}
for(i=0; i<State::dimensionality; i++){
file << n[i] << " ";
if (file.fail()){
cout << "Error (tiling): disk is full " << endl;
exit(EXIT_FAILURE);
}
}
file << endl;
file << "Tiles' width along each dimension: " << endl;
if (file.fail()){
cout << "Error (tiling): disk is full " << endl;
exit(EXIT_FAILURE);
}
for(i=0; i<State::dimensionality; i++){
file << h[i] << " ";
if (file.fail()){
cout << "Error (tiling): disk is full " << endl;
exit(EXIT_FAILURE);
}
}
file << endl;
file << "Origin of the tiling: " << endl;
if (file.fail()){
cout << "Error (tiling): disk is full " << endl;
exit(EXIT_FAILURE);
}
for(i=0; i<State::dimensionality; i++){
file << origin[i] << " ";
if (file.fail()){
cout << "Error (tiling): disk is full " << endl;
exit(EXIT_FAILURE);
}
}
file << endl;
file << "Tiles' parameters: " << endl;
if (file.fail()){
cout << "Error (tiling): disk is full " << endl;
exit(EXIT_FAILURE);
}
for(i=0; i<N; i++){
file << tiles[i].w << endl;
if (file.fail()){
cout << "Error (tiling): disk is full " << endl;
exit(EXIT_FAILURE);
}
}
}
void CMAC::Tiling::setParameters(ifstream& file){
/* Read parameters of the tiling from a text file
Parameters:
file : input file object
*/
int i;
char buffer[40];
char c;
file.get(c);
file.get(buffer, strlen("Number of tiles: ")+1);
if (file.fail()){
cout << "Error (tiling): input failed (1)" << endl;
exit(EXIT_FAILURE);
}
int t;
file >> t;
if (file.fail()){
cout << "Error (tiling): input failed (2)" << endl;
exit(EXIT_FAILURE);
}
file.get(c);
file.get(buffer, strlen("Number of tiles along each dimension: ")+1);
if (file.fail()){
cout << "Error (tiling): input failed (3) " << endl;
exit(EXIT_FAILURE);
}
for(i=0; i<State::dimensionality; i++){
file >> n[i];
if (file.fail()){
cout << "Error (tiling): input failed (4) " << i << endl;
exit(EXIT_FAILURE);
}
}
file.get(buffer, 5);
file.get(c);
file.get(buffer, strlen("Tiles' width along each dimension: ")+1);
if (file.fail()){
cout << "Error (tiling): input failed (5) " << endl;
exit(EXIT_FAILURE);
}
for(i=0; i<State::dimensionality; i++){
file >> h[i];
if (file.fail()){
cout << "Error (tiling): input failed (6) " << i << endl;
exit(EXIT_FAILURE);
}
}
file.get(buffer, 5);
file.get(c);
file.get(buffer, strlen("Origin of the tiling: ")+1);
if (file.fail()){
cout << "Error (tiling): input failed (7) " << endl;
exit(EXIT_FAILURE);
}
for(i=0; i<State::dimensionality; i++){
file >> origin[i];
if (file.fail()){
cout << "Error (tiling): input failed (8) " << i << endl;
exit(EXIT_FAILURE);
}
}
file.get(buffer, 5);
if (t!=N){
delete [] tiles;
N=t;
tiles = new Tile[N];
IndCoef[State::dimensionality-1]=1;
for(i=State::dimensionality-2; i>=0; i--)
IndCoef[i]=IndCoef[i+1]*n[i];
}
file.get(c);
file.get(buffer, strlen("Tiles' parameters: ")+1);
if (file.fail()){
cout << "Error (tiling): input failed (9) " << endl;
exit(EXIT_FAILURE);
}
for(i=0; i<N; i++){
file >> tiles[i].w;
if (file.fail()){
cout << "Error (tiling): input failed (10) " << i << endl;
exit(EXIT_FAILURE);
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -