rlpolicy.java

来自「用java写的一个强化学习程序」· Java 代码 · 共 192 行

JAVA

192 行

import java.lang.reflect.*;
import java.lang.*;

/**
 * Table of action values (Q-values) indexed by a multi-dimensional state.
 *
 * <p>The table is an n-dimensional {@code double} array created reflectively
 * from {@code dimSize}. Every dimension except the last indexes one component
 * of the state; the last dimension indexes the action. A state is therefore
 * an {@code int[]} with {@code dimSize.length - 1} entries.
 *
 * <p>Accessors store the most recently touched row in the {@link #qValues}
 * field, so instances share mutable scratch state between calls.
 */
public class RLPolicy {

    /** Size of each table dimension; the last entry is the number of actions. */
    int[] dimSize;

    /**
     * Live reference to the row of action values most recently looked up.
     * Kept as a field because it is package-visible; the methods of this class
     * no longer depend on its previous content.
     */
    double[] qValues;

    /** The n-dimensional {@code double} array holding all Q-values. */
    private Object qValuesTable;

    /** Number of distinct states (product of all state dimensions) and number of actions. */
    int states, actions;

    /**
     * Creates a table with the given dimensions. All entries start at 0.0.
     *
     * @param dimSize sizes of the state dimensions followed by the number of
     *                actions; must contain at least two entries
     * @throws NullPointerException     if {@code dimSize} is {@code null}
     * @throws IllegalArgumentException if {@code dimSize} has fewer than two
     *                                  entries (no state dimension)
     */
    RLPolicy( int[] dimSize ) {

        if( dimSize == null ) {
            throw new NullPointerException( "dimSize must not be null" );
        }
        if( dimSize.length < 2 ) {
            throw new IllegalArgumentException(
                "dimSize needs at least one state dimension and the action dimension, got length "
                + dimSize.length );
        }

        // Copy so that later changes to the caller's array cannot
        // desynchronise the stored sizes from the allocated table.
        this.dimSize = dimSize.clone();

        // Create n-dimensional array with size given in dimSize array.
        qValuesTable = Array.newInstance( double.class, this.dimSize );

        // Number of states: product of every dimension but the last.
        int stateCount = 1;
        for( int d = 0 ; d < this.dimSize.length - 1 ; d++ ) {
            stateCount *= this.dimSize[d];
        }
        states = stateCount;

        // Number of actions: size of the last dimension.
        actions = this.dimSize[this.dimSize.length - 1];
    }

    /**
     * Sets every Q-value in the table to {@code initValue} and prints the
     * number of states to standard output.
     *
     * @param initValue value written to every (state, action) entry
     */
    public void initValues( double initValue ) {

        int[] state = new int[dimSize.length - 1];

        System.out.println( "States: " + states );
        for( int visited = 0 ; visited < states ; visited++ ) {

            double[] row = liveRow( state );
            qValues = row;
            for( int action = 0 ; action < actions ; action++ ) {
                row[action] = initValue;
            }

            state = getNextState( state );
        }
    }

    /**
     * Advances {@code state} in place to the next state, with the first
     * dimension changing fastest. After the last state it wraps around to
     * all zeros.
     *
     * @param state current state; modified in place
     * @return the same array instance, now holding the next state
     */
    private int[] getNextState( int[] state ) {

        for( int d = 0 ; d < state.length ; d++ ) {
            state[d]++;
            if( state[d] < dimSize[d] ) {
                return state;
            }
            // Overflow in this dimension: reset it and carry into the next.
            state[d] = 0;
        }
        return state;
    }

    /**
     * Walks the nested array down to the row of action values for
     * {@code state} and returns that row itself (not a copy).
     *
     * @param state index into each state dimension
     * @return the live row stored in the table
     */
    private double[] liveRow( int[] state ) {

        Object level = qValuesTable;
        int lastStateDim = dimSize.length - 2;

        // Descend through every state dimension but the last one.
        for( int d = 0 ; d < lastStateDim ; d++ ) {
            level = Array.get( level, state[d] );
        }

        // The last state dimension selects the row of action values.
        return (double[]) Array.get( level, state[lastStateDim] );
    }

    /**
     * Returns a copy of the action values stored for {@code state}.
     * Changing the returned array does not change the table.
     *
     * @param state index into each state dimension
     * @return a new array with one Q-value per action
     */
    public double[] getQValuesAt( int[] state ) {

        double[] row = liveRow( state );
        qValues = row;
        return row.clone();
    }

    /**
     * Stores {@code newQValue} for the given state and action.
     *
     * @param state     index into each state dimension
     * @param action    index of the action
     * @param newQValue value to store
     */
    public void setQValue( int[] state, int action, double newQValue ) {

        double[] row = liveRow( state );
        qValues = row;
        row[action] = newQValue;
    }

    /**
     * Returns the largest Q-value stored for {@code state}.
     *
     * @param state index into each state dimension
     * @return the largest entry of the row that is greater than
     *         {@code -Double.MAX_VALUE}; {@code -Double.MAX_VALUE} itself when
     *         no entry exceeds it
     */
    public double getMaxQValue( int[] state ) {

        double[] row = liveRow( state );
        qValues = row;

        double maxQ = -Double.MAX_VALUE;
        for( double q : row ) {
            if( q > maxQ ) {
                maxQ = q;
            }
        }
        return maxQ;
    }

    /**
     * Returns the Q-value stored for the given state and action.
     *
     * @param state  index into each state dimension
     * @param action index of the action
     * @return the stored value
     */
    public double getQValue( int[] state, int action ) {

        double[] row = liveRow( state );
        qValues = row;
        return row[action];
    }

    /**
     * Returns the index of an action with the largest Q-value for
     * {@code state}. When several actions share the largest value, one of them
     * is picked with {@link Math#random()}. When no entry is comparable (all
     * values are NaN), an action index is drawn with {@link Math#random()}
     * from the whole row.
     *
     * @param state index into each state dimension
     * @return index of the selected action
     */
    public int getBestAction( int[] state ) {

        double[] row = liveRow( state );
        qValues = row;

        // Size the tie buffer from the row that was just fetched, never from
        // whatever the scratch field happened to hold before this call.
        int[] candidates = new int[row.length];
        int candidateCount = 0;
        double maxQ = 0;

        for( int action = 0 ; action < row.length ; action++ ) {
            double q = row[action];
            if( Double.isNaN( q ) ) {
                continue;   // NaN never compares as a maximum
            }
            if( candidateCount == 0 || q > maxQ ) {
                // New strict maximum: restart the list of tied actions.
                maxQ = q;
                candidateCount = 0;
                candidates[candidateCount++] = action;
            }
            else if( q == maxQ ) {
                candidates[candidateCount++] = action;
            }
        }

        if( candidateCount == 0 ) {
            // Nothing comparable in the row: fall back to a random action.
            return (int) ( Math.random() * row.length );
        }
        if( candidateCount == 1 ) {
            return candidates[0];
        }
        // Several actions tie for the maximum: choose uniformly among them.
        return candidates[ (int) ( Math.random() * candidateCount ) ];
    }
}

rlpolicy.java - 源码说明

本页面展示了「用java写的一个强化学习程序」中的 rlpolicy.java 源码文件，采用 Java 编程语言编写，共 192 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。

虫虫下载站收录了大量与java相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。

⌨️ 快捷键说明

复制代码Ctrl + C

搜索代码Ctrl + F

全屏模式F11

增大字号Ctrl + =

减小字号Ctrl + -

显示快捷键?