i_reinforcementlearner_id.java

来自「TeamBots 是一个可移植的多代理机器人仿真器」· Java 代码 · 共 243 行

JAVA

243 行

/** * i_ReinforcementLearner_id.java  */package EDU.gatech.cc.is.learning;import	java.io.*;/** * Reinforcement learning class with discrete inputs and outputs.   * The module will learn to select a discrete output based on  * state and a continuous reinforcement input.  The "i"s in front  * of and behind the name imply that this class takes integers as  * input and output.  The "d" indicates a double for the reinforcement  * input (i.e. a continuous value).  No committment is made to  * model-free or model-based learning, the class may be extended to either.  * <P> * Copyright (c)2000 Tucker Balch * * @author Tucker Balch (tucker@cc.gatech.edu) * @version $Revision: 1.1 $ */public abstract class i_ReinforcementLearner_id	implements Cloneable, Serializable	{	/**	 * Indicates whether logging is turned on or not.	 */	protected boolean	logging = false;	/**	 * The number of situations or states the object may be in.	 */	protected int		numstates;	/**	 * The number of actions to select from.	 */	protected int		numactions;	/**	 * The name of the file to save or read the policy in/from.	 */	protected String	policyfilename="default.policy";	private String	logfilename = "";	private FileOutputStream logfile;	private PrintWriter	logstream;	/**	 * Instantiates a reinforcement learner using default parameters.         * Individual implementations may allow additional	 * parameters to be adjusted using accessor methods.	 *	 * @param numstates  int, the number of states the system could be in.	 * @param numactions int, the number of actions or outputs to select from.	 */	public i_ReinforcementLearner_id(int numstatesin, int numactionsin)		{		numstates = numstatesin;		numactions = numactionsin;		}        /**         * Generate a String that describes the learner.         *         * @return a String describing the learner.         
*/        public String toString()                {                int i, j;                String retval = "Reinforcement learner with "+numstates+			" states and "+numactions+" actions \n";		return(retval);		}	/**	 * Select an output based on the state and reward.	 *	 * @param statein  int,    the current state.	 * @param rewardin double, reward for the last output, positive	 *                         numbers are "good."	 */	public	abstract int query(int statein, double rewardin);	/**	 * Called when the current trial ends.	 * This is used for tasks that have "absorbing states."	 *	 * @param V      double, the value of the absorbing state. Usually         *                       a large positive number if the final state	 *                       is desirable, or negative otherwise.	 * @param reward double, the single-step reward for the last 	 *		         state/action.	 */	public	abstract void endTrial(double V, double reward);	/**	 * Called to initialize for a new trial and get the	 * first action.	 *	 * @param statein  int,    the current state.	 */	public	abstract int initTrial(int statein);	/**	 * return statistical info about the learner.	 * Logging must be on for this to work.	 *	 * @return the average reward per query for the trial.	 */	public	double getAvgReward()		{		return(0);		}	/**	 * return statistical info about the learner.	 * Logging must be on for this to work.	 *	 * @return the number of policy changes for the trial.	 */	public	int getPolicyChanges()		{		return(0);		}	/**	 * return statistical info about the learner.	 * Logging must be on for this to work.	 *	 * @return the number of queries for the trial.	 */	public	int getQueries()		{		return(0);		}	/**	 * Set the filename for policy reading and writing.	 *	 * @param filename String, the directory and filename to use.	 */	public	void	setPolicyFileName(String filename)		{		policyfilename = filename;		}	/**	 * Turn on logging for data gathering.  
It is up to	 * the extended classes to actually do the writing to the file.	 *	 * @param filename String, the directory and filename to use.	 */	public	void	loggingOn(String filename)		{		logfilename = filename;		logging = true;		try			{                	logfile = 				new FileOutputStream(logfilename);                	logstream = new PrintWriter(				logfile);			}		catch (IOException e)			{			logging = false;			System.out.println("id_ReinforcementLearner_i: can't open logfile "				+logfile);			}		}	/**	 * Print something to the logfile. Called by the classes that 	 * extend this one.	 *	 * @param out String, the string to print.	 */	public	void	log(String out)		{		if ((logging)&&(logstream!=null))			{			logstream.println(out);			}		}				/**	 * Turn logging off.	 */	public	void	loggingOff()		{		logging = false;		try			{			logfile.close();			}		catch (IOException e)			{			}		}		/**	 * Turn logging on, but without opening a file.	 * This indicates we should keep track of some items, but not	 * save them to a file.	 */	public	void	loggingOn()		{		logging = true;		}		/**	 * Read the policy from a file.	 * Use setPolicyFileName() to determine where the policy is saved.	 *	 * @param filename String, the name of the file to read from.	 */	public abstract void readPolicy() throws IOException;	/**	 * Write the policy to a file.	 * Use setPolicyFileName() to determine where the policy is read from.	 *	 * @param filename String, the name of the file to write to.	 */	public abstract void savePolicy() throws IOException;	}

i_reinforcementlearner_id.java - 源码说明

本页面展示了「TeamBots 是一个可移植的多代理机器人仿真器」中的 i_reinforcementlearner_id.java 源码文件，采用 Java 编程语言编写，共 243 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。

虫虫下载站收录了大量与TeamBots相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。

⌨️ 快捷键说明

复制代码：Ctrl + C

搜索代码：Ctrl + F

全屏模式：F11

增大字号：Ctrl + =

减小字号：Ctrl + -

显示快捷键：?