📄 mlp.java
字号:
swap = out2; out2 = new_out2; new_out2 = swap; swap = out3; out3 = new_out3; new_out3 = swap; swap = errors; errors = new_errors; new_errors = swap; SSE = new_SSE; } //BACKPROPAGATION PHASE d3 = deltalog(out3,errors); d2 = deltalog(out2,d3,wmx3); d1 = deltalog(out1,d2,wmx2); return SSE; } /**@return Number of training epochs performed so far on the network */ public int getepoch() { return epoch; } /**@param n Integer specifying which layer's weight matrix you want. (1 forinput layer, 2 for hidden layer, 3 for output layer)@return A double[][] matrix containing the current weight matrix of layer n(has Sn rows, and S(n-1) columns- or rawDim columns if n=1) */ public double[][] getWeightMatrix(int n) { double[][] W; switch (n) { case 1: W = wmx1; break; case 2: W = wmx2; break; case 3: W = wmx3; break; default: System.out.println("getWeightMatrix: Invalid parameter"); return null; } return W; } /**@param n Integer specifying which layer's bias vector you want. (1 forinput layer, 2 for hidden layer, 3 for output layer)@return double[][] containing the current bias vector of layer n(has Sn rows, one column) */ public double[][] getBiasVector(int n) { double[][] b; switch (n) { case 1: b = b1; break; case 2: b = b2; break; case 3: b = b3; break; default: System.out.println("getBiasVector: Invalid parameter"); return null; } return b; } //--------------------------------------------------------------------- //-------- END OF PUBLIC API -------- PRIVATE METHODS FOLLOW ---------- //---------------------------------------------------------------------/*---------------------------------------------------------------------PRIVATE METHOD: InitWBPARAMETERS: int S, int rawDim, int nRETURNS: voidALTERS CLASS VARIABLES: wmx1,b1,wmx2,b2,wmx3,b3, depending on nREADS CLASS VARIABLES: noneThis method is the Nguyen-Widrow random initializer for log-sigmoid neurons.It establishes a properly sized and randomly initialized weight matrix andbias vector for neuron layer n (comprised of S 
neurons) to use inprocessing its rawDim input values. This method is called by the constructor. */ private void InitWB(int nS,int rawDim,int n) { //Method to establish a properly sized and randomly initialized //weight matrix and bias vector for the neuron layer n (comprised //of nS neurons) to use in processing its rawDim input values. double[][] w = new double[nS][rawDim]; double[][] b = new double[nS][1]; double acc,acc2; double magw; magw = 2.8 * (nS^(1/rawDim)); for (int i=0; i<nS; i++) { b[i][0] = (2*Math.random())-1; acc = 0; for (int j=0; j<rawDim; j++) { w[i][j] = (2*Math.random())-1; acc += (w[i][j]*w[i][j]); } // Normalize the "neuron i" row vector (in w) created in previous loop acc = Math.sqrt(1/acc); acc2 = 0; for (int j=0; j<rawDim; j++) { w[i][j] *= (2 * acc); acc2 += w[i][j]; } //Normalize "neuron i" bias vector b with respect to its weights in w b[i][0] -= (acc2/2); } //Now w and b are initialized and "presentable". switch (n) { case 1: wmx1 = w; b1 = b; break; case 2: wmx2 = w; b2 = b; break; case 3: wmx3 = w; b3 = b; break; default: System.out.println("InitWB: Invalid network layer argument."); } } /*---------------------------------------------------------------------PRIVATE METHOD: logsigPARAMETERS: double[][] ip, double[][] bRETURNS: double [][] output from a log-sigmoid transfer functionALTERS CLASS VARIABLES: noneREADS CLASS VARIABLES: nonelogsig adds the bias vector element from b to each element in thecorresponding row of ip (which is a matrix product of weights x inputs).It then subjects each element of the result to a log-sigmoid transferfunction 1/(1+e^-x) and returns the resulting array. 
*/ private double[][] logsig( double[][] ip, double[][] b ) { int[] ip_dims = getSize(ip); double[][] out = new double[ip_dims[0]][ip_dims[1]]; for (int i=0;i<ip_dims[0];i++) { for (int j=0;j<ip_dims[1];j++) { ip[i][j] += b[i][0]; out[i][j] = 1/(1+Math.exp(-ip[i][j])); } } return out; } /*---------------------------------------------------------------------PRIVATE METHOD: deltalogPARAMETERS: double[][] out, double[][] errRETURNS: double [][] matrix of derivatives of error for an output layer.ALTERS CLASS VARIABLES: noneREADS CLASS VARIABLES: noneThis version of deltalog takes two arguments: out[][], an SnxQ matrixof outputs from layer n, and err[][], an SnxQ matrix of associated errorsfrom layer n. The value returned is an SnxQ matrix of derivatives oferror for the output layer. This version is used for the output layer only.The input and hidden layers do not have their associated errors readilyavailable and they must be calculated indirectly using the chain rule ofderivatives. */ private double[][] deltalog( double[][] out, double[][] err) { int[] dims=getSize(out); //should be same as getSize(err) double[][] delta= new double[dims[0]][dims[1]]; for (int i=0;i<dims[0];i++){ for (int j=0;j<dims[1];j++) { delta[i][j] = out[i][j] * (1-out[i][j]) * err[i][j]; } } return delta; } /*---------------------------------------------------------------------PRIVATE METHOD: deltalogPARAMETERS: double[][] out, double[][] d, double[][] wRETURNS: double [][] matrix of derivatives of error for a layer.ALTERS CLASS VARIABLES: noneREADS CLASS VARIABLES: noneThis version of deltalog takes 3 arguments: out[][], an SnxQ matrixof outputs from layer n, d[][], the S(n+1)xQ matrix of derivatives oferror for the succeeding layer, and w, the weight matrix for that layer.It calculates the SnxQ matrix of associated errors for layer n by applyingthe chain rule of derivatives, using the weight matrix and error derivativeof the previous layer. 
This version is used for the input and hiddenlayers only. The output layer has its associated error available already,and does not need to have it computed. */ private double[][] deltalog( double[][] out, double[][] d, double[][] w) { // out is an SnxQ matrix of outputs from layer n. // d is an SnxQ matrix of derivatives of error for layer n+1. // w is the associated weight matrix of layer n+1. // This method computes err- the SnxQ matrix of associated errors // for layer n- using d and w. // This method is typically used for the input and hidden layers only. int[] dims = getSize(out); double[][] delta = new double[dims[0]][dims[1]]; double[][] wt = transpose(w); double[][] err = multiply(wt,d); for (int i=0; i<dims[0]; i++){ for (int j=0; j<dims[1]; j++) { delta[i][j] = out[i][j] * (1-out[i][j]) * err[i][j]; } } return delta; } /*---------------------------------------------------------------------PRIVATE METHOD: multiplyPARAMETERS: double[][] A, double[][] BRETURNS: double [][] AxB, the inner product of the matrix arguments.ALTERS CLASS VARIABLES: noneREADS CLASS VARIABLES: noneThis is simply a method to multiply two matrices. It is used frequently.If A and B are not compatible for multiplication, this method returns anull value. 
*/ private double[][] multiply(double[][] A, double[][] B) { // Method to multiply two matrices: AxB=C int[] dimA = getSize(A); int[] dimB = getSize(B); int Am = dimA[0]; int An = dimA[1]; int Bm = dimB[0]; int Bn = dimB[1]; int[] tmp = getSize(B); if (An != Bm) { // # columns in A must equal # rows in B for AxB to be defined System.out.println("multiply error"); System.out.println("rows in A "+Am); System.out.println("cols in A "+An); System.out.println("rows in B "+Bm); System.out.println("cols in B "+Bn); return null; } double[][] C = new double[Am][Bn]; for (int i=0;i<Am;i++) { for (int j=0; j<Bn; j++) { C[i][j]=0; for (int k=0; k<An; k++) { C[i][j] += A[i][k]*B[k][j]; } } } return C; } /*---------------------------------------------------------------------PRIVATE METHOD: transposePARAMETERS: double[][] ARETURNS: double [][] transpose of matrix argumentALTERS CLASS VARIABLES: noneREADS CLASS VARIABLES: noneThis method simply returns the transpose of a matrix. It is calledby the three-argument version of deltalog. */ private double[][] transpose(double[][] A) { // returns transpose of a matrix A int[] dim_A = getSize(A); int m=dim_A[0]; int n=dim_A[1]; double[][] At = new double[n][m]; for (int i=0; i<m; i++) { for (int j=0; j<n; j++) { At[j][i] = A[i][j]; } } return At; }/*---------------------------------------------------------------------PRIVATE METHOD: getSizePARAMETERS: double[][] ARETURNS: int[] containing dimensions of AALTERS CLASS VARIABLES: noneREADS CLASS VARIABLES: noneJava's array "length" field only returns the number of rows in a2-D array- there is no easy way to get the number of columns.This method will return a two-element int array containing thedimension of its 2-D double[][] argument, i.e. number of rows andcolumns contained in a RECTANGULAR double[][] array A. It searchesfor the lowest array column index that will generate anArrayIndexOutOfBoundsException. This method only scans the lengthof the first row. 
It is not expecting funny arrays that are triangular
or otherwise irregularly shaped but it will still work on them.
*/
    private int[] getSize(double[][] A) {
        // The # of rows is the easy part!
        int rows = A.length;

        // Every Java array carries its own length, including each row of a
        // 2-D array, so the column count is read directly from the first
        // row; no bounds probing is needed. A matrix with no rows has no
        // first row to inspect and is reported as having zero columns.
        int cols = (rows == 0) ? 0 : A[0].length;

        return new int[] { rows, cols };
    }

/*---------------------------------------------------------------------
PRIVATE METHOD: diagnostic
PARAMETERS: double[][] testarray, String message
RETURNS: void
ALTERS CLASS VARIABLES: none
READS CLASS VARIABLES: none

This is a debugging method that I decided to leave in here because it
may prove handy to anyone modifying the code. All it does is spit out
the sum and sum of squares of the first column of an input matrix to
the console output, preceded by the String argument. It is useful if
you want to determine how array contents are changing between
subsequent calls.
*/
    private void diagnostic(double[][] testarray, String message) {
        double sum = 0;
        double sumOfSquares = 0;

        // Only element 0 of each row (the first column) is examined.
        for (double[] row : testarray) {
            double first = row[0];
            sum += first;
            sumOfSquares += (first*first);
        }

        System.out.println(message);
        System.out.println("acc "+sum+" accsq "+sumOfSquares);
        System.out.println("-------------------------------");
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -