📄 noise_filter.java

📁 java的小波分析程序
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
12 下一页


package wavelet_util;

import java.util.Vector;
import sort.*;
import wavelets.*;
import java.io.*;

/**
  <p>
  The objective in filtering is to remove noise while keeping the
  features that are interesting.
  </p>

  <p> 
  Wavelets allow a time series to be examined at various
  resolutions.  This can be a powerful tool in filtering out noise.
  This class supports the subtraction of gaussian noise from
  the time series.
  </p>

  <p>
  The identification of noise is complex and I have not found any
  material that I could understand which discussed noise
  identification in the context of wavelets.  I did find some material
  that has been difficult and frustrating.  In particular
  <i>Image Processing and Data Analysis: the multiscale approach</i>
  by Starck, Murtagh and Bijaoui.
  </p>
  
  <p>
  If the price of a stock follows a random walk, its price will be
  distributed in a bell (gaussian) curve.  This is one way of stating
  the concept from financial theory that the daily return is normally
  distributed (here daily return is defined as the difference between
  yesterday's close price and today's close price).  Movement outside
  the bounds of the curve may represent something other than a random walk
  and so, in theory, might be interesting.
  </p>

  <p> 
  At least in the case of the single test case used in developing this
  code (Applied Materials, symbol: AMAT), the coefficient distribution
  in the highest frequency is almost a perfect normal curve.  That is,
  the mean is close to zero and the standard deviation is close to
  one.  The area under this curve is very close to one.  This
  resolution approximates the daily return.  At lower frequencies the
  mean moves away from zero and the standard deviation increases.
  This results in a flattened curve, whose area in the coefficient
  range is increasingly less than one.
  </p>

  <p> 
  The code in this class subtracts the normal curve from the
  coefficients at each frequency up to some minimum.  This leaves only
  the coefficients above the curve which are used to regenerate the
  time series (without the noise, in theory).  This filter removes 50
  to 60 percent of the coefficients.
  </p>

  <p>
  It's probably worth mentioning that there are other kinds of
  noise, most notably Poisson noise.  In theory daily data
  tends to show gaussian noise, while intraday data would
  show Poisson noise.  Intraday Poisson noise would result
  from the random arrival and size of orders.
  </p>

  <p>
  This class has two public methods:
  </p>
  <ol>
  <li>
  <p>
  <i>filter_time_series</i>, which is passed a file name and a time series
  </p>
  </li>
  <li>
  <p>
  <i>gaussian_filter</i> which is passed a set of Haar coefficient
  spectrum and an array allocated for the noise values.  The
  noise array will be the same size as the coefficient array.
  </p>
  </li>
  </ol>

 */
public class noise_filter extends plot {

/**
  Report the name of this class as a string.
 */
String class_name()
{
  return "noise_filter";
}

  /**
    <p>
    A single coefficient value paired with the position it held in
    the array it was taken from.  Keeping the position alongside the
    value lets an array of points be sorted by value (for
    histogramming) and afterwards sorted by index to recover the
    original ordering.
    </p>
   */
  private class point {
    /** index in the original array */
    public int index;
    /** coefficient value */
    public double val;

    point(int position, double value)
    {
      this.index = position;
      this.val = value;
    }
  } // point


  /**
    <p>
    A histogram bin.
    </p>
    <p>
    Bin b<sub>i</sub> covers the values from b<sub>i</sub>.start
    up to b<sub>i+1</sub>.start.
    </p>
    <p>
    References to the point objects whose values fall in that range
    are kept in the <i>vals</i> Vector, so the number of values in
    the bin is <i>vals.size()</i>.
    </p>
   */
  private class bin {
    /** lower bound of the bin's value range */
    public double start;
    /** the point objects that fall in this bin */
    public Vector vals;

    bin( double lower )
    {
      this.start = lower;
      this.vals = new Vector();
    }
  } // bin

   /**
      Parameters of a bell curve: the mean and sigma (the standard
      deviation).
    */
   private class bell_info {
     public double mean;
     public double sigma;

     /** Create an object whose mean and sigma keep their default value of zero. */
     public bell_info()
     {
     }

     /** Create an object holding the given mean and sigma. */
     public bell_info(double m, double s)
     {
       this.mean = m;
       this.sigma = s;
     }
   } // bell_info



  /**
    <p>
    Build a histogram from the data in the pointz array, which must
    already be sorted by value (smallest first).  Each point object
    is appended to the <i>vals</i> Vector of the bin whose range
    contains the point's value.
    </p>
    <p>
    Bin b[i] covers b[i].start up to, but not including,
    b[i+1].start.  The last bin has no upper bound: calc_histo uses
    the largest value as the upper edge of the histogram range, so a
    half-open last bin would drop that value (or keep it only by
    floating point accident).  For the same reason a value below
    b[0].start is placed in the first bin rather than discarded.
    </p>
    <p>
    The bin edges are read from the bins themselves instead of being
    recomputed by repeated addition, so a single bin, or bins that
    all share one start value (all coefficients equal), are handled.
    Nothing is done if binz is empty.
    </p>

    @param binz   the histogram bins, ordered by ascending start value
    @param pointz the points to distribute among the bins, sorted by value
   */
  private void histogram( bin binz[], point pointz[] )
  {
    if (binz.length == 0) {
      return;
    }

    final int last = binz.length - 1;
    int ix = 0;
    for (int i = 0; i < pointz.length; i++) {
      double val = pointz[i].val;
      // pointz is sorted, so the bin index only ever moves forward;
      // stop at the last bin so that the top of the range is kept
      while (ix < last && val >= binz[ix + 1].start) {
        ix++;
      }
      binz[ix].vals.addElement( pointz[i] );
    }
  } // histogram


  
  /**
    Sorter for arrays of <i>point</i> objects, keyed on the
    index field.
   */
  private class sort_by_index extends generic_sort {

    /**
      Compare two <i>point</i> objects by their index field.

      @return -1 if a.index is less than b.index, 1 if a.index is
              greater than b.index, and 0 if they are equal
     */
    protected int compare( Object a, Object b )
    {
      point first = (point)a;
      point second = (point)b;

      if (first.index < second.index) {
        return -1;
      }
      if (first.index > second.index) {
        return 1;
      }
      return 0;
    } // compare

  } // sort_by_index


  /**
    Sorter for arrays of <i>point</i> objects, keyed on the
    val field.
   */
  private class sort_by_val extends generic_sort {

    /**
      Compare two <i>point</i> objects by their val field.

      @return -1 if a.val is less than b.val, 1 if a.val is
              greater than b.val, and 0 otherwise
     */
    protected int compare( Object a, Object b )
    {
      point first = (point)a;
      point second = (point)b;

      if (first.val < second.val) {
        return -1;
      }
      if (first.val > second.val) {
        return 1;
      }
      return 0;
    } // compare

  } // sort_by_val


  /**
    Allocate an array of <i>num_bins</i> histogram bins for the
    range <i>low</i> to <i>high</i>.  The range is divided into
    <i>num_bins</i> equal steps; the first bin starts at <i>low</i>
    and each following bin starts one step after the previous one.
   */
  private bin[] alloc_bins( int num_bins, double low, double high )
  {
    double width = (high - low) / (double)num_bins;
    bin result[] = new bin[ num_bins ];

    // running lower edge, advanced by one step per bin
    double edge = low;
    int i = 0;
    while (i < result.length) {
      result[i] = new bin( edge );
      edge += width;
      i++;
    }

    return result;
  } // alloc_bins


  /**
    <p>
    Calculate the histogram of the coefficients using
    <i>num_bins</i> histogram bins.
    </p>
    <p>
    Each Haar coefficient is held in a point object together with
    its index in the point array.  The pointz array is first sorted
    by value; its first and last elements then supply the low and
    high ends of the histogram range.  Once the bins are filled the
    array is sorted by index again, which returns it to its original
    ordering.
    </p>
    <p>
    The pointz array must contain at least one element.
    </p>
   */
  private bin[] calc_histo( point pointz[], int num_bins )
  {
    // order the points by value
    new sort_by_val().sort( pointz );

    double smallest = pointz[0].val;
    double largest = pointz[ pointz.length - 1 ].val;

    bin histo[] = alloc_bins( num_bins, smallest, largest );
    histogram( histo, pointz );

    // sort by index to put the points back in their original order
    new sort_by_index().sort( pointz );

    return histo;
  } // calc_histo


  /**
    <p>
    Allocate and initialize an array of <i>point</i> objects.
    The size of the array is <tt><i>end</i> - <i>start</i></tt>.
    Each point object in the array is initialized with its
    index and a Haar coefficient (from the <i>coef</i> array).
    </p>
    <p>
    Since the allocation code has to iterate through the 
    coefficient spectrum the mean and standard deviation
    are also calculated to avoid an extra iteration.  These
    values are returned in the <i>bell_info</i> object.
    </p>
   */
  private point[] alloc_points( double coef[], 
				int start, 
				int end,
				bell_info info )
  {
    int size = end - start;
    point pointz[] = new point[ size ];

    double sum = 0;
    int ix = start;
    for (int i = 0; i < size; i++) {
      pointz[i] = new point( i, coef[ix] );
      sum = sum + coef[ix];
      ix++;
    }
    double mean = sum / (double)size;
    
    // now calculate the standard deviation
    double stdDevSum = 0;
    double x;
    for (int i = 0; i < size; i++) {
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -