// balltreedensityclass.cc
//////////////////////////////////////////////////////////////////////////////////////
// KD-tree code extended for use in kernel density estimation
//////////////////////////////////////////////////////////////////////////////////////
//
// Written by Alex Ihler and Mike Mandel
// Copyright (C) 2003 Alexander Ihler; distributable under GPL -- see README.txt
//
//////////////////////////////////////////////////////////////////////////////////////
#define MEX
//#define NEWVERSION
#include <math.h>
#include <assert.h>
#include "mex.h"
#include "BallTreeDensity.h"
double *pMin, *pMax; // need to declare these here, for kernel
double **pAdd, *pErr;
double *min, *max; // derivative functions in kernel.h
#include "kernels.h" // min&max kernel bounds for various kernels
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
//
// EVALUATION
//
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
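// Push the constant (local-expansion) term accumulated at aRoot down onto its
// immediate children and clear it at aRoot; used by the NEWVERSION code path.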
void pushDownLocal(const BallTree& atTree, const BallTree::index aRoot)
{
BallTree::index close;
if (!atTree.isLeaf(aRoot)) {
close = atTree.left(aRoot);
if (close != BallTree::NO_CHILD) pAdd[0][close] += pAdd[0][aRoot];
close = atTree.right(aRoot);
if (close != BallTree::NO_CHILD) pAdd[0][close] += pAdd[0][aRoot];
pAdd[0][aRoot] = 0;
}
}
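// Flush all accumulated constant terms down the tree to the leaves, folding each
// leaf's constant and its error bound into the pMin/pMax bounds there.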
void pushDownAll(const BallTree& locations)
{
BallTree::index j;
for (j=locations.root(); j<locations.leafFirst(locations.root())-1; j++) {
pAdd[0][locations.left(j)] += pAdd[0][j];
pAdd[0][locations.right(j)] += pAdd[0][j];
pAdd[0][j] = 0;
}
for (j=locations.leafFirst(locations.root()); j<=locations.leafLast(locations.root()); j++) {
pMin[j] += pAdd[0][j] - pErr[j];
pMax[j] += pAdd[0][j] + pErr[j];
pAdd[0][j] = 0; pErr[j] = 0;
}
}
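// Recompute the pMin/pMax bounds of every node in the subtree rooted at aRoot
// as the min/max over its children's bounds (post-order).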
void recurseMinMax(const BallTree& atTree, const BallTree::index aRoot)
{
BallTree::index l,r; l = atTree.left(aRoot); r = atTree.right(aRoot);
if (!atTree.isLeaf(l)) recurseMinMax(atTree,l);
if (!atTree.isLeaf(r)) recurseMinMax(atTree,r);
pMin[aRoot] = pMin[l]; pMax[aRoot] = pMax[l];
if (pMin[aRoot] > pMin[r]) pMin[aRoot] = pMin[r];
if (pMax[aRoot] < pMax[r]) pMax[aRoot] = pMax[r];
}
/////////////////////////////////////////////////////////////////////
// Recursively evaluate the density implied by the samples of the
// subtree (rooted at dRoot) of densTree at the locations given by
// the subtree (rooted at aRoot) of *this, to within the error
// percentage "maxErr"
/////////////////////////////////////////////////////////////////////
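// Pruning criterion: the kernel value between any point of dRoot and any point
// of aRoot lies in [Kmin,Kmax], so this block contributes between
// weight(dRoot)*Kmin and weight(dRoot)*Kmax to every evaluation point.  The
// pair is approximated by these bounds whenever Kmax-Kmin <= maxErr * (current
// lower bound), i.e. the error is controlled relative to the running estimate.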
void BallTreeDensity::evaluate(BallTree::index dRoot,
const BallTree& atTree, BallTree::index aRoot,
double maxErr) const
{
unsigned int k, close, far;
double Kmin,Kmax,add,total;
// find the minimum and maximum effect of these two balls on each other
Kmax = minDistKer(dRoot, atTree, aRoot);
Kmin = maxDistKer(dRoot, atTree, aRoot);
total = pMin[ aRoot ]; // take pmin of data below this level
#ifdef NEWVERSION
total += pAdd[0][aRoot] - pErr[aRoot]; // add lower bound from local expansion
#endif
total += weight(dRoot)*Kmin; // also add minimum for this block
// if the weighted contribution of this multiply is below the
// threshold, no need to recurse; just treat as constant
//// //if ( Kmax - Kmin <= maxErr) { // APPROXIMATE: ABSOLUTE
if ( Kmax - Kmin <= maxErr * total) { // APPROXIMATE: PERCENT
Kmin *= weight(dRoot); Kmax *= weight(dRoot);
if (this == &atTree && aRoot==dRoot) { // LEAVE-ONE-OUT (and same subtree)
for (k=atTree.leafFirst(aRoot); k<=atTree.leafLast(aRoot); k++){
pMin[k] += Kmin * (1 - weight(k)/weight(dRoot)); // leave our weight out of it
pMax[k] += Kmax * (1 - weight(k)/weight(dRoot)); //
}
recurseMinMax(atTree,aRoot);
} else { // NO L-O-O => just add away
#ifdef NEWVERSION
pAdd[0][aRoot] += (Kmin + Kmax)/2; pErr[aRoot] = (Kmax-Kmin)/2;
#else
// !!! Should *not* do this -- instead add to local expansion (constant term)
for (k=atTree.leafFirst(aRoot); k<=atTree.leafLast(aRoot); k++) {
pMin[k] += Kmin;
pMax[k] += Kmax;
}
#endif
if (!atTree.isLeaf(aRoot)) { pMin[aRoot] += Kmin; pMax[aRoot] += Kmax; }
}
} else if (Npts(dRoot)*atTree.Npts(aRoot)<=DirectSize){ // DIRECT EVALUATION
evalDirect(dRoot,atTree,aRoot);
} else if (0) { // FAST GAUSS APPROX
// if FGTTerms > 0 : have computed Hermite expansions of densTree (sigma uniform)
// if FGTError(dRoot->Nterms,minDistDtoA,sigma) < maxError * total
// (if maxError, sigma, Nterms known, compute R0 & check >= minDist)
// translate dRoot's hermite expansion to a local expansion around aRoot
// Need to iterate over aRoot's leaves & evaluate? (N log N)
// Update pMin structure...
} else { // RECURSE ON SUBTREES
#ifdef NEWVERSION
// push any local expansion
pushDownLocal(atTree,aRoot);
#endif
// Find the subtree of atTree closer to the density's left child and do that
// one first, so that the accumulated lower bounds are higher and there is a
// better chance of skipping work on the farther subtree.
close = atTree.closer( atTree.left(aRoot), atTree.right(aRoot), *this, left(dRoot));
if (left(dRoot) != NO_CHILD && close != NO_CHILD)
evaluate(left(dRoot), atTree, close, maxErr);
far = (close == atTree.left(aRoot)) ? atTree.right(aRoot) : atTree.left(aRoot);
if (left(dRoot) != NO_CHILD && far != NO_CHILD)
evaluate(left(dRoot), atTree, far, maxErr);
// Now the same thing for the density's right child
close = atTree.closer( atTree.left(aRoot), atTree.right(aRoot), *this, right(dRoot));
if (right(dRoot) != NO_CHILD && close != NO_CHILD)
evaluate(right(dRoot), atTree, close, maxErr);
far = (close == atTree.left(aRoot)) ? atTree.right(aRoot) : atTree.left(aRoot);
if (right(dRoot) != NO_CHILD && far != NO_CHILD)
evaluate(right(dRoot), atTree, far, maxErr);
// Propagate the updates to the children's min & max values up to this node
if (!atTree.isLeaf(aRoot)) {
pMin[aRoot] = pMin[ atTree.left(aRoot) ];
pMax[aRoot] = pMax[ atTree.left(aRoot) ];
if (atTree.right(aRoot) != NO_CHILD) {
if (pMin[aRoot] > pMin[ atTree.right(aRoot) ])
pMin[aRoot] = pMin[ atTree.right(aRoot) ];
if (pMax[aRoot] < pMax[ atTree.right(aRoot) ])
pMax[aRoot] = pMax[ atTree.right(aRoot) ];
}
}
}
}
///////////////////////////////////////////
// Sometimes it's cheaper to just evaluate the block directly (exhaustive pairwise sums).
///////////////////////////////////////////
void BallTreeDensity::evalDirect(BallTree::index dRoot, const BallTree& atTree, BallTree::index aRoot) const
{
BallTree::index i,j;
double minVal=2e22, maxVal=0;   // track the min & max leaf values within this block
for (j=atTree.leafFirst(aRoot); j<=atTree.leafLast(aRoot); j++) {
for (i=leafFirst(dRoot); i<=leafLast(dRoot); i++) {
if (this != &atTree || i!=j) { // Check leave-one-out condition;
double d = weight(i) * maxDistKer(i,atTree,j); // Do direct N^2 kernel evaluation
//if (this == &atTree) d /= 1-weight(j); // leave-one-out => renormalize weights
pMin[j] += d;
pMax[j] += d;
}
}
#ifdef NEWVERSION
}
recurseMinMax(atTree,aRoot); // pass up min (& max) value for pruning
#else
if (pMin[j] < minVal) minVal = pMin[j]; // determine min & max value in this block
if (pMax[j] > maxVal) maxVal = pMax[j];
}
pMin[aRoot] = minVal; pMax[aRoot] = maxVal;
#endif
}
/////////////////////////////////////////////////////////////////////
// Dual Tree evaluation: estimate the values at this ball tree's
// points given the other tree as the samples from a distribution.
/////////////////////////////////////////////////////////////////////
void BallTreeDensity::evaluate(const BallTree& locations, double* p, double maxErr) const
{
BallTree::index j;
assert(Ndim() == locations.Ndim());
assert(p != NULL);
pMin = new double[2*locations.Npts()];
pMax = new double[2*locations.Npts()];
for (j=0;j<2*locations.Npts();j++) pMin[j] = pMax[j] = 0;
#ifdef NEWVERSION
pAdd = new double*[1]; pAdd[0] = new double[2*locations.Npts()];
pErr = new double[2*locations.Npts()];
for (j=0;j<2*locations.Npts();j++) pAdd[0][j] = pErr[j] = 0;
#endif
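// The tolerance is doubled here, presumably because the final estimate below is
// the midpoint of the lower & upper bounds, which halves the bound-gap error.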
evaluate(root(), locations, locations.root(), 2*maxErr);
// Compute & account for the kernel f'ns normalization constant
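// e.g. for the Gaussian kernel the normalizer is (2*pi)^(d/2) * prod_i sigma_i;
// the sqrt below suggests bandwidthMax stores per-dimension variances.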
double norm = 1;
switch(getType()) {
case Gaussian: norm = pow(2*PI, ((double)Ndim())/2 );
if (bwUniform())
for (unsigned int i=0;i<Ndim();i++) norm *= sqrt(bandwidthMax[i]);
break;
case Laplacian: norm = pow(2, ((double)Ndim()) );
if (bwUniform())
for (unsigned int i=0;i<Ndim();i++) norm *= bandwidthMax[i];
break;
case Epanetchnikov: norm = pow(4.0/3, ((double)Ndim()) );
if (bwUniform())
for (unsigned int i=0;i<Ndim();i++) norm *= bandwidthMax[i];
break;
}
BallTree::index lRoot = locations.root();
#ifdef NEWVERSION
pushDownAll(locations);
#endif
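// Final estimate: the midpoint of the lower & upper bounds, divided by the kernel
// normalizer.  With leave-one-out, each point's own kernel was excluded, so its
// weight is also removed from the normalization (divide by 1-weight(j)).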
if (this == &locations) { // if we need to do leave-one-out
for (j=locations.leafFirst(lRoot); j<=locations.leafLast(lRoot); j++)
p[locations.getIndexOf(j)] = .5*(pMin[j]+pMax[j])/norm/(1-weight(j));
} else {
for (j=locations.leafFirst(lRoot); j<=locations.leafLast(lRoot); j++)
p[locations.getIndexOf(j)] = .5*(pMin[j]+pMax[j])/norm;
}
delete[] pMin; delete[] pMax;
#ifdef NEWVERSION
delete[] pAdd[0]; delete[] pAdd;
#endif
}
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
//
// GRADIENT CALCULATION
//
// Recursively evaluate the derivative of log-likelihood for two trees
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
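// Global gradient accumulators: gradD collects derivatives with respect to the
// density tree's points, gradA with respect to the evaluation locations; either
// pointer may be NULL when that gradient is not requested.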
double *gradD, *gradA;
////////////////////////////////////////////////////////////////////////////////////
// DIRECT VERSION:
// Just iterate over the N^2 indices; faster than recursion for small N.
////////////////////////////////////////////////////////////////////////////////////
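// Each pair (i,j) contributes weight(j)*weight(i) times the midpoint of the
// kernel-derivative bounds that dKdX_p appears to return through the global
// min[]/max[] arrays; negated for gradD, positive for gradA.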
void BallTreeDensity::llGradDirect(BallTree::index dRoot, const BallTree& atTree, BallTree::index aRoot, Gradient gradWRT) const
{
BallTree::index i,j;
unsigned int k;
for (i=atTree.leafFirst(aRoot);i<=atTree.leafLast(aRoot);i++) {
for (j=leafFirst(dRoot);j<=leafLast(dRoot);j++) {
if (this != &atTree || i!=j) { // Check leave-one-out condition;
unsigned long Nj = Ndim() * getIndexOf(j);
unsigned long Ni = atTree.Ndim() * atTree.getIndexOf(i);
dKdX_p(j,atTree,i,true,gradWRT); // use "true" to signal leaf evaluation
if (gradD) for (k=0;k<Ndim();k++) {
gradD[Nj+k] -= weight(j) * atTree.weight(i) * (max[k]+min[k])/2;
}
if (gradA) for (k=0;k<Ndim();k++) {
gradA[Ni+k] += weight(j) * atTree.weight(i) * (max[k]+min[k])/2;
}
} } }
}
////////////////////////////////////////////////////////////////////////////////////
// RECURSIVE VERSION:
// Try to find approximations to speed things up.
////////////////////////////////////////////////////////////////////////////////////
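// The block-level approximation is accepted when the summed squared half-gaps of
// the per-dimension derivative bounds fall within tolGrad; otherwise fall back to
// direct evaluation or recursion (see the sketch at the end of this routine).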
void BallTreeDensity::llGradRecurse(BallTree::index dRoot,const BallTree& atTree, BallTree::index aRoot, double tolGrad, Gradient gradWRT) const
{
BallTree::index i,j,close,far;
unsigned int k;
dKdX_p(dRoot,atTree,aRoot,false,gradWRT); // "false" signals maybe not leaf nodes
double norm = 0;
for (k=0;k<Ndim();k++) norm += .25*(max[k]-min[k])*(max[k]-min[k]);
if (norm <= tolGrad) { // IF OUR APPROXIMATION IS GOOD ENOUGH, ...
if (this == &atTree && aRoot==dRoot) { // LEAVE-ONE-OUT (and same subtree)
if (gradD) for (j=leafFirst(dRoot);j<=leafLast(dRoot);j++) {
unsigned long Nj = Ndim() * getIndexOf(j);
for (k=0;k<Ndim();k++)
gradD[Nj+k] -= (atTree.weight(aRoot)-atTree.weight(j)) * weight(j) * (max[k]+min[k])/2;
}
if (gradA) for (i=atTree.leafFirst(aRoot);i<=atTree.leafLast(aRoot);i++) {
unsigned long Ni = atTree.Ndim() * atTree.getIndexOf(i);
for (k=0;k<Ndim();k++)
gradA[Ni+k] += atTree.weight(i) * (weight(dRoot)-weight(i)) * (max[k]+min[k])/2;
}
} else { // NO LOO; just regular
if (gradD) for (j=leafFirst(dRoot);j<=leafLast(dRoot);j++) {
unsigned long Nj = Ndim() * getIndexOf(j);
for (k=0;k<Ndim();k++)
gradD[Nj+k] -= atTree.weight(aRoot) * weight(j) * (max[k]+min[k])/2;
}
if (gradA) for (i=atTree.leafFirst(aRoot);i<=atTree.leafLast(aRoot);i++) {
unsigned long Ni = atTree.Ndim() * atTree.getIndexOf(i);
for (k=0;k<Ndim();k++)
gradA[Ni+k] += atTree.weight(i) * weight(dRoot) * (max[k]+min[k])/2;
}
}
// OR, IF THERE ARE VERY FEW POINTS
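// The remainder below is a sketch of the expected continuation, mirroring the
// structure of evaluate() above: a direct O(N^2) evaluation when the blocks are
// small, and otherwise recursion on the closer subtree pair first.
} else if (Npts(dRoot)*atTree.Npts(aRoot) <= DirectSize) {   // DIRECT EVALUATION
evalDirect:
llGradDirect(dRoot, atTree, aRoot, gradWRT);
} else {                                                     // RECURSE ON SUBTREES
close = atTree.closer( atTree.left(aRoot), atTree.right(aRoot), *this, left(dRoot));
if (left(dRoot) != NO_CHILD && close != NO_CHILD)
llGradRecurse(left(dRoot), atTree, close, tolGrad, gradWRT);
far = (close == atTree.left(aRoot)) ? atTree.right(aRoot) : atTree.left(aRoot);
if (left(dRoot) != NO_CHILD && far != NO_CHILD)
llGradRecurse(left(dRoot), atTree, far, tolGrad, gradWRT);
close = atTree.closer( atTree.left(aRoot), atTree.right(aRoot), *this, right(dRoot));
if (right(dRoot) != NO_CHILD && close != NO_CHILD)
llGradRecurse(right(dRoot), atTree, close, tolGrad, gradWRT);
far = (close == atTree.left(aRoot)) ? atTree.right(aRoot) : atTree.left(aRoot);
if (right(dRoot) != NO_CHILD && far != NO_CHILD)
llGradRecurse(right(dRoot), atTree, far, tolGrad, gradWRT);
}
}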