Program Listing for File KDE.cpp

Return to documentation for file (lib/KDE.cpp)

#include "KDE.hpp"
#include <boost/lambda/bind.hpp>
#include <boost/lambda/lambda.hpp>
#include <boost/range/adaptors.hpp>
#include <boost/range/algorithm/for_each.hpp>
#include <boost/range/irange.hpp>
#include <boost/range/numeric.hpp>
#include <random>
#include <math.h>

using namespace std;
using namespace delphi::utils;
using boost::irange, boost::adaptors::transformed, boost::lambda::_1;

double sample_from_normal(
    std::mt19937 gen,
    double mu = 0.0,
    double sd = 1.0
) {
  normal_distribution<> d{mu, sd};
  return d(gen);
}

KDE::KDE(std::vector<double> v) : dataset(v) {

  // Compute the bandwidth using Silverman's rule
  mu = mean(v);
  auto X = v | transformed(_1 - mu);

  // Compute standard deviation of the sample.
  size_t N = v.size();
  double stdev = sqrt(inner_product(X, X, 0.0) / (N - 1));
  bw = pow(4 * pow(stdev, 5) / (3 * N), 1 / 5);
}

KDE::KDE(vector<double> thetas, int n_bins)  : n_bins(n_bins) {
  this->dataset = thetas;
  this->mu = mean(thetas);
  double small_count = 0.00001; // To avoid log(0)
  this->log_prior_hist = vector<double>(n_bins, small_count);
  this->delta_theta = M_PI / n_bins;

  int highest_freq = 0;
  int highest_freq_bin = 0;

//  int bin_lo = n_bins - 1;
//  int bin_hi = 0;

  for (double theta : thetas) {
//    theta = theta < 0 ? M_PI + theta : theta;

    int bin = this->theta_to_bin(theta);
//    bin_lo = bin < bin_lo ? bin : bin_lo;
//    bin_hi = bin > bin_hi ? bin : bin_hi;

    this->log_prior_hist[bin] += 1;

    if (highest_freq < this->log_prior_hist[bin]) {
      highest_freq = this->log_prior_hist[bin];
      highest_freq_bin = bin;
    }
  }

//  if (bin_lo != bin_hi && bin_lo != (bin_hi + 1) % n_bins)

  this->most_probable_theta = highest_freq_bin * this->delta_theta +
                              this->delta_theta / 2;
  double n_points = thetas.size() + small_count * n_bins;

  for (double & count : this->log_prior_hist) {
    count /= n_points;
    count = log(count);
  }
}

void KDE::set_num_bins(int n_bins) {
  this->n_bins = n_bins;
  this->delta_theta = M_PI / n_bins;
}

int KDE::theta_to_bin(double theta) {
    return floor(theta + M_PI_2 / this->delta_theta);
}


vector<double> KDE::resample(int n_samples,
                             std::mt19937& gen,
                             uniform_real_distribution<double>& uni_dist,
                             normal_distribution<double>& norm_dist) {
  vector<double> samples(n_samples);

  for (int i : irange(0, n_samples)) {
    double element = select_random_element(dataset, gen, uni_dist);

    // Transform the sampled values using a Gaussian distribution
    // ~ ( sampled value, bw)
    // We sample from a standard Gaussian and transform that sample
    // to the desired Gaussian distribution by
    // μ + σ * standard Gaussian sample
    samples[i] = element + bw * norm_dist(gen);
  }

  return samples;
}

// This Should not be called!
double KDE::pdf(double x) {
  double p = 0.0;
  size_t N = this->dataset.size();
  for (double elem : this->dataset) {
    double x1 = exp(-sqr(x - elem) / (2 * sqr(bw)));
    x1 /= N * bw * sqrt(2 * M_PI);
    p += x1;
  }
  return p;
}

vector<double> KDE::pdf(vector<double> v) {
  vector<double> values;
  for (double elem : v) {
    values.push_back(pdf(elem));
  }
  return values;
}

double KDE::logpdf(double x) { return log(pdf(x)); }