.. _program_listing_file_lib_KDE.cpp: Program Listing for File KDE.cpp ================================ |exhale_lsh| :ref:`Return to documentation for file ` (``lib/KDE.cpp``) .. |exhale_lsh| unicode:: U+021B0 .. UPWARDS ARROW WITH TIP LEFTWARDS .. code-block:: cpp #include "KDE.hpp" #include #include #include #include #include #include #include #include using namespace std; using namespace delphi::utils; using boost::irange, boost::adaptors::transformed, boost::lambda::_1; double sample_from_normal( std::mt19937 gen, double mu = 0.0, double sd = 1.0 ) { normal_distribution<> d{mu, sd}; return d(gen); } KDE::KDE(std::vector v) : dataset(v) { // Compute the bandwidth using Silverman's rule mu = mean(v); auto X = v | transformed(_1 - mu); // Compute standard deviation of the sample. size_t N = v.size(); double stdev = sqrt(inner_product(X, X, 0.0) / (N - 1)); bw = pow(4 * pow(stdev, 5) / (3 * N), 1 / 5); } KDE::KDE(vector thetas, int n_bins) : n_bins(n_bins) { this->dataset = thetas; this->mu = mean(thetas); double small_count = 0.00001; // To avoid log(0) this->log_prior_hist = vector(n_bins, small_count); this->delta_theta = M_PI / n_bins; int highest_freq = 0; int highest_freq_bin = 0; // int bin_lo = n_bins - 1; // int bin_hi = 0; for (double theta : thetas) { // theta = theta < 0 ? M_PI + theta : theta; int bin = this->theta_to_bin(theta); // bin_lo = bin < bin_lo ? bin : bin_lo; // bin_hi = bin > bin_hi ? bin : bin_hi; this->log_prior_hist[bin] += 1; if (highest_freq < this->log_prior_hist[bin]) { highest_freq = this->log_prior_hist[bin]; highest_freq_bin = bin; } } // if (bin_lo != bin_hi && bin_lo != (bin_hi + 1) % n_bins) this->most_probable_theta = highest_freq_bin * this->delta_theta + this->delta_theta / 2; double n_points = thetas.size() + small_count * n_bins; for (double & count : this->log_prior_hist) { count /= n_points; count = log(count); } } void KDE::set_num_bins(int n_bins) { this->n_bins = n_bins; this->delta_theta = M_PI / n_bins; } int KDE::theta_to_bin(double theta) { return floor(theta + M_PI_2 / this->delta_theta); } vector KDE::resample(int n_samples, std::mt19937& gen, uniform_real_distribution& uni_dist, normal_distribution& norm_dist) { vector samples(n_samples); for (int i : irange(0, n_samples)) { double element = select_random_element(dataset, gen, uni_dist); // Transform the sampled values using a Gaussian distribution // ~ ( sampled value, bw) // We sample from a standard Gaussian and transform that sample // to the desired Gaussian distribution by // μ + σ * standard Gaussian sample samples[i] = element + bw * norm_dist(gen); } return samples; } // This Should not be called! double KDE::pdf(double x) { double p = 0.0; size_t N = this->dataset.size(); for (double elem : this->dataset) { double x1 = exp(-sqr(x - elem) / (2 * sqr(bw))); x1 /= N * bw * sqrt(2 * M_PI); p += x1; } return p; } vector KDE::pdf(vector v) { vector values; for (double elem : v) { values.push_back(pdf(elem)); } return values; } double KDE::logpdf(double x) { return log(pdf(x)); }