Program Listing for File synthetic_data.cpp
↰ Return to documentation for file (lib/synthetic_data.cpp
)
#include "AnalysisGraph.hpp"
#include <range/v3/all.hpp>
#include <boost/range/algorithm.hpp>
#include <boost/range/adaptors.hpp>
#include <sqlite3.h>
#include <unistd.h>
using namespace std;
using namespace delphi::utils;
using Eigen::VectorXd;
using fmt::print;
/*
============================================================================
Private: Synthetic Data Experiment
============================================================================
*/
/*
============================================================================
Public: Synthetic data experiment
============================================================================
*/
AnalysisGraph AnalysisGraph::generate_random_CAG(unsigned int num_nodes,
unsigned int num_extra_edges) {
AnalysisGraph G;
G.initialize_random_number_generator();
// Get all adjectives.
vector<string> adjectives;
AdjectiveResponseMap adjective_response_map =
G.construct_adjective_response_map(G.rand_num_generator, G.uni_dist, G.norm_dist, 100);
boost::range::copy(adjective_response_map | boost::adaptors::map_keys,
back_inserter(adjectives));
vector<int> cag_nodes = {0};
vector<int> rand_node(2);
vector<string> rand_adjectives(2);
int polarity = 0;
string source = "";
string target = "";
int src_idx = 0;
for (rand_node[1] = 1; rand_node[1] < num_nodes; rand_node[1]++) {
rand_node.clear();
sample(cag_nodes.begin(), cag_nodes.end(), rand_node.begin(), 1, G.rand_num_generator);
cag_nodes.push_back(rand_node[1]);
src_idx = G.uni_dist(G.rand_num_generator) < 0.5? 0 : 1;
source = to_string(rand_node[src_idx]);
target = to_string(rand_node[1 - src_idx]);
sample(adjectives.begin(), adjectives.end(), rand_adjectives.begin(), 2, G.rand_num_generator);
polarity = G.uni_dist(G.rand_num_generator) < 0.5 ? 1 : -1;
auto causal_fragment =
CausalFragment({rand_adjectives[0], 1, source},
{rand_adjectives[1], polarity, target});
G.add_edge(causal_fragment);
}
num_extra_edges = min(num_extra_edges, (num_nodes - 1) * (num_nodes - 1));
pair<EdgeDescriptor, bool> edge;
for (int _ = 0; _ < num_extra_edges; _++) {
edge.second = true;
while (edge.second) {
sample(cag_nodes.begin(), cag_nodes.end(), rand_node.begin(), 2, G.rand_num_generator);
src_idx = G.uni_dist(G.rand_num_generator) < 0.5? 0 : 1;
source = to_string(rand_node[src_idx]);
target = to_string(rand_node[1 - src_idx]);
edge = boost::edge(G.get_vertex_id(source),
G.get_vertex_id(target), G.graph);
}
sample(adjectives.begin(), adjectives.end(), rand_adjectives.begin(), 2, G.rand_num_generator);
polarity = G.uni_dist(G.rand_num_generator) < 0.5 ? 1 : -1;
auto causal_fragment =
CausalFragment({rand_adjectives[0], 1, source},
{rand_adjectives[1], polarity, target});
G.add_edge(causal_fragment);
}
RNG::release_instance();
return G;
}
void AnalysisGraph::initialize_random_CAG(unsigned int num_obs,
unsigned int kde_kernels,
InitialBeta initial_beta,
InitialDerivative initial_derivative,
bool use_continuous) {
this->initialize_random_number_generator();
this->set_default_initial_state(initial_derivative);
this->n_kde_kernels = kde_kernels;
this->construct_theta_pdfs();
this->init_betas_to(initial_beta);
this->pred_timesteps = num_obs + 1;
this->continuous = use_continuous;
this->find_all_paths();
this->set_transition_matrix_from_betas();
this->transition_matrix_collection.clear();
this->initial_latent_state_collection.clear();
// TODO: We are using this variable for two different purposes.
// create another variable.
this->res = 1;
this->transition_matrix_collection = vector<Eigen::MatrixXd>(this->res);
this->initial_latent_state_collection = vector<Eigen::VectorXd>(this->res);
this->transition_matrix_collection[0] = this->A_original;
this->initial_latent_state_collection[0] = this->s0;
}
void AnalysisGraph::generate_synthetic_data(unsigned int num_obs,
double noise_variance,
unsigned int kde_kernels,
InitialBeta initial_beta,
InitialDerivative initial_derivative,
bool use_continuous) {
this->initialize_random_CAG(num_obs, kde_kernels, initial_beta, initial_derivative, use_continuous);
vector<int> periods = {2, 3, 4, 6, 12};
vector<int> rand_period(1);
uniform_real_distribution<double> centers_dist(-100, 100);
int max_samples_per_period = 12;
for (int v : this->head_nodes) {
Node& n = (*this)[v];
sample(periods.begin(), periods.end(), rand_period.begin(), 1, this->rand_num_generator);
n.period = rand_period[0];
int gap_size = max_samples_per_period / n.period;
int offset = 0; // 0 <= offset < period
vector<int> filled_months;
for (int p = 0; p < n.period; p++) {
double center = centers_dist(this->rand_num_generator);
double spread = this->norm_dist(this->rand_num_generator) * 5;
n.centers.push_back(center);
n.spreads.push_back(spread);
int month = offset + gap_size * p;
n.generated_latent_centers_for_a_period[month] = center;
n.generated_latent_spreads_for_a_period[month] = spread;
filled_months.push_back(month);
}
this->interpolate_missing_months(filled_months, n);
print("{0} - {1}\n", n.name, n.period);
}
for (int v = 0; v < this->num_vertices(); v++) {
Node& n = (*this)[v];
n.add_indicator("ind_" + n.name, "synthetic");
n.mean = centers_dist(this->rand_num_generator);
while (n.mean == 0) {
n.mean = centers_dist(this->rand_num_generator);
}
}
this->generate_latent_state_sequences(0);
this->generate_observed_state_sequences();
this->observed_state_sequence.clear();
this->modeling_timestep_gaps.clear();
this->n_timesteps = num_obs;
// Access (concept is a vertex in the CAG)
// [ timestep ][ concept ][ indicator ][ observation ]
this->observed_state_sequence = ObservedStateSequence(this->n_timesteps);
this->modeling_timestep_gaps = vector<double>(this->n_timesteps, 1);
this->modeling_timestep_gaps[0] = 0;
int num_verts = this->num_vertices();
// Fill in observed state sequence
// NOTE: This code is very similar to the implementations in
// set_observed_state_sequence_from_data and get_observed_state_from_data
for (int ts = 0; ts < this->n_timesteps; ts++) {
this->observed_state_sequence[ts] = vector<vector<vector<double>>>(num_verts);
for (int v = 0; v < num_verts; v++) {
Node& n = (*this)[v];
this->observed_state_sequence[ts][v] = vector<vector<double>>(n.indicators.size());
for (int i = 0; i < n.indicators.size(); i++) {
this->observed_state_sequence[ts][v][i] = vector<double>();
this->observed_state_sequence[ts][v][i].push_back(
this->predicted_observed_state_sequences[0][ts + 1][v][i] +
noise_variance * this->norm_dist(this->rand_num_generator));
}
}
}
RNG::release_instance();
}
void AnalysisGraph::interpolate_missing_months(vector<int> &filled_months, Node &n) {
sort(filled_months.begin(), filled_months.end());
// Interpolate values for the missing months
if (filled_months.size() > 1) {
for (int i = 0; i < filled_months.size(); i++) {
int month_start = filled_months[i];
int month_end = filled_months[(i + 1) % filled_months.size()];
int num_missing_months = 0;
if (month_end > month_start) {
num_missing_months = month_end - month_start - 1;
}
else {
num_missing_months = (11 - month_start) + month_end;
}
for (int month_missing = 1;
month_missing <= num_missing_months;
month_missing++) {
n.generated_latent_centers_for_a_period[(month_start + month_missing) % 12] =
((num_missing_months - month_missing + 1) *
n.generated_latent_centers_for_a_period[month_start] +
(month_missing)*n
.generated_latent_centers_for_a_period[month_end]) /
(num_missing_months + 1);
n.generated_latent_spreads_for_a_period[(month_start + month_missing) % 12] =
((num_missing_months - month_missing + 1) *
n.generated_latent_spreads_for_a_period[month_start] +
(month_missing)*n
.generated_latent_spreads_for_a_period[month_end]) /
(num_missing_months + 1);
}
}
} else if (filled_months.size() == 1) {
for (int month = 0; month < n.generated_latent_centers_for_a_period.size(); month++) {
n.generated_latent_centers_for_a_period[month] =
n.generated_latent_centers_for_a_period[filled_months[0]];
n.generated_latent_spreads_for_a_period[month] =
n.generated_latent_spreads_for_a_period[filled_months[0]];
}
}
}