Program Listing for File AnalysisGraph.hpp
↰ Return to documentation for file (lib/AnalysisGraph.hpp)
#pragma once
#include "definitions.h"
#include <Eigen/Dense>
#include <unsupported/Eigen/MatrixFunctions>
#include <boost/graph/graph_traits.hpp>
#include <boost/range/adaptor/transformed.hpp>
#include <boost/range/algorithm/for_each.hpp>
#include <boost/range/iterator_range.hpp>
#include <range/v3/all.hpp>
#include <sqlite3.h>
#include "graphviz_interface.hpp"
#include "DiGraph.hpp"
#include "Tran_Mat_Cell.hpp"
#include <fmt/format.h>
#include <nlohmann/json.hpp>
#ifdef MULTI_THREADING
#include <future>
#endif
#ifdef TIME
#include "CSVWriter.hpp"
#endif
const double tuning_param = 1.0;
enum InitialBeta { ZERO, ONE, HALF, MEAN, MEDIAN, PRIOR, RANDOM };
//enum class InitialBeta : char { ZERO, ONE, HALF, MEAN, MEDIAN, PRIOR, RANDOM };
enum InitialDerivative { DERI_ZERO, DERI_PRIOR };
enum HeadNodeModel { HNM_NAIVE, HNM_FOURIER };
typedef std::unordered_map<std::string, std::vector<double>>
AdjectiveResponseMap;
// This is a multimap to allow for multiple observations per
// time point per indicator.
// Access (concept is a vertex in the CAG)
// [ concept ][ indicator ][ epoch --> observation ]
typedef std::vector<std::vector<std::multimap<long, double>>>
ConceptIndicatorData;
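// A minimal access sketch (the indices and epoch value below are made up,
// not taken from the code base): all observations of indicator 0 attached to
// concept 2 at a given epoch can be visited via the multimap's equal_range().
//   ConceptIndicatorData data;
//   auto obs_range = data[2][0].equal_range(1577836800L);
//   for (auto it = obs_range.first; it != obs_range.second; ++it) {
//     double observation = it->second;
//   }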
// Keeps the sequence of dates for which data points are available
// Data points are sorted according to dates
// Access:
// [ concept ][ indicator ][epoch]
typedef std::vector<std::vector<long>> ConceptIndicatorEpochs;
// Access
// [ timestep ][ concept ][ indicator ][ observation ]
typedef std::vector<std::vector<std::vector<std::vector<double>>>>
ObservedStateSequence;
typedef std::vector<std::vector<std::vector<double>>>
PredictedObservedStateSequence;
typedef std::pair<std::tuple<std::string, int, std::string>,
std::tuple<std::string, int, std::string>>
CausalFragment;
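// A minimal construction sketch, assuming each tuple holds
// (adjective, polarity, concept name) for the source and target of the edge
// (the concept names below are made up):
//   CausalFragment cf = {{"large", 1, "wm/concept/rainfall"},
//                        {"small", -1, "wm/concept/crop_failure"}};
//   AnalysisGraph G = AnalysisGraph::from_causal_fragments({cf});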
// { concept_name --> (ind_name, [obs_0, obs_1, ... ])}
typedef std::unordered_map<std::string,
std::pair<std::string, std::vector<double>>>
ConceptIndicatorAlignedData;
typedef std::tuple<std::vector<std::string>, std::vector<int>, std::string>
EventCollection;
typedef std::pair<EventCollection, EventCollection>
CausalFragmentCollection;
// Access
// [ sample ][ time_step ]{ vertex_name --> { indicator_name --> pred}}
// [ sample ][ time_step ][ vertex_name ][ indicator_name ]
typedef std::vector<std::vector<
std::unordered_map<std::string, std::unordered_map<std::string, double>>>>
FormattedPredictionResult;
// Access
// [ vertex_name ][ timestep ][ sample ]
typedef std::unordered_map<std::string, std::vector<std::vector<double>>>
FormattedProjectionResult;
// Access
// get<0>:
// Training range
// <<start_year, start_month>, <end_year, end_month>>
// get<1>:
// Sequence of prediction time steps
// [yyyy-mm₀, yyyy-mm₁, yyyy-mm₂, yyyy-mm₃, .....]
// get<2>:
// Prediction results
// [ sample ][ time_step ]{ vertex_name --> { indicator_name --> pred}}
// [ sample ][ time_step ][ vertex_name ][ indicator_name ]
typedef std::tuple<std::pair<std::pair<int, int>, std::pair<int, int>>,
std::vector<std::string>,
FormattedPredictionResult>
Prediction;
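// A minimal sketch of unpacking a Prediction with C++17 structured bindings
// (variable names, sample index, and time step are illustrative):
//   Prediction pred = /* returned by generate_prediction(...) */;
//   auto [train_range, pred_dates, results] = pred;
//   double p = results[/*sample*/ 0][/*time step*/ 3]
//                  .at("concept name").at("indicator name");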
// Format AnalysisGraph state to output
// [ concept name ] --> [ ind1, ind2, ... ]
typedef std::unordered_map<std::string, std::vector<std::string>> ConceptIndicators;
// List of edges [(source, target), ...]
typedef std::vector<std::pair<std::string, std::string>> Edges;
// List of adjective pairs [(source adjective, target adjective), ...]
typedef std::vector<std::pair<std::string, std::string>> Adjectives;
// List of polarity pairs [(source polarity, target polarity), ...]
typedef std::vector<std::pair<int, int>> Polarities;
// Vector of theta prior and sample pairs for each edge
// Ordering follows the order of edges in the Edges data vector
// For each edge, there is a tuple of vectors:
//   the first element of the tuple is a vector of theta prior KDEs
//   the second element of the tuple is a vector of sampled thetas
// [([p1, p2, ...], [s1, s2, ...]), ... ]
// Each tuple: <dataset, sampled thetas, log prior histogram>
// TODO: remove dataset and convert this to a pair
//typedef std::vector<std::pair<std::vector<double>, std::vector<double>>> Thetas;
typedef std::vector<std::tuple<std::vector<double>, std::vector<double>, std::vector<double>>> Thetas;
// Sampled Derivatives for each concept
// Access
// [ concept name ] --> [s_1, s_2, ..., s_res ]
typedef std::unordered_map<std::string, std::vector<double>> Derivatives;
// Data
// Access
// [ indicator name ] --> {
// [ "Time Step" ] --> [ts1, ts2, ...]
// [ "Data" ] --> [ d1, d2, ...]
// }
typedef std::unordered_map<std::string, std::unordered_map<std::string, std::vector<double>>> Data;
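// A minimal access sketch, using the "Time Step" and "Data" keys described
// above (the indicator name is made up):
//   Data data;
//   std::vector<double>& ts  = data["Food Price Index"]["Time Step"];
//   std::vector<double>& obs = data["Food Price Index"]["Data"];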
// Predictions
// Access
// [ indicator name ] --> {
// [ ts ] --> [p_1, p_2, p_3, ..., p_res]
// }
typedef std::unordered_map<std::string, std::unordered_map<int, std::vector<double>>> Predictions;
typedef std::unordered_map<std::string, std::unordered_map<std::string, std::vector<double>>>
CredibleIntervals;
typedef std::tuple<
ConceptIndicators,
Edges, // List of edges [(source, target), ...]
Adjectives,
Polarities,
// Theta priors and samples for each edge
// [(priors, samples), ... ]
Thetas,
Derivatives,
// Data year month range
//std::vector<std::string>,
std::vector<long>,
// Data
Data,
// Prediction year month range
//std::vector<std::string>,
std::vector<double>,
Predictions,
CredibleIntervals,
// Log likelihoods
std::vector<double>,
int // Number of bins in theta prior distributions
> CompleteState;
// Access
// [prediction time step] -->
// get<0>:
// concept name
// get<1>
// indicator name
// get<2>
// value
typedef std::unordered_map<int, std::vector<std::tuple<std::string,
std::string, double>>> ConstraintSchedule;
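// A minimal sketch of filling a ConstraintSchedule (the concept name,
// indicator name, and value are made up): clamp one indicator at prediction
// time step 5.
//   ConstraintSchedule schedule;
//   schedule[5].emplace_back("wm/concept/food_availability",
//                            "FAO Food Price Index", 120.0);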
typedef boost::graph_traits<DiGraph>::edge_descriptor EdgeDescriptor;
typedef boost::graph_traits<DiGraph>::edge_iterator EdgeIterator;
typedef std::multimap<std::pair<int, int>, std::pair<int, int>>::iterator
MMapIterator;
AdjectiveResponseMap construct_adjective_response_map(size_t n_kernels);
class AnalysisGraph {
private:
#ifdef TIME
std::pair<std::vector<std::string>, std::vector<long>> durations;
std::pair<std::vector<std::string>, std::vector<long>> mcmc_part_duration;
CSVWriter writer;
std::string timing_file_prefix = "";
int timing_run_number = 0;
#endif
// True only when Delphi is run through the CauseMos HMI.
bool causemos_call = false;
DiGraph graph;
// Handle to the random number generator singleton object
RNG* rng_instance = nullptr;
std::mt19937 rand_num_generator;
// Uniform distribution used by the MCMC sampler
std::uniform_real_distribution<double> uni_dist;
// Normal distribution used to perturb β
std::normal_distribution<double> norm_dist;
// Uniform discrete distributions used by the MCMC sampler
// to perturb the initial latent state
std::uniform_int_distribution<int> uni_disc_dist;
// to sample an edge
std::uniform_int_distribution<int> uni_disc_dist_edge;
// Sampling resolution
size_t res;
// Number of KDE kernels
size_t n_kde_kernels = 200;
/*
============================================================================
Meta Data Structures
============================================================================
*/
// Maps each concept name to the id of the vertex that
// represents that concept in the CAG
// concept name --> CAG vertex id
std::unordered_map<std::string, int> name_to_vertex = {};
// Keeps track of indicators in CAG to ensure there are no duplicates.
std::unordered_set<std::string> indicators_in_CAG;
// A_beta_factors is a 2D array (std::vector of std::vectors) that keeps track
// of the β factors involved with each cell of the transition matrix A.
//
// According to our current model, which uses variables and their partial
// derivatives with respect to each other ( x --> y, βxy = ∂y/∂x ),
// at most half of the transition matrix cells can be affected by βs.
// According to the way we organize the transition matrix, the cells
// A[row][col] where row is an even index and col is an odd index
// are such cells.
//
// Each cell of matrix A_beta_factors represents all the directed paths
// starting at the vertex equal to the column index of the matrix and
// ending at the vertex equal to the row index of the matrix.
//
// Each cell of matrix A_beta_factors is an object of Tran_Mat_Cell class.
std::vector<std::vector<std::shared_ptr<Tran_Mat_Cell>>> A_beta_factors;
// A set of (row, column) numbers of the 2D matrix A_beta_factors
// where the cell (row, column) depends on β factors.
std::set<std::pair<int, int>> beta_dependent_cells;
// Maps each β to all the transition matrix cells that are dependent on it.
std::multimap<std::pair<int, int>, std::pair<int, int>> beta2cell;
std::unordered_set<int> body_nodes = {};
std::unordered_set<int> head_nodes = {};
std::vector<double> generated_latent_sequence;
int generated_concept;
/*
============================================================================
Sampler Related Variables
============================================================================
*/
// Used to check whether there is a trained model before calling
// generate_prediction()
bool trained = false;
// training was stopped by user input
bool stopped = false;
int n_timesteps = 0;
int pred_timesteps = 0;
std::pair<std::pair<int, int>, std::pair<int, int>> training_range;
std::vector<std::string> pred_range;
long train_start_epoch = -1;
long train_end_epoch = -1;
double pred_start_timestep = -1;
// Zero-based observation timesteps for each observation point.
// When the data aggregation level is MONTHLY, these are numbers of months;
// when it is YEARLY, numbers of years.
std::vector<long> observation_timesteps_sorted;
std::vector<double> modeling_timestep_gaps;
std::vector<double> observation_timestep_unique_gaps;
// Currently Delphi can work with either monthly data or yearly data,
// but not with a mixture of monthly and yearly data.
DataAggregationLevel model_data_agg_level = DataAggregationLevel::MONTHLY;
#ifdef MULTI_THREADING
// A future cannot be copied, so we need to provide a copy constructor /
// copy assignment operator that does not copy this member for the code to compile.
std::vector<std::future<Eigen::MatrixXd>> matrix_exponential_futures;
#endif
std::unordered_map<double, Eigen::MatrixXd> e_A_ts;
std::unordered_map<double, Eigen::MatrixXd> previous_e_A_ts;
// Computed matrix exponentials of A_fourier_base:
// e^(A_fourier_base * gap)
// for all the gaps we have to advance the system to evolve it to the time
// steps where there are observations.
// Access: [gap] --> e^(A_fourier_base * gap)
std::unordered_map<double, Eigen::MatrixXd> e_A_fourier_ts;
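// A minimal sketch of how such a cache would be consulted (the gap value is
// made up; this is illustration only, not code from the class):
//   double gap = 1.0;
//   if (e_A_fourier_ts.find(gap) == e_A_fourier_ts.end()) {
//     e_A_fourier_ts[gap] = (A_fourier_base * gap).exp();
//   }
//   Eigen::MatrixXd step = e_A_fourier_ts.at(gap);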
long num_modeling_timesteps_per_one_observation_timestep = 1;
std::unordered_map<int, std::function<double(unsigned int, double)>> external_concepts;
std::vector<unsigned int> concept_sample_pool;
std::vector<EdgeDescriptor> edge_sample_pool;
double t = 0.0;
double delta_t = 1.0;
double log_likelihood = 0.0;
double previous_log_likelihood = 0.0;
double log_likelihood_MAP = 0.0;
int MAP_sample_number = -1;
std::vector<double> log_likelihoods;
// To decide whether to perturb a θ or a derivative
// If coin_flip < coin_flip_thresh perturb θ else perturb derivative
double coin_flip = 0;
double coin_flip_thresh = 0.5;
// Remember the old θ, logpdf(θ) and the edge where we perturbed the θ.
// We need this to revert the system to the previous state if the proposal
// gets rejected.
// Access:
// edge, θ, logpdf(θ)
std::tuple<EdgeDescriptor, double, double> previous_theta;
// Remember the old derivative and the concept whose derivative we perturbed
int changed_derivative = 0;
double previous_derivative = 0;
// Latent state that is evolved by sampling.
Eigen::VectorXd s0;
Eigen::VectorXd s0_prev;
double derivative_prior_variance = 0.1;
// Transition matrix that is evolved by sampling.
// Since the variable name A is already used locally in other methods,
// this is named A_original. After refactoring the code, we could
// rename it to A.
Eigen::MatrixXd A_original;
Eigen::MatrixXd previous_A_original;
HeadNodeModel head_node_model = HeadNodeModel::HNM_NAIVE; // or HeadNodeModel::HNM_FOURIER
// Base transition matrix for the Fourier decomposition based head node model
Eigen::MatrixXd A_fourier_base;
// Initial state for the Fourier decomposition based head node model
Eigen::VectorXd s0_fourier;
// Determines whether to use the continuous version or the discretized
// version of the solution for the system of differential equations.
//
// continuous = true:
// Continuous version of the solution. We use the continuous form of the
// transition matrix and matrix exponential.
//
// continuous = false:
// Discretized version of the solution. We use the discretized version of
// the transition matrix and repeated matrix multiplication.
//
// A_discretized = I + A_continuous * Δt
bool continuous = true;
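// A minimal Eigen sketch of the two forms described above (A_c, dt, and n are
// illustrative local names, not members of this class):
//   // discretized: A_d = I + A_c * Δt, advanced by repeated multiplication
//   Eigen::MatrixXd A_d = Eigen::MatrixXd::Identity(n, n) + A_c * dt;
//   // continuous: advanced with the matrix exponential e^(A_c * Δt),
//   // provided by <unsupported/Eigen/MatrixFunctions>
//   Eigen::MatrixXd e_A_dt = (A_c * dt).exp();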
// Access this as
// current_latent_state
Eigen::VectorXd current_latent_state;
// Access this as
// observed_state_sequence[ time step ][ vertex ][ indicator ]
ObservedStateSequence observed_state_sequence;
// Access this as
// prediction_latent_state_sequences[ sample ][ time step ]
std::vector<std::vector<Eigen::VectorXd>> predicted_latent_state_sequences;
// Access this as
// predicted_observed_state_sequences
// [ sample ][ time step ][ vertex ][ indicator ]
std::vector<PredictedObservedStateSequence>
predicted_observed_state_sequences;
PredictedObservedStateSequence test_observed_state_sequence;
// Implementing constraints or interventions.
// -------------------------------------------------------------------------
// We are implementing two ways to constrain the model.
// 1. One-off constraints.
// A latent state is clamped to a constrained value just for the
// specified time step and released to evolve from the subsequent time
// step onward until the next constrained time step and so on.
// E.g. Getting a one time grant.
// 2. Perpetual constraints
// Once a latent state gets clamped at a value at a particular time
// step, it stays clamped at that value in subsequent time steps until
// another constrain overwrites the current constrain or the end of
// the prediction time is reached.
// NOTE: Currently we do not have a way to have a semi-perpetual
// constraint: a constraint that is applied perpetually for some number of
// consecutive time steps and then switched off. With a little bit of
// work we can implement this. We just need a special constraint value
// to signal the end of a constraint. One suggestion is to use NaN.
// E.g. Maintaining the water level of a reservoir at a certain amount.
//
// NOTE: We apply either one-off or perpetual constraints to all the
// concepts. The current design does not permit applying mixed
// constraints such that some concepts are constrained one-off while
// some others are constrained perpetually. With a little bit more work,
// we could also achieve this. Moving the constraint type into the
// constraint information data structure would work for keeping track
// of mixed constraint types:
// std::unordered_map<int, std::vector<std::tuple<int, double, bool>>>
// Then we would have to update the constraint processing logic
// accordingly.
// -------------------------------------------------------------------------
//
// NOTE: This implementation of the constraints does not work at all with
// multiple indicators attached to a single concept. Constraining the
// concept affects all of its indicators, and we cannot target the constraint
// at a particular indicator. In the current model we might achieve this by
// constraining the scaling factor (which we incorrectly call the
// indicator mean).
// Currently we are doing:
// constrained latent state = constrained indicator value / scaling factor
// The constraining that might work with multiple indicators per concept:
// constrained scaling factor = constrained indicator value / latent state
// -------------------------------------------------------------------------
//
// Implementing the One-off constraints:
// -------------------------------------------------------------------------
// To store constraints (or interventions)
// For some time steps of the prediction range, latent state values could be
// constrained to a value different from what the LDS predicts that value
// should be. When prediction happens, if constraints are present at a time
// step for some concepts, the predicted latent state values for those
// concepts are overwritten by the constraints supplied in this data
// structure.
// Access
// [ time step ] --> [(concept id, constrained value), ... ]
// latent_state_constraints.at(time step)
std::unordered_map<int, std::vector<std::pair<int, double>>>
one_off_constraints;
std::unordered_map<int, std::vector<std::pair<int, double>>>
head_node_one_off_constraints;
//
// Implementing Perpetual constraints:
// -------------------------------------------------------------------------
// Access
// [ concept id ] --> constrained value
// perpetual_constraints.at(concept id)
std::unordered_map<int, double> perpetual_constraints;
//
// Deciding which type of constraints to enforce
// one_off_constraints is empty => unconstrained prediction
// is_one_off_constraints = true => One-off constraints
// is_one_off_constraints = false => Perpetual constraints
bool is_one_off_constraints = true;
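// A minimal sketch of how these members would be populated (the concept id,
// time step, and value below are made up):
//   is_one_off_constraints = true;
//   one_off_constraints[3].emplace_back(2, 0.75);  // clamp concept 2 at time step 3 only
//   // or, for perpetual constraints:
//   is_one_off_constraints = false;
//   perpetual_constraints[2] = 0.75;               // concept 2 stays clamped at 0.75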
//
// Deciding whether to clamp the latent variable or the derivative
// true => clamp at derivative
// false => clamp at latent variable
bool clamp_at_derivative = true;
//
// When we are clamping derivatives, the clamp sticks since derivatives never
// change in our current model. So for one-off clamping, we have to reset the
// derivative back to its original value after the clamping step. This variable
// remembers the time step at which to reset the clamped derivatives.
int rest_derivative_clamp_ts = -1;
std::vector<Eigen::MatrixXd> transition_matrix_collection;
std::vector<Eigen::VectorXd> initial_latent_state_collection;
//std::vector<std::vector<double>> latent_mean_collection;
//std::vector<std::vector<double>> latent_std_collection;
// Access:
// [sample][node id]{partition --> (mean, std)}
//std::vector<std::vector<std::unordered_map<int, std::pair<double, double>>>> latent_mean_std_collection;
std::vector<Eigen::VectorXd> synthetic_latent_state_sequence;
bool synthetic_data_experiment = false;
/*
============================================================================
Private: Integration with Uncharted's CauseMos interface
(in causemos_integration.cpp)
============================================================================
*/
/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
training-progress
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
create-model
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
void extract_concept_indicator_mapping_and_observations_from_json(
const nlohmann::json &json_indicators,
ConceptIndicatorData &concept_indicator_data,
ConceptIndicatorEpochs &concept_indicator_epochs);
static double epoch_to_timestep(long epoch, long train_start_epoch, long modeling_frequency);
void infer_modeling_period(
const ConceptIndicatorEpochs & concept_indicator_observation_timesteps,
long &shortest_gap,
long &longest_gap,
long &frequent_gap,
int &highest_frequency);
void infer_concept_period(const ConceptIndicatorEpochs &concept_indicator_epochs);
void
set_observed_state_sequence_from_json_dict(const nlohmann::json &json_indicators);
/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
create-experiment
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
// Epoch --> (year, month, date)
std::tuple<int, int, int> epoch_to_year_month_date(long epoch);
void extract_projection_constraints(
const nlohmann::json &projection_constraints, long skip_steps);
FormattedProjectionResult run_causemos_projection_experiment_from_json_dict(
const nlohmann::json &json_data);
FormattedProjectionResult format_projection_result();
void sample_transition_matrix_collection_from_prior();
/*
============================================================================
Private: Model serialization (in serialize.cpp)
============================================================================
*/
void from_delphi_json_dict(const nlohmann::json &json_data, bool verbose);
/*
============================================================================
Private: Utilities (in graph_utils.cpp)
============================================================================
*/
void clear_state();
void initialize_random_number_generator();
void remove_node(int node_id);
// Allocate a num_verts x num_verts 2D array (std::vector of std::vectors)
void allocate_A_beta_factors();
void find_all_paths_between(int start, int end, int cutoff);
void find_all_paths_between_util(int start,
int end,
std::vector<int>& path,
int cutoff);
int calculate_num_timesteps(int start_year,
int start_month,
int end_year,
int end_month);
/*
============================================================================
Private: Subgraphs (in subgraphs.cpp)
============================================================================
*/
void get_subgraph(int vert,
std::unordered_set<int>& vertices_to_keep,
int cutoff,
bool inward);
void get_subgraph_between(int start,
int end,
std::vector<int>& path,
std::unordered_set<int>& vertices_to_keep,
int cutoff);
/*
============================================================================
Private: Accessors
============================================================================
*/
int num_nodes() { return boost::num_vertices(graph); }
int get_vertex_id(std::string concept) {
using namespace fmt::literals;
try {
return this->name_to_vertex.at(concept);
}
catch (const std::out_of_range& oor) {
throw std::out_of_range("Concept \"{}\" not in CAG!"_format(concept));
}
}
auto node_indices() const {
return boost::make_iterator_range(boost::vertices(this->graph));
};
auto nodes() {
using boost::adaptors::transformed;
return this->node_indices() |
transformed([&](int v) -> Node& { return (*this)[v]; });
};
auto node_names() {
using boost::adaptors::transformed;
return this->nodes() |
transformed([&](auto node) -> std::string { return node.name; });
};
int get_degree(int vertex_id) {
return boost::in_degree(vertex_id, this->graph) +
boost::out_degree(vertex_id, this->graph);
};
auto out_edges(int i) {
return boost::make_iterator_range(boost::out_edges(i, graph));
}
Node& source(EdgeDescriptor e) {
return (*this)[boost::source(e, this->graph)];
};
Node& target(EdgeDescriptor e) {
return (*this)[boost::target(e, this->graph)];
};
auto successors(int i) {
return boost::make_iterator_range(boost::adjacent_vertices(i, this->graph));
}
auto successors(std::string node_name) {
return this->successors(this->name_to_vertex.at(node_name));
}
std::vector<Node> get_successor_list(std::string node) {
std::vector<Node> successors = {};
for (int successor : this->successors(node)) {
successors.push_back((*this)[successor]);
}
return successors;
}
auto predecessors(int i) {
return boost::make_iterator_range(
boost::inv_adjacent_vertices(i, this->graph));
}
auto predecessors(std::string node_name) {
return this->predecessors(this->name_to_vertex.at(node_name));
}
std::vector<Node> get_predecessor_list(std::string node) {
std::vector<Node> predecessors = {};
for (int predecessor : this->predecessors(node)) {
predecessors.push_back((*this)[predecessor]);
}
return predecessors;
}
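// As noted for A_beta_factors above, even rows/columns of the transition
// matrix address concept values and odd ones address their derivatives, so
// get_beta() below reads the cell at (2 * target, 2 * source + 1).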
double get_beta(std::string source_vertex_name,
std::string target_vertex_name) {
// This is ∂target / ∂source
return this->A_original(2 * get_vertex_id(target_vertex_name),
2 * get_vertex_id(source_vertex_name) + 1);
}
/*
============================================================================
Private: Get Training Data Sequence (in train_model.cpp)
============================================================================
*/
void
set_observed_state_sequence_from_data(std::string country = "South Sudan",
std::string state = "",
std::string county = "");
std::vector<std::vector<std::vector<double>>>
get_observed_state_from_data(int year,
int month,
std::string country,
std::string state = "",
std::string county = "");
/*
============================================================================
Private: Initializing model parameters (in parameter_initialization.cpp)
============================================================================
*/
void initialize_parameters(int res = 200,
InitialBeta initial_beta = InitialBeta::ZERO,
InitialDerivative initial_derivative = InitialDerivative::DERI_ZERO,
bool use_heuristic = false,
bool use_continuous = true);
void set_indicator_means_and_standard_deviations();
void init_betas_to(InitialBeta ib = InitialBeta::MEAN);
void construct_theta_pdfs();
/*
============================================================================
Private: Training by MCMC Sampling (in sampling.cpp)
============================================================================
*/
void set_base_transition_matrix();
#ifdef MULTI_THREADING
void compute_multiple_matrix_exponentials_parallelly(
const Eigen::MatrixXd & A,
std::vector<std::future<Eigen::MatrixXd>> &me_futures);
#endif
// Sample elements of the stochastic transition matrix from the
// prior distribution, based on gradable adjectives.
void set_transition_matrix_from_betas();
void set_log_likelihood_helper(int ts);
void set_log_likelihood();
void sample_from_posterior();
// TODO: Needs testing
// TODO: Before calling sample_from_proposal() we must call
// AnalysisGraph::find_all_paths()
// TODO: Before calling sample_from_proposal(), we must assign initial βs and
// run Tran_Mat_Cell::compute_cell() to initialize the first transition
// matrix.
// TODO: Update Tran_Mat_Cell::compute_cell() to calculate the proper value.
// At the moment it just computes the sum of the lengths of all the paths
// related to this cell
void sample_from_proposal();
void update_transition_matrix_cells(EdgeDescriptor e);
double calculate_delta_log_prior();
void revert_back_to_previous_state();
/*
============================================================================
Private: Modeling head nodes (in head_nodes.cpp)
============================================================================
*/
void partition_data_and_calculate_mean_std_for_each_partition(
Node& n, std::vector<double>& latent_sequence);
void apply_constraint_at(int ts, int node_id);
void generate_head_node_latent_sequence(int node_id,
int num_timesteps,
bool sample,
int seq_no = 0);
void generate_head_node_latent_sequence_from_changes(Node &n,
int num_timesteps,
bool sample);
void generate_head_node_latent_sequences(int samp, int num_timesteps);
void update_head_node_latent_state_with_generated_derivatives(
int ts_current,
int ts_next,
int concept_id,
std::vector<double>& latent_sequence);
void update_latent_state_with_generated_derivatives(int ts_current,
int ts_next);
/*
============================================================================
Private: Prediction (in prediction.cpp)
============================================================================
*/
void check_bounds();
void generate_latent_state_sequences(double initial_prediction_step);
void perturb_predicted_latent_state_at(int timestep, int sample_number);
void generate_observed_state_sequences();
std::vector<std::vector<double>>
generate_observed_state(Eigen::VectorXd latent_state);
FormattedPredictionResult format_prediction_result();
void run_model(int start_year,
int start_month,
int end_year,
int end_month);
void add_constraint(int step, std::string concept_name, std::string indicator_name,
double indicator_clamp_value);
/*
============================================================================
Private: Synthetic Data Experiment (in synthetic_data.cpp)
============================================================================
*/
/*
============================================================================
Private: Graph Visualization (in graphviz.cpp)
============================================================================
*/
std::pair<Agraph_t*, GVC_t*> to_agraph(bool simplified_labels = false,
int label_depth = 1,
std::string node_to_highlight = "",
std::string rankdir = "TB");
/*
============================================================================
Private: Fourier decomposition based seasonal head node model (in fourier.cpp)
============================================================================
*/
std::vector<double> generate_frequencies_for_period(int period,
int n_components);
Eigen::MatrixXd evolve_LDS(const Eigen::MatrixXd &A_base,
const Eigen::VectorXd &_s0,
int n_modeling_time_steps,
double step_size = 1);
std::pair<Eigen::MatrixXd, Eigen::VectorXd>
assemble_sinusoidal_generating_LDS(const std::vector<double> &freqs);
/*
std::pair<Eigen::MatrixXd, Eigen::VectorXd>
assemble_sinusoidal_generating_LDS(unsigned short components,
unsigned short period);
*/
Eigen::MatrixXd generate_sinusoidal_values_for_bins(const Eigen::MatrixXd &A_sin_base,
const Eigen::VectorXd &s0_sin,
int period);
// NOTE: This method could be made a method of the Node class. The best
// architecture would be to make a subclass, HeadNode, of the Node class and
// include this method there. At the moment, since we incrementally create
// the graph while identifying head nodes, we are using Node objects
// everywhere. To follow the HeadNode subclass specialization route, we
// either have to replace Node objects with HeadNode objects or do a
// first pass through the input to identify head nodes and then create
// the graph.
void compute_fourier_coefficients_from_least_square_optimization(
const Eigen::MatrixXd &sinusoidals,
int n_components,
std::vector<int> &head_node_ids);
std::pair<Eigen::MatrixXd, Eigen::VectorXd>
assemble_head_node_modeling_LDS(const Eigen::MatrixXd &A_sin_base,
const Eigen::VectorXd &s0_sin,
int n_components,
int n_concepts,
std::unordered_map<int, int> &hn_to_mat_row);
bool determine_the_best_number_of_components(
const Eigen::MatrixXd & A_hn_period_base,
const Eigen::VectorXd & s0_hn_period,
int period,
int n_components,
std::unordered_map<int, int> &hn_to_mat_row);
std::pair<Eigen::MatrixXd, Eigen::VectorXd>
assemble_all_seasonal_head_node_modeling_LDS(
std::unordered_set<double> fourier_frequency_set,
int n_concepts,
std::unordered_map<int, int> &hn_to_mat_row);
void assemble_base_LDS(InitialDerivative id);
void merge_concept_LDS_into_seasonal_head_node_modeling_LDS(
const Eigen::MatrixXd &A_concept_base,
const Eigen::VectorXd s0_concept);
void predictions_to_csv(const Eigen::MatrixXd &A_base,
const Eigen::VectorXd &_s0, int n_time_steps);
std::pair<Eigen::MatrixXd, Eigen::VectorXd>
fit_seasonal_head_node_model_via_fourier_decomposition();
public:
AnalysisGraph() {
one_off_constraints.clear();
perpetual_constraints.clear();
}
~AnalysisGraph() {}
std::string id;
std::string experiment_id = "experiment_id_not_set";
std::string to_json_string(int indent = 0);
bool data_heuristic = false;
// Set the sampling resolution.
void set_res(size_t res);
// Set the number of KDE kernels.
void set_n_kde_kernels(size_t kde_kernels)
{this->n_kde_kernels = kde_kernels;};
// Get the sampling resolution.
size_t get_res();
// there may be a better place in this file for this prototype
void from_causemos_json_dict(const nlohmann::json &json_data,
double belief_score_cutoff,
double grounding_score_cutoff);
/*
============================================================================
Constructors from INDRA-exported JSON (in constructors.cpp)
============================================================================
*/
static AnalysisGraph
from_indra_statements_json_dict(nlohmann::json json_data,
double belief_score_cutoff = 0.9,
double grounding_score_cutoff = 0.0,
std::string ontology = "WM");
static AnalysisGraph
from_indra_statements_json_string(std::string json_string,
double belief_score_cutoff = 0.9,
double grounding_score_cutoff = 0.0,
std::string ontology = "WM");
static AnalysisGraph
from_indra_statements_json_file(std::string filename,
double belief_score_cutoff = 0.9,
double grounding_score_cutoff = 0.0,
std::string ontology = "WM");
static AnalysisGraph
from_causal_fragments(std::vector<CausalFragment> causal_fragments);
static AnalysisGraph
from_causal_fragments_with_data(std::pair<std::vector<CausalFragment>,
ConceptIndicatorAlignedData> cag_ind_data,
int kde_kernels = 5);
static AnalysisGraph from_json_string(std::string);
AnalysisGraph(const AnalysisGraph& rhs);
AnalysisGraph& operator=(AnalysisGraph rhs);
/*
============================================================================
Public: Integration with Uncharted's CauseMos interface
(in causemos_integration.cpp)
============================================================================
*/
/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
training-progress
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
bool get_trained(){ return trained; }
bool get_stopped() { return stopped; }
double get_log_likelihood(){ return log_likelihood; }
double get_previous_log_likelihood(){ return previous_log_likelihood; }
double get_log_likelihood_MAP(){ return log_likelihood_MAP; }
/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
create-model
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
static AnalysisGraph from_causemos_json_string(std::string json_string,
double belief_score_cutoff = 0,
double grounding_score_cutoff = 0,
int kde_kernels = 4);
static AnalysisGraph from_causemos_json_file(std::string filename,
double belief_score_cutoff = 0,
double grounding_score_cutoff = 0,
int kde_kernels = 4);
std::string generate_create_model_response();
/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
create-experiment
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
FormattedProjectionResult
run_causemos_projection_experiment_from_json_string(std::string json_string);
FormattedProjectionResult
run_causemos_projection_experiment_from_json_file(std::string filename);
/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
edit-weights
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
unsigned short freeze_edge_weight(std::string source, std::string target,
double scaled_weight, int polarity);
/*
============================================================================
Public: Model serialization (in serialize.cpp)
============================================================================
*/
std::string serialize_to_json_string(bool verbose = true, bool compact = true);
void export_create_model_json_string();
static AnalysisGraph deserialize_from_json_string(std::string json_string, bool verbose = true);
static AnalysisGraph deserialize_from_json_file(std::string filename, bool verbose = true);
/*
============================================================================
Public: Accessors
============================================================================
*/
size_t num_vertices() { return boost::num_vertices(this->graph); }
Node& operator[](std::string node_name) {
return (*this)[this->get_vertex_id(node_name)];
}
Node& operator[](int v) { return this->graph[v]; }
size_t num_edges() { return boost::num_edges(this->graph); }
auto edges() const { return boost::make_iterator_range(boost::edges(graph)); }
Edge& edge(EdgeDescriptor e) { return this->graph[e]; }
Edge& edge(int source, int target) {
return this->graph[boost::edge(source, target, this->graph).first];
}
Edge& edge(int source, std::string target) {
return this->graph
[boost::edge(source, this->get_vertex_id(target), this->graph).first];
}
Edge& edge(std::string source, int target) {
return this->graph
[boost::edge(this->get_vertex_id(source), target, this->graph).first];
}
Edge& edge(std::string source, std::string target) {
return this->graph[boost::edge(this->get_vertex_id(source),
this->get_vertex_id(target),
this->graph)
.first];
}
boost::range_detail::integer_iterator<unsigned long> begin() {
return boost::vertices(this->graph).first;
};
boost::range_detail::integer_iterator<unsigned long> end() {
return boost::vertices(this->graph).second;
};
Eigen::VectorXd& get_initial_latent_state() { return this->s0; };
double get_MAP_log_likelihood() { return this->log_likelihood_MAP; };
/*
============================================================================
Public: Graph Building (in graph_building.cpp)
============================================================================
*/
int add_node(std::string concept);
bool add_edge(CausalFragment causal_fragment);
void add_edge(CausalFragmentCollection causal_fragments);
std::pair<EdgeDescriptor, bool> add_edge(int, int);
std::pair<EdgeDescriptor, bool> add_edge(int, std::string);
std::pair<EdgeDescriptor, bool> add_edge(std::string, int);
std::pair<EdgeDescriptor, bool> add_edge(std::string, std::string);
void remove_node(std::string concept);
// Note:
// Although just calling this->remove_node(concept) within the loop
// for (std::string concept : concepts)
// is sufficient to implement this method, it is not very efficient.
// It re-calculates directed simple paths for each vertex removed.
//
// Therefore, the code in this->remove_node() has been duplicated with
// a slightly different flow to achieve a more efficient execution.
void remove_nodes(std::unordered_set<std::string> concepts);
void remove_edge(std::string src, std::string tgt);
void remove_edges(std::vector<std::pair<std::string, std::string>> edges);
/*
============================================================================
Public: Subgraphs (in subgraphs.cpp)
============================================================================
*/
// TODO Change the name of this function to something better, like
// restrict_to_subgraph_for_concept, update docstring
AnalysisGraph get_subgraph_for_concept(std::string concept,
bool inward = false,
int depth = -1);
AnalysisGraph get_subgraph_for_concept_pair(std::string source_concept,
std::string target_concept,
int cutoff = -1);
/*
============================================================================
Public: Graph Modification (in graph_modification.cpp)
============================================================================
*/
void prune(int cutoff = 2);
// Merge node n1 into node n2, with the option to specify relative polarity.
// void
// merge_nodes_old(std::string n1, std::string n2, bool same_polarity = true);
void merge_nodes(std::string concept_1,
std::string concept_2,
bool same_polarity = true);
void change_polarity_of_edge(std::string source_concept,
int source_polarity,
std::string target_concept,
int target_polarity);
/*
============================================================================
Public: Indicator Manipulation (in indicator_manipulation.cpp)
============================================================================
*/
int
set_indicator(std::string concept, std::string indicator, std::string source);
void delete_indicator(std::string concept, std::string indicator);
void delete_all_indicators(std::string concept);
void map_concepts_to_indicators(int n = 1, std::string country = "");
/*
============================================================================
Public: Utilities (in graph_utils.cpp)
============================================================================
*/
/*
* Find all the simple paths between all the pairs of nodes of the graph
*/
void find_all_paths();
void set_random_seed(int seed);
void set_derivative(std::string, double);
/*
============================================================================
Public: Training the Model (in train_model.cpp)
============================================================================
*/
void train_model(int start_year = 2012,
int start_month = 1,
int end_year = 2017,
int end_month = 12,
int res = 200,
int burn = 10000,
std::string country = "South Sudan",
std::string state = "",
std::string county = "",
std::map<std::string, std::string> units = {},
InitialBeta initial_beta = InitialBeta::ZERO,
InitialDerivative initial_derivative = InitialDerivative::DERI_ZERO,
bool use_heuristic = false,
bool use_continuous = true);
void run_train_model(int res = 200,
int burn = 10000,
HeadNodeModel head_node_model = HeadNodeModel::HNM_NAIVE,
InitialBeta initial_beta = InitialBeta::ZERO,
InitialDerivative initial_derivative = InitialDerivative::DERI_ZERO,
bool use_heuristic = false,
bool use_continuous = true,
int train_start_timestep = 0,
int train_timesteps = -1,
std::unordered_map<std::string, int> concept_periods = {},
std::unordered_map<std::string, std::string> concept_center_measures = {},
std::unordered_map<std::string, std::string> concept_models = {},
std::unordered_map<std::string, double> concept_min_vals = {},
std::unordered_map<std::string, double> concept_max_vals = {},
std::unordered_map
<std::string, std::function<double(unsigned int, double)>>
ext_concepts = {});
void run_train_model_2(int res = 200,
int burn = 10000,
InitialBeta initial_beta = InitialBeta::ZERO,
InitialDerivative initial_derivative = InitialDerivative::DERI_ZERO,
bool use_heuristic = false,
bool use_continuous = true);
/*
============================================================================
Public: Training by MCMC Sampling (in sampling.cpp)
============================================================================
*/
void set_initial_latent_state(Eigen::VectorXd vec) { this->s0 = vec; };
void set_default_initial_state(InitialDerivative id = InitialDerivative::DERI_ZERO);
static void check_multithreading();
/*
============================================================================
Public: Prediction (in prediction.cpp)
============================================================================
*/
Prediction generate_prediction(int start_year,
int start_month,
int end_year,
int end_month,
ConstraintSchedule constraints =
ConstraintSchedule(),
bool one_off = true,
bool clamp_deri = true);
void generate_prediction(int pred_start_timestep,
int pred_timesteps,
ConstraintSchedule constraints =
ConstraintSchedule(),
bool one_off = true,
bool clamp_deri = true);
std::vector<std::vector<double>> prediction_to_array(std::string indicator);
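// A minimal end-to-end usage sketch of the public training/prediction API
// declared in this header (the file name, date ranges, and indicator name
// are made up):
//   AnalysisGraph G = AnalysisGraph::from_causemos_json_file("model.json");
//   G.train_model(2015, 1, 2017, 12);
//   Prediction pred = G.generate_prediction(2018, 1, 2018, 12);
//   auto series = G.prediction_to_array("some indicator name");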
/*
============================================================================
Public: Synthetic data experiment (in synthetic_data.cpp)
============================================================================
*/
static AnalysisGraph generate_random_CAG(unsigned int num_nodes,
unsigned int num_extra_edges = 0);
void generate_synthetic_data(unsigned int num_obs = 48,
double noise_variance = 0.1,
unsigned int kde_kernels = 1000,
InitialBeta initial_beta = InitialBeta::PRIOR,
InitialDerivative initial_derivative = InitialDerivative::DERI_PRIOR,
bool use_continuous = false);
void initialize_random_CAG(unsigned int num_obs,
unsigned int kde_kernels,
InitialBeta initial_beta,
InitialDerivative initial_derivative,
bool use_continuous);
void interpolate_missing_months(std::vector<int> &filled_months, Node &n);
/*
============================================================================
Public: Graph Visualization (in graphviz.cpp)
============================================================================
*/
std::string to_dot();
void to_png(std::string filename = "CAG.png",
bool simplified_labels = false,
int label_depth = 1,
std::string node_to_highlight = "",
std::string rankdir = "TB");
/*
============================================================================
Public: Printing (in printing.cpp)
============================================================================
*/
void print_nodes();
void print_edges();
void print_name_to_vertex();
void print_indicators();
void print_A_beta_factors();
void print_latent_state(const Eigen::VectorXd&);
/*
* Prints the simple paths found between all pairs of nodes of the graph
* Grouped according to the starting and ending vertex.
* find_all_paths() should be called before this to populate the paths
*/
void print_all_paths();
// Given an edge (source, target vertex ids - i.e. a β ≡ ∂target/∂source),
// print all the transition matrix cells that are dependent on it.
void print_cells_affected_by_beta(int source, int target);
void print_training_range();
void print_MAP_estimate();
/*
============================================================================
Public: Formatting output (in format_output.cpp)
============================================================================
*/
CredibleIntervals get_credible_interval(Predictions preds);
CompleteState get_complete_state();
/*
============================================================================
Public: Database interactions (in database.cpp)
============================================================================
*/
sqlite3* open_delphi_db(int mode = SQLITE_OPEN_READONLY);
void write_model_to_db(std::string model_id);
AdjectiveResponseMap construct_adjective_response_map(
std::mt19937 gen,
std::uniform_real_distribution<double>& uni_dist,
std::normal_distribution<double>& norm_dist,
size_t n_kernels
);
/*
============================================================================
Public: Profiling Delphi (in profiler.cpp)
============================================================================
*/
void initialize_profiler(int res = 100,
int kde_kernels = 1000,
InitialBeta initial_beta = InitialBeta::ZERO,
InitialDerivative initial_derivative = InitialDerivative::DERI_ZERO,
bool use_continuous = true);
void profile_mcmc(int run = 1, std::string file_name_prefix = "mcmc_timing");
void profile_kde(int run = 1, std::string file_name_prefix = "kde_timing");
void profile_prediction(int run = 1, int pred_timesteps = 24, std::string file_name_prefix = "prediction_timing");
void profile_matrix_exponential(int run = 1,
std::string file_name_prefix = "mat_exp_timing",
std::vector<double> unique_gaps = {1, 2, 5},
int repeat = 30,
bool multi_threaded = false);
#ifdef TIME
void set_timing_file_prefix(std::string tfp) {this->timing_file_prefix = tfp;}
void create_mcmc_part_timing_file()
{
std::string filename = this->timing_file_prefix + "embeded_" +
std::to_string(this->num_nodes()) + "-" +
std::to_string(this->num_nodes()) + "_" +
std::to_string(this->timing_run_number) + "_" +
delphi::utils::get_timestamp() + ".csv";
this->writer = CSVWriter(filename);
std::vector<std::string> headings = {"Run", "Nodes", "Edges", "Wall Clock Time (ns)", "CPU Time (ns)", "Sample Type"};
writer.write_row(headings.begin(), headings.end());
// cout << filename << endl;
}
void set_timing_run_number(int run) {this->timing_run_number = run;}
#endif
};