Program Listing for File AnalysisGraph.hpp

Return to documentation for file (lib/AnalysisGraph.hpp)

#pragma once

#include "definitions.h"
#include <Eigen/Dense>
#include <unsupported/Eigen/MatrixFunctions>

#include <boost/graph/graph_traits.hpp>
#include <boost/range/adaptor/transformed.hpp>
#include <boost/range/algorithm/for_each.hpp>
#include <boost/range/iterator_range.hpp>
#include <range/v3/all.hpp>

#include <sqlite3.h>

#include "graphviz_interface.hpp"

#include "DiGraph.hpp"
#include "Tran_Mat_Cell.hpp"
#include <fmt/format.h>
#include <nlohmann/json.hpp>
#ifdef MULTI_THREADING
  #include <future>
#endif

#ifdef TIME
  #include "CSVWriter.hpp"
#endif

const double tuning_param = 1.0;

enum InitialBeta { ZERO, ONE, HALF, MEAN, MEDIAN, PRIOR, RANDOM };
//enum class InitialBeta : char { ZERO, ONE, HALF, MEAN, MEDIAN, PRIOR, RANDOM };
enum InitialDerivative { DERI_ZERO, DERI_PRIOR };
enum HeadNodeModel { HNM_NAIVE, HNM_FOURIER };

typedef std::unordered_map<std::string, std::vector<double>>
    AdjectiveResponseMap;

// This is a multimap to keep provision to have multiple observations per
// time point per indicator.
// Access (concept is a vertex in the CAG)
// [ concept ][ indicator ][ epoch --→ observation ]
typedef std::vector<std::vector<std::multimap<long, double>>>
    ConceptIndicatorData;

// Keeps the sequence of dates for which data points are available
// Data points are sorted according to dates
// Access:
// [ concept ][ indicator ][epoch]
typedef std::vector<std::vector<long>> ConceptIndicatorEpochs;

// Access
// [ timestep ][ concept ][ indicator ][ observation ]
typedef std::vector<std::vector<std::vector<std::vector<double>>>>
    ObservedStateSequence;

typedef std::vector<std::vector<std::vector<double>>>
    PredictedObservedStateSequence;

typedef std::pair<std::tuple<std::string, int, std::string>,
                  std::tuple<std::string, int, std::string>>
    CausalFragment;

// { concept_name --> (ind_name, [obs_0, obs_1, ... ])}
typedef std::unordered_map<std::string,
                           std::pair<std::string, std::vector<double>>>
    ConceptIndicatorAlignedData;

typedef std::tuple<std::vector<std::string>, std::vector<int>, std::string>
    EventCollection;

typedef std::pair<EventCollection, EventCollection>
    CausalFragmentCollection;

// Access
// [ sample ][ time_step ]{ vertex_name --> { indicator_name --> pred}}
// [ sample ][ time_step ][ vertex_name ][ indicator_name ]
typedef std::vector<std::vector<
    std::unordered_map<std::string, std::unordered_map<std::string, double>>>>
    FormattedPredictionResult;

// Access
// [ vertex_name ][ timestep ][ sample ]
typedef std::unordered_map<std::string, std::vector<std::vector<double>>>
    FormattedProjectionResult;

// Access
// get<0>:
//      Training range
//      <<start_year, start_month>, <end_year, end_month>>
// get<1>:
//      Sequence of prediction time steps
//      [yyyy-mm₀, yyyy-mm₁, yyyy-mm₂, yyyy-mm₃, .....]
// get<2>:
//      Prediction results
//      [ sample ][ time_step ]{ vertex_name --> { indicator_name --> pred}}
//      [ sample ][ time_step ][ vertex_name ][ indicator_name ]
typedef std::tuple<std::pair<std::pair<int, int>, std::pair<int, int>>,
                   std::vector<std::string>,
                   FormattedPredictionResult>
    Prediction;

// Format AnalysisGraph state to output
// [ concept name ] --> [ ind1, ind2, ... ]
typedef std::unordered_map<std::string, std::vector<std::string>> ConceptIndicators;

// List of edges [(source, target), ...]
typedef std::vector<std::pair<std::string, std::string>> Edges;

// List of adjectives [(source, target), ...]
typedef std::vector<std::pair<std::string, std::string>> Adjectives;

// List of polarities [(source, target), ...]
typedef std::vector<std::pair<int, int>> Polarities;

// Vector of theta priors and samples pairs for each edge
// Ordering is according to order of edges in Edges data vector
// For each edge, there is a tuple of vectors
// first element of the tuple is a vector of theta priors KDEs
// second element of the tuple is a vector of sampled thetas
// [([p1, p1, ...], [s1, s2, ...]), ... ]
// Each tuple: <dataset, sampled thertas, log prior histogram>
// TODO: remove dataset and convert this to a pair
//typedef std::vector<std::pair<std::vector<double>, std::vector<double>>> Thetas;
typedef std::vector<std::tuple<std::vector<double>, std::vector<double>, std::vector<double>>> Thetas;

// Sampled Derivatives for each concept
// Access
// [ concept name ] --> [s_1, s_2, ..., s_res ]
typedef std::unordered_map<std::string, std::vector<double>> Derivatives;

// Data
// Access
// [ indicator name ] --> {
//                           [ "Time Step" ] --> [ts1, ts2, ...]
//                           [ "Data" ]      --> [ d1,  d2, ...]
//                        }
typedef std::unordered_map<std::string, std::unordered_map<std::string, std::vector<double>>> Data;

// Predictions
// Access
// [ indicator name ] --> {
//                           [ ts ] --> [p_1, p_2, p_3, ..., p_res]
//                        }
typedef std::unordered_map<std::string, std::unordered_map<int, std::vector<double>>> Predictions;

typedef std::unordered_map<std::string, std::unordered_map<std::string, std::vector<double>>>
    CredibleIntervals;

typedef std::tuple<
    ConceptIndicators,
    Edges, // List of edges [(source, target), ...]
    Adjectives,
    Polarities,
    // Theta priors and samples for each edge
    // [(priors, samples), ... ]
    Thetas,
    Derivatives,
    // Data year month range
    //std::vector<std::string>,
    std::vector<long>,
    // Data
    Data,
    // Prediction year month range
    //std::vector<std::string>,
    std::vector<double>,
    Predictions,
    CredibleIntervals,
    // Log likelihoods
    std::vector<double>,
    int // Number of bins in theta prior distributions
            > CompleteState;

// Access
// [prediction time step] -->
//      get<0>:
//          concept name
//      get<1>
//          indicator name
//      get<2>
//          value
typedef std::unordered_map<int, std::vector<std::tuple<std::string,
        std::string, double>>> ConstraintSchedule;

typedef boost::graph_traits<DiGraph>::edge_descriptor EdgeDescriptor;
typedef boost::graph_traits<DiGraph>::edge_iterator EdgeIterator;

typedef std::multimap<std::pair<int, int>, std::pair<int, int>>::iterator
    MMapIterator;

AdjectiveResponseMap construct_adjective_response_map(size_t n_kernels);

class AnalysisGraph {

  private:
  #ifdef TIME
    std::pair<std::vector<std::string>, std::vector<long>> durations;
    std::pair<std::vector<std::string>, std::vector<long>> mcmc_part_duration;
    CSVWriter writer;
    std::string timing_file_prefix = "";
    int timing_run_number = 0;
  #endif

  // True only when Delphi is run through the CauseMos HMI.
  bool causemos_call = false;

  DiGraph graph;


  // Handle to the random number generator singleton object
  RNG* rng_instance = nullptr;

  std::mt19937 rand_num_generator;


  // Uniform distribution used by the MCMC sampler
  std::uniform_real_distribution<double> uni_dist;

  // Normal distribution used to perturb β
  std::normal_distribution<double> norm_dist;

  // Uniform discrete distributions used by the MCMC sampler
  // to perturb the initial latent state
  std::uniform_int_distribution<int> uni_disc_dist;
  // to sample an edge
  std::uniform_int_distribution<int> uni_disc_dist_edge;

  // Sampling resolution
  size_t res;

  // Number of KDE kernels
  size_t n_kde_kernels = 200;

  /*
   ============================================================================
   Meta Data Structures
   ============================================================================
  */

  // Maps each concept name to the vertex id of the
  // vertex that concept is represented in the CAG
  // concept name --> CAG vertex id
  std::unordered_map<std::string, int> name_to_vertex = {};

  // Keeps track of indicators in CAG to ensure there are no duplicates.
  std::unordered_set<std::string> indicators_in_CAG;

  // A_beta_factors is a 2D array (std::vector of std::vectors) that keeps track
  // of the β factors involved with each cell of the transition matrix A.
  //
  // According to our current model, which uses variables and their partial
  // derivatives with respect to each other ( x --> y, βxy = ∂y/∂x ),
  // at most half of the transition matrix cells can be affected by βs.
  // According to the way we organize the transition matrix, the cells
  // A[row][col] where row is an even index and col is an odd index
  // are such cells.
  //
  // Each cell of matrix A_beta_factors represent all the directed paths
  // starting at the vertex equal to the column index of the matrix and
  // ending at the vertex equal to the row index of the matrix.
  //
  // Each cell of matrix A_beta_factors is an object of Tran_Mat_Cell class.
  std::vector<std::vector<std::shared_ptr<Tran_Mat_Cell>>> A_beta_factors;

  // A set of (row, column) numbers of the 2D matrix A_beta_factors
  // where the cell (row, column) depends on β factors.
  std::set<std::pair<int, int>> beta_dependent_cells;

  // Maps each β to all the transition matrix cells that are dependent on it.
  std::multimap<std::pair<int, int>, std::pair<int, int>> beta2cell;

  std::unordered_set<int> body_nodes = {};
  std::unordered_set<int> head_nodes = {};
  std::vector<double> generated_latent_sequence;
  int generated_concept;

  /*
   ============================================================================
   Sampler Related Variables
   ============================================================================
  */

  // Used to check whether there is a trained model before calling
  // generate_prediction()
  bool trained = false;

  // training was stopped by user input
  bool stopped = false;

  int n_timesteps = 0;
  int pred_timesteps = 0;
  std::pair<std::pair<int, int>, std::pair<int, int>> training_range;
  std::vector<std::string> pred_range;
  long train_start_epoch = -1;
  long train_end_epoch = -1;
  double pred_start_timestep = -1;
  // Number of zero based observation timesteps to each observation point.
  // When data aggregation level is MONTHLY, this is the number of months
  // When data aggregation level is YEARLY, this is the number of years
  std::vector<long> observation_timesteps_sorted;
  std::vector<double> modeling_timestep_gaps;
  std::vector<double> observation_timestep_unique_gaps;
  // Currently Delphi could work either with monthly data or yearly data
  // but not with some monthly and some yearly
  DataAggregationLevel model_data_agg_level = DataAggregationLevel::MONTHLY;

  #ifdef MULTI_THREADING
    // A future cannot be copied. So we need to specify a copy assign
    // constructor that does not copy this for the code to compile
    std::vector<std::future<Eigen::MatrixXd>> matrix_exponential_futures;
  #endif
  std::unordered_map<double, Eigen::MatrixXd> e_A_ts;
  std::unordered_map<double, Eigen::MatrixXd> previous_e_A_ts;

  // Computed matrix exponentials of A_fourier_base:
  // e^(A_fourier_base * gap)
  // for all the gaps we have to advance the system to evolve it to the time
  // steps where there are observations.
  // Access: [gap] --> e^(A_fourier_base * gap)
  std::unordered_map<double, Eigen::MatrixXd> e_A_fourier_ts;

  long num_modeling_timesteps_per_one_observation_timestep = 1;

  std::unordered_map<int, std::function<double(unsigned int, double)>> external_concepts;
  std::vector<unsigned int> concept_sample_pool;
  std::vector<EdgeDescriptor> edge_sample_pool;

  double t = 0.0;
  double delta_t = 1.0;

  double log_likelihood = 0.0;
  double previous_log_likelihood = 0.0;
  double log_likelihood_MAP = 0.0;
  int MAP_sample_number = -1;
  std::vector<double> log_likelihoods;

  // To decide whether to perturb a θ or a derivative
  // If coin_flip < coin_flip_thresh perturb θ else perturb derivative
  double coin_flip = 0;
  double coin_flip_thresh = 0.5;

  // Remember the old θ, logpdf(θ) and the edge where we perturbed the θ.
  // We need this to revert the system to the previous state if the proposal
  // gets rejected.
  // Access:
  //        edge, θ, logpdf(θ)
  std::tuple<EdgeDescriptor, double, double> previous_theta;

  // Remember the old derivative and the concept we perturbed the derivative
  int changed_derivative = 0;
  double previous_derivative = 0;

  // Latent state that is evolved by sampling.
  Eigen::VectorXd s0;
  Eigen::VectorXd s0_prev;
  double derivative_prior_variance = 0.1;

  // Transition matrix that is evolved by sampling.
  // Since variable A has been already used locally in other methods,
  // I chose to name this A_original. After refactoring the code, we could
  // rename this to A.
  Eigen::MatrixXd A_original;
  Eigen::MatrixXd previous_A_original;

  HeadNodeModel head_node_model = HeadNodeModel::HNM_NAIVE;//HeadNodeModel::HNM_FOURIER;//
  // Base transition matrix for the Fourier decomposition based head node model
  Eigen::MatrixXd A_fourier_base;
  // Initial state for the Fourier decomposition based head node model
  Eigen::VectorXd s0_fourier;

  // Determines whether to use the continuous version or the discretized
  // version of the solution for the system of differential equations.
  //
  // continuous = true:
  //    Continuous version of the solution. We use the continuous form of the
  //    transition matrix and matrix exponential.
  //
  // continuous = false:
  //    Discretized version of the solution. We use the discretized version of
  //    the transition matrix and repeated matrix multiplication.
  //
  // A_discretized = I + A_continuous * Δt
  bool continuous = true;

  // Access this as
  // current_latent_state
  Eigen::VectorXd current_latent_state;

  // Access this as
  // observed_state_sequence[ time step ][ vertex ][ indicator ]
  ObservedStateSequence observed_state_sequence;

  // Access this as
  // prediction_latent_state_sequences[ sample ][ time step ]
  std::vector<std::vector<Eigen::VectorXd>> predicted_latent_state_sequences;

  // Access this as
  // predicted_observed_state_sequences
  //                            [ sample ][ time step ][ vertex ][ indicator ]
  std::vector<PredictedObservedStateSequence>
      predicted_observed_state_sequences;

  PredictedObservedStateSequence test_observed_state_sequence;

  // Implementing constraints or interventions.
  // -------------------------------------------------------------------------
  // We are implementing two ways to constrain the model.
  //    1. One-off constraints.
  //        A latent state is clamped to a constrained value just for the
  //        specified time step and released to evolve from the subsequent time
  //        step onward until the next constrained time step and so on.
  //        E.g. Getting a one time grant.
  //    2. Perpetual constraints
  //        Once a latent state gets clamped at a value at a particular time
  //        step, it stays clamped at that value in subsequent time steps until
  //        another constrain overwrites the current constrain or the end of
  //        the prediction time is reached.
  //        NOTE: Currently we do not have a way to have a semi-perpetual
  //        constraint: A constraint is applied perpetually for some number of
  //        continuous time steps and then switched off. With a little bit of
  //        work we can implement this. We just need a special constraint value
  //        to signal end of a constraint. One suggestion is to use NaN.
  //        E.g. Maintaining a water level of a reservoir at a certain amount.
  //
  // NOTE: WE either apply One-off or Perpetual constraints to all the
  //       concepts. The current design does not permit applying mixed
  //       constraints such that some concepts are constrained one-off while
  //       some others are constrained perpetual. With a little bit more work,
  //       we could also achieve this. Moving the constraint type into the
  //       constraint information data structure would work for keeping track
  //       of mixed constraint types:
  //        std::unordered_map<int, std::vector<std::tuple<int, double, bool>>>
  //       Then we would have to update the constraint processing logic
  //       accordingly.
  // -------------------------------------------------------------------------
  //
  // NOTE: This implementation of the constraints does not work at all with
  // multiple indicators being attached to a single concept. Constraining the
  // concept effects all the indicators and we cannot constrain targeted for a
  // particular indicator. In the current model we might achieve this by
  // constraining the scaling factor (which we incorrectly call as the
  // indicator mean).
  // Currently we are doing:
  //    constrained latent state = constrained indicator value / scaling factor
  // The constraining that might work with multiple indicators per concept:
  //    constrained scaling factor = constrained indicator value / latent state
  // -------------------------------------------------------------------------
  //
  // Implementing the One-off constraints:
  // -------------------------------------------------------------------------
  // To store constraints (or interventions)
  // For some times steps of the prediction range, latent state values could be
  // constrained to a value external from what the LDS predicts that value
  // should be. When prediction happens, if constrains are present at a time
  // step for some concepts, the predicted latent state values for those
  // concepts are overwritten by the constraints supplied in this data
  // structure.
  // Access
  // [ time step ] --> [(concept id, constrained value), ... ]
  // latent_state_constraints.at(time step)
  std::unordered_map<int, std::vector<std::pair<int, double>>>
      one_off_constraints;
  std::unordered_map<int, std::vector<std::pair<int, double>>>
      head_node_one_off_constraints;
  //
  // Implementing Perpetual constraints:
  // -------------------------------------------------------------------------
  // Access
  // [ concept id ] --> constrained value
  // perpetual_constraints.at(concept id)
  std::unordered_map<int, double> perpetual_constraints;
  //
  // Deciding which type of constraints to enforce
  // one_off_constraints is empty => unconstrained prediction
  // is_one_off_constraints = true => One-off constraints
  // is_one_off_constraints = false => Perpetual constraints
  bool is_one_off_constraints = true;
  //
  // Deciding whether to clamp the latent variable or the derivative
  // true  => clamp at derivative
  // false => clamp at latent variable
  bool clamp_at_derivative = true;
  //
  // When we are clamping derivatives the clamp sticks since derivatives never
  // chance in our current model. So for one-off clamping, we have to reset the
  // derivative back to original after the clamping step. This variable
  // remembers the time step to reset the clamped derivatives.
  int rest_derivative_clamp_ts = -1;

  std::vector<Eigen::MatrixXd> transition_matrix_collection;
  std::vector<Eigen::VectorXd> initial_latent_state_collection;
  //std::vector<std::vector<double>> latent_mean_collection;
  //std::vector<std::vector<double>> latent_std_collection;
  // Access:
  // [sample][node id]{partition --> (mean, std)}
  //std::vector<std::vector<std::unordered_map<int, std::pair<double, double>>>> latent_mean_std_collection;

  std::vector<Eigen::VectorXd> synthetic_latent_state_sequence;
  bool synthetic_data_experiment = false;

  /*
   ============================================================================
   Private: Integration with Uncharted's CauseMos interface
                                                  (in causemos_integration.cpp)
   ============================================================================
  */

            /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                            training-progress
            ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/

            /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                            create-model
            ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/

  void extract_concept_indicator_mapping_and_observations_from_json(
                        const nlohmann::json &json_indicators,
                        ConceptIndicatorData &concept_indicator_data,
                        ConceptIndicatorEpochs &concept_indicator_epochs);


  static double epoch_to_timestep(long epoch, long train_start_epoch, long modeling_frequency);

  void infer_modeling_period(
                        const ConceptIndicatorEpochs & concept_indicator_observation_timesteps,
                        long &shortest_gap,
                        long &longest_gap,
                        long &frequent_gap,
                        int &highest_frequency);

  void infer_concept_period(const ConceptIndicatorEpochs &concept_indicator_epochs);

  void
  set_observed_state_sequence_from_json_dict(const nlohmann::json &json_indicators);

            /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                          create-experiment
            ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/

  // Epoch --> (year, month, date)
  std::tuple<int, int, int> epoch_to_year_month_date(long epoch);

  void extract_projection_constraints(
                                const nlohmann::json &projection_constraints, long skip_steps);

  FormattedProjectionResult run_causemos_projection_experiment_from_json_dict(
                                               const nlohmann::json &json_data);

  FormattedProjectionResult format_projection_result();

  void sample_transition_matrix_collection_from_prior();


  /*
   ============================================================================
   Private: Model serialization (in serialize.cpp)
   ============================================================================
  */

  void from_delphi_json_dict(const nlohmann::json &json_data, bool verbose);

  /*
   ============================================================================
   Private: Utilities (in graph_utils.cpp)
   ============================================================================
  */

  void clear_state();

  void initialize_random_number_generator();

  void remove_node(int node_id);

  // Allocate a num_verts x num_verts 2D array (std::vector of std::vectors)
  void allocate_A_beta_factors();

  void find_all_paths_between(int start, int end, int cutoff);

  void find_all_paths_between_util(int start,
                                   int end,
                                   std::vector<int>& path,
                                   int cutoff);

  int calculate_num_timesteps(int start_year,
                              int start_month,
                              int end_year,
                              int end_month);

  /*
   ============================================================================
   Private: Subgraphs (in subgraphs.cpp)
   ============================================================================
  */

  void get_subgraph(int vert,
                    std::unordered_set<int>& vertices_to_keep,
                    int cutoff,
                    bool inward);

  void get_subgraph_between(int start,
                            int end,
                            std::vector<int>& path,
                            std::unordered_set<int>& vertices_to_keep,
                            int cutoff);

  /*
   ============================================================================
   Private: Accessors
   ============================================================================
  */

  int num_nodes() { return boost::num_vertices(graph); }

  int get_vertex_id(std::string concept) {
    using namespace fmt::literals;
    try {
      return this->name_to_vertex.at(concept);
    }
    catch (const std::out_of_range& oor) {
      throw std::out_of_range("Concept \"{}\" not in CAG!"_format(concept));
    }
  }

  auto node_indices() const {
    return boost::make_iterator_range(boost::vertices(this->graph));
  };

  auto nodes() {
    using boost::adaptors::transformed;
    return this->node_indices() |
           transformed([&](int v) -> Node& { return (*this)[v]; });
  };

  auto node_names() {
    using boost::adaptors::transformed;
    return this->nodes() |
           transformed([&](auto node) -> std::string { return node.name; });
  };

  int get_degree(int vertex_id) {
    return boost::in_degree(vertex_id, this->graph) +
           boost::out_degree(vertex_id, this->graph);
  };

  auto out_edges(int i) {
    return boost::make_iterator_range(boost::out_edges(i, graph));
  }

  Node& source(EdgeDescriptor e) {
    return (*this)[boost::source(e, this->graph)];
  };

  Node& target(EdgeDescriptor e) {
    return (*this)[boost::target(e, this->graph)];
  };

  auto successors(int i) {
    return boost::make_iterator_range(boost::adjacent_vertices(i, this->graph));
  }

  auto successors(std::string node_name) {
    return this->successors(this->name_to_vertex.at(node_name));
  }

  std::vector<Node> get_successor_list(std::string node) {
    std::vector<Node> successors = {};
    for (int successor : this->successors(node)) {
      successors.push_back((*this)[successor]);
    }
    return successors;
  }

  auto predecessors(int i) {
    return boost::make_iterator_range(
        boost::inv_adjacent_vertices(i, this->graph));
  }

  auto predecessors(std::string node_name) {
    return this->predecessors(this->name_to_vertex.at(node_name));
  }

  std::vector<Node> get_predecessor_list(std::string node) {
    std::vector<Node> predecessors = {};
    for (int predecessor : this->predecessors(node)) {
      predecessors.push_back((*this)[predecessor]);
    }
    return predecessors;
  }

  double get_beta(std::string source_vertex_name,
                  std::string target_vertex_name) {

    // This is ∂target / ∂source
    return this->A_original(2 * get_vertex_id(target_vertex_name),
                            2 * get_vertex_id(source_vertex_name) + 1);
  }

  /*
   ============================================================================
   Private: Get Training Data Sequence (in train_model.cpp)
   ============================================================================
  */


  void
  set_observed_state_sequence_from_data(std::string country = "South Sudan",
                                        std::string state = "",
                                        std::string county = "");

  std::vector<std::vector<std::vector<double>>>
  get_observed_state_from_data(int year,
                               int month,
                               std::string country,
                               std::string state = "",
                               std::string county = "");

  /*
   ============================================================================
   Private: Initializing model parameters (in parameter_initialization.cpp)
   ============================================================================
  */

  void initialize_parameters(int res = 200,
                             InitialBeta initial_beta = InitialBeta::ZERO,
                             InitialDerivative initial_derivative = InitialDerivative::DERI_ZERO,
                             bool use_heuristic = false,
                             bool use_continuous = true);

  void set_indicator_means_and_standard_deviations();

  void init_betas_to(InitialBeta ib = InitialBeta::MEAN);

  void construct_theta_pdfs();

  /*
   ============================================================================
   Private: Training by MCMC Sampling (in sampling.cpp)
   ============================================================================
  */

  void set_base_transition_matrix();

  #ifdef MULTI_THREADING
    void compute_multiple_matrix_exponentials_parallelly(
                         const Eigen::MatrixXd & A,
                         std::vector<std::future<Eigen::MatrixXd>> &me_futures);
  #endif

  // Sample elements of the stochastic transition matrix from the
  // prior distribution, based on gradable adjectives.
  void set_transition_matrix_from_betas();

  void set_log_likelihood_helper(int ts);

  void set_log_likelihood();

  void sample_from_posterior();

  // TODO: Need testng
  // TODO: Before calling sample_from_proposal() we must call
  // AnalysisGraph::find_all_paths()
  // TODO: Before calling sample_from_proposal(), we mush assign initial βs and
  // run Tran_Mat_Cell::compute_cell() to initialize the first transistion
  // matrix.
  // TODO: Update Tran_Mat_Cell::compute_cell() to calculate the proper value.
  // At the moment it just computes sum of length of all the paths realted to
  // this cell
  void sample_from_proposal();

  void update_transition_matrix_cells(EdgeDescriptor e);

  double calculate_delta_log_prior();

  void revert_back_to_previous_state();


  /*
   ============================================================================
   Private: Modeling head nodes (in head_nodes.cpp)
   ============================================================================
  */

  void partition_data_and_calculate_mean_std_for_each_partition(
      Node& n, std::vector<double>& latent_sequence);

  void apply_constraint_at(int ts, int node_id);

  void generate_head_node_latent_sequence(int node_id,
                                          int num_timesteps,
                                          bool sample,
                                          int seq_no = 0);

  void generate_head_node_latent_sequence_from_changes(Node &n,
                                                       int num_timesteps,
                                                       bool sample);

  void generate_head_node_latent_sequences(int samp, int num_timesteps);

  void update_head_node_latent_state_with_generated_derivatives(
      int ts_current,
      int ts_next,
      int concept_id,
      std::vector<double>& latent_sequence);

  void update_latent_state_with_generated_derivatives(int ts_current,
                                                      int ts_next);

  /*
   ============================================================================
   Private: Prediction (in prediction.cpp)
   ============================================================================
  */

  void check_bounds();


  void generate_latent_state_sequences(double initial_prediction_step);

  void perturb_predicted_latent_state_at(int timestep, int sample_number);

  void generate_observed_state_sequences();

  std::vector<std::vector<double>>
  generate_observed_state(Eigen::VectorXd latent_state);

  FormattedPredictionResult format_prediction_result();

  void run_model(int start_year,
                 int start_month,
                 int end_year,
                 int end_month);

  void add_constraint(int step, std::string concept_name, std::string indicator_name,
                                                double indicator_clamp_value);

  /*
   ============================================================================
   Private: Synthetic Data Experiment (in synthetic_data.cpp)
   ============================================================================
  */

  /*
   ============================================================================
   Private: Graph Visualization (in graphviz.cpp)
   ============================================================================
  */

  std::pair<Agraph_t*, GVC_t*> to_agraph(
      bool simplified_labels =
          false,
      int label_depth =
          1,
      std::string node_to_highlight = "",
      std::string rankdir = "TB");

  /*
   ============================================================================
   Private: Fourier decomposition based seasonal head node model (in fourier.cpp)
   ============================================================================
  */

  std::vector<double> generate_frequencies_for_period(int period,
                                                      int n_components);

  Eigen::MatrixXd evolve_LDS(const Eigen::MatrixXd &A_base,
                             const Eigen::VectorXd &_s0,
                             int n_modeling_time_steps,
                             double step_size = 1);

  std::pair<Eigen::MatrixXd, Eigen::VectorXd>
    assemble_sinusoidal_generating_LDS(const std::vector<double> &freqs);

  /*
  std::pair<Eigen::MatrixXd, Eigen::VectorXd>
    assemble_sinusoidal_generating_LDS(unsigned short components,
                                       unsigned short period);
  */

  Eigen::MatrixXd generate_sinusoidal_values_for_bins(const Eigen::MatrixXd &A_sin_base,
                                                      const Eigen::VectorXd &s0_sin,
                                                      int period);

  // NOTE: This method could be made a method of the Node class. The best
  //       architecture would be to make a subclass, HeadNode, of Node class and
  //       include this method there. At the moment we incrementally create the
  //       graph while identifying head nodes, we are using Node objects
  //       everywhere. To follow the HeadNode subclass specialization route, we
  //       either have to replace Node objects with HeadNode objects or do a
  //       first pass through the input to identify head nodes and then create
  //        the graph.
  void compute_fourier_coefficients_from_least_square_optimization(
                                            const Eigen::MatrixXd &sinusoidals,
                                            int n_components,
                                            std::vector<int> &head_node_ids);

  std::pair<Eigen::MatrixXd, Eigen::VectorXd>
  assemble_head_node_modeling_LDS(const Eigen::MatrixXd &A_sin_base,
                                  const Eigen::VectorXd &s0_sin,
                                  int n_components,
                                  int n_concepts,
                                  std::unordered_map<int, int> &hn_to_mat_row);

  bool determine_the_best_number_of_components(
                                   const Eigen::MatrixXd & A_hn_period_base,
                                   const Eigen::VectorXd & s0_hn_period,
                                   int period,
                                   int n_components,
                                   std::unordered_map<int, int> &hn_to_mat_row);

  std::pair<Eigen::MatrixXd, Eigen::VectorXd>
      assemble_all_seasonal_head_node_modeling_LDS(
                               std::unordered_set<double> fourier_frequency_set,
                               int n_concepts,
                               std::unordered_map<int, int> &hn_to_mat_row);

  void assemble_base_LDS(InitialDerivative id);

  void merge_concept_LDS_into_seasonal_head_node_modeling_LDS(
                                          const Eigen::MatrixXd &A_concept_base,
                                          const Eigen::VectorXd s0_concept);

  void predictions_to_csv(const Eigen::MatrixXd &A_base,
                          const Eigen::VectorXd &_s0, int n_time_steps);

  std::pair<Eigen::MatrixXd, Eigen::VectorXd>
  fit_seasonal_head_node_model_via_fourier_decomposition();

  public:
  AnalysisGraph() {
     one_off_constraints.clear();
     perpetual_constraints.clear();
  }

  ~AnalysisGraph() {}

  std::string id;
  std::string experiment_id = "experiment_id_not_set";

  std::string to_json_string(int indent = 0);
  bool data_heuristic = false;

  // Set the sampling resolution.
  void set_res(size_t res);

  // Set the number of KDE kernels.
  void set_n_kde_kernels(size_t kde_kernels)
      {this->n_kde_kernels = kde_kernels;};

  // Get the sampling resolution.
  size_t get_res();


  // there may be a better place in this file for this prototype
  void from_causemos_json_dict(const nlohmann::json &json_data,
                               double belief_score_cutoff,
                               double grounding_score_cutoff);

  /*
   ============================================================================
   Constructors from INDRA-exported JSON (in constructors.cpp)
   ============================================================================
  */

  static AnalysisGraph
  from_indra_statements_json_dict(nlohmann::json json_data,
                                  double belief_score_cutoff = 0.9,
                                  double grounding_score_cutoff = 0.0,
                                  std::string ontology = "WM");

  static AnalysisGraph
  from_indra_statements_json_string(std::string json_string,
                                    double belief_score_cutoff = 0.9,
                                    double grounding_score_cutoff = 0.0,
                                    std::string ontology = "WM");

  static AnalysisGraph
  from_indra_statements_json_file(std::string filename,
                                  double belief_score_cutoff = 0.9,
                                  double grounding_score_cutoff = 0.0,
                                  std::string ontology = "WM");

  static AnalysisGraph
  from_causal_fragments(std::vector<CausalFragment> causal_fragments);

  static AnalysisGraph
  from_causal_fragments_with_data(std::pair<std::vector<CausalFragment>,
                                  ConceptIndicatorAlignedData> cag_ind_data,
                                  int kde_kernels = 5);

  static AnalysisGraph from_json_string(std::string);

  AnalysisGraph(const AnalysisGraph& rhs);

  AnalysisGraph& operator=(AnalysisGraph rhs);

  /*
   ============================================================================
   Public: Integration with Uncharted's CauseMos interface
                                                  (in causemos_integration.cpp)
   ============================================================================
  */

            /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                            training-progress
            ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
  bool get_trained(){ return trained; }
  bool get_stopped() { return stopped; }
  double get_log_likelihood(){ return log_likelihood; }
  double get_previous_log_likelihood(){ return previous_log_likelihood; }
  double get_log_likelihood_MAP(){ return log_likelihood_MAP; }

            /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                            create-model
            ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/

  static AnalysisGraph from_causemos_json_string(std::string json_string,
                                                 double belief_score_cutoff = 0,
                                                 double grounding_score_cutoff = 0,
                                                 int kde_kernels = 4);

  static AnalysisGraph from_causemos_json_file(std::string filename,
                                               double belief_score_cutoff = 0,
                                               double grounding_score_cutoff = 0,
                                               int kde_kernels = 4);

  std::string generate_create_model_response();


            /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                          create-experiment
            ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/

  FormattedProjectionResult
  run_causemos_projection_experiment_from_json_string(std::string json_string);

  FormattedProjectionResult
  run_causemos_projection_experiment_from_json_file(std::string filename);


    /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                      edit-weights
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/

  unsigned short freeze_edge_weight(std::string source, std::string target,
                          double scaled_weight, int polarity);


  /*
   ============================================================================
   Public: Model serialization (in serialize.cpp)
   ============================================================================
  */

  std::string serialize_to_json_string(bool verbose = true, bool compact = true);

  void export_create_model_json_string();

  static AnalysisGraph deserialize_from_json_string(std::string json_string, bool verbose = true);

  static AnalysisGraph deserialize_from_json_file(std::string filename, bool verbose = true);

  /*
   ============================================================================
   Public: Accessors
   ============================================================================
  */

  size_t num_vertices() { return boost::num_vertices(this->graph); }

  Node& operator[](std::string node_name) {
    return (*this)[this->get_vertex_id(node_name)];
  }

  Node& operator[](int v) { return this->graph[v]; }

  size_t num_edges() { return boost::num_edges(this->graph); }

  auto edges() const { return boost::make_iterator_range(boost::edges(graph)); }

  Edge& edge(EdgeDescriptor e) { return this->graph[e]; }

  Edge& edge(int source, int target) {
    return this->graph[boost::edge(source, target, this->graph).first];
  }

  Edge& edge(int source, std::string target) {
    return this->graph
        [boost::edge(source, this->get_vertex_id(target), this->graph).first];
  }

  Edge& edge(std::string source, int target) {
    return this->graph
        [boost::edge(this->get_vertex_id(source), target, this->graph).first];
  }

  Edge& edge(std::string source, std::string target) {
    return this->graph[boost::edge(this->get_vertex_id(source),
                                   this->get_vertex_id(target),
                                   this->graph)
                           .first];
  }

  boost::range_detail::integer_iterator<unsigned long> begin() {
    return boost::vertices(this->graph).first;
  };

  boost::range_detail::integer_iterator<unsigned long> end() {
    return boost::vertices(this->graph).second;
  };

  Eigen::VectorXd& get_initial_latent_state() { return this->s0; };

  double get_MAP_log_likelihood() { return this->log_likelihood_MAP; };

  /*
   ============================================================================
   Public: Graph Building (in graph_building.cpp)
   ============================================================================
  */

  int add_node(std::string concept);

  bool add_edge(CausalFragment causal_fragment);
  void add_edge(CausalFragmentCollection causal_fragments);
  std::pair<EdgeDescriptor, bool> add_edge(int, int);
  std::pair<EdgeDescriptor, bool> add_edge(int, std::string);
  std::pair<EdgeDescriptor, bool> add_edge(std::string, int);
  std::pair<EdgeDescriptor, bool> add_edge(std::string, std::string);

  void remove_node(std::string concept);

  // Note:
  //      Although just calling this->remove_node(concept) within the loop
  //          for( std::string concept : concept_s )
  //      is sufficient to implement this method, it is not very efficient.
  //      It re-calculates directed simple paths for each vertex removed
  //
  //      Therefore, the code in this->remove_node() has been duplicated with
  //      slightly different flow to achive a more efficient execution.
  void remove_nodes(std::unordered_set<std::string> concepts);

  void remove_edge(std::string src, std::string tgt);

  void remove_edges(std::vector<std::pair<std::string, std::string>> edges);

  /*
   ============================================================================
   Public: Subgraphs (in subgraphs.cpp)
   ============================================================================
  */

  // TODO Change the name of this function to something better, like
  // restrict_to_subgraph_for_concept, update docstring

  AnalysisGraph get_subgraph_for_concept(std::string concept,
                                         bool inward = false,
                                         int depth = -1);

  AnalysisGraph get_subgraph_for_concept_pair(std::string source_concept,
                                              std::string target_concept,
                                              int cutoff = -1);

  /*
   ============================================================================
   Public: Graph Modification (in graph_modification.cpp)
   ============================================================================
  */

  void prune(int cutoff = 2);

  // Merge node n1 into node n2, with the option to specify relative polarity.
  // void
  // merge_nodes_old(std::string n1, std::string n2, bool same_polarity = true);

  void merge_nodes(std::string concept_1,
                   std::string concept_2,
                   bool same_polarity = true);

  void change_polarity_of_edge(std::string source_concept,
                               int source_polarity,
                               std::string target_concept,
                               int target_polarity);

  /*
   ============================================================================
   Public: Indicator Manipulation (in indicator_manipulation.cpp)
   ============================================================================
  */

  int
  set_indicator(std::string concept, std::string indicator, std::string source);

  void delete_indicator(std::string concept, std::string indicator);

  void delete_all_indicators(std::string concept);

  void map_concepts_to_indicators(int n = 1, std::string country = "");

  /*
   ============================================================================
   Public: Utilities (in graph_utils.cpp)
   ============================================================================
  */

  /*
   * Find all the simple paths between all the paris of nodes of the graph
   */
  void find_all_paths();

  void set_random_seed(int seed);

  void set_derivative(std::string, double);

  /*
   ============================================================================
   Public: Training the Model (in train_model.cpp)
   ============================================================================
  */

  void train_model(int start_year = 2012,
                   int start_month = 1,
                   int end_year = 2017,
                   int end_month = 12,
                   int res = 200,
                   int burn = 10000,
                   std::string country = "South Sudan",
                   std::string state = "",
                   std::string county = "",
                   std::map<std::string, std::string> units = {},
                   InitialBeta initial_beta = InitialBeta::ZERO,
                   InitialDerivative initial_derivative = InitialDerivative::DERI_ZERO,
                   bool use_heuristic = false,
                   bool use_continuous = true);

  void run_train_model(int res = 200,
                   int burn = 10000,
                   HeadNodeModel head_node_model = HeadNodeModel::HNM_NAIVE,
                   InitialBeta initial_beta = InitialBeta::ZERO,
                   InitialDerivative initial_derivative = InitialDerivative::DERI_ZERO,
                   bool use_heuristic = false,
                   bool use_continuous = true,
                   int train_start_timestep = 0,
                   int train_timesteps = -1,
                   std::unordered_map<std::string, int> concept_periods = {},
                   std::unordered_map<std::string, std::string> concept_center_measures = {},
                   std::unordered_map<std::string, std::string> concept_models = {},
                   std::unordered_map<std::string, double> concept_min_vals = {},
                   std::unordered_map<std::string, double> concept_max_vals = {},
                   std::unordered_map
                       <std::string, std::function<double(unsigned int, double)>>
                   ext_concepts = {});

  void run_train_model_2(int res = 200,
                       int burn = 10000,
                       InitialBeta initial_beta = InitialBeta::ZERO,
                       InitialDerivative initial_derivative = InitialDerivative::DERI_ZERO,
                       bool use_heuristic = false,
                       bool use_continuous = true);

  /*
   ============================================================================
   Public: Training by MCMC Sampling (in sampling.cpp)
   ============================================================================
  */

  void set_initial_latent_state(Eigen::VectorXd vec) { this->s0 = vec; };

  void set_default_initial_state(InitialDerivative id = InitialDerivative::DERI_ZERO);

  static void check_multithreading();
  /*
   ============================================================================
   Public: Prediction (in prediction.cpp)
   ============================================================================
  */

  Prediction generate_prediction(int start_year,
                                 int start_month,
                                 int end_year,
                                 int end_month,
                                 ConstraintSchedule constraints =
                                 ConstraintSchedule(),
                                 bool one_off = true,
                                 bool clamp_deri = true);

  void generate_prediction(int pred_start_timestep,
                           int pred_timesteps,
                           ConstraintSchedule constraints =
                           ConstraintSchedule(),
                           bool one_off = true,
                           bool clamp_deri = true);

  std::vector<std::vector<double>> prediction_to_array(std::string indicator);

  /*
   ============================================================================
   Public: Synthetic data experiment (in synthetic_data.cpp)
   ============================================================================
  */

  static AnalysisGraph generate_random_CAG(unsigned int num_nodes,
                                           unsigned int num_extra_edges = 0);

  void generate_synthetic_data(unsigned int num_obs = 48,
                               double noise_variance = 0.1,
                               unsigned int kde_kernels = 1000,
                               InitialBeta initial_beta = InitialBeta::PRIOR,
                               InitialDerivative initial_derivative = InitialDerivative::DERI_PRIOR,
                               bool use_continuous = false);

  void initialize_random_CAG(unsigned int num_obs,
                             unsigned int kde_kernels,
                             InitialBeta initial_beta,
                             InitialDerivative initial_derivative,
                             bool use_continuous);

  void interpolate_missing_months(std::vector<int> &filled_months, Node &n);

  /*
   ============================================================================
   Public: Graph Visualization (in graphviz.cpp)
   ============================================================================
  */

  std::string to_dot();

  void
  to_png(std::string filename = "CAG.png",
         bool simplified_labels =
             false,
         int label_depth =
             1,
         std::string node_to_highlight = "",
         std::string rankdir = "TB");

  /*
   ============================================================================
   Public: Printing (in printing.cpp)
   ============================================================================
  */

  void print_nodes();
  void print_edges();
  void print_name_to_vertex();
  void print_indicators();
  void print_A_beta_factors();
  void print_latent_state(const Eigen::VectorXd&);

  /*
   * Prints the simple paths found between all pairs of nodes of the graph
   * Groupd according to the starting and ending vertex.
   * find_all_paths() should be called before this to populate the paths
   */
  void print_all_paths();

  // Given an edge (source, target vertex ids - i.e. a β ≡ ∂target/∂source),
  // print all the transition matrix cells that are dependent on it.
  void print_cells_affected_by_beta(int source, int target);

  void print_training_range();

  void print_MAP_estimate();

  /*
   ============================================================================
   Public: Formatting output (in format_output.cpp)
   ============================================================================
  */

  CredibleIntervals get_credible_interval(Predictions preds);

  CompleteState get_complete_state();

  /*
   ============================================================================
   Public: Database interactions (in database.cpp)
   ============================================================================
  */

  sqlite3* open_delphi_db(int mode = SQLITE_OPEN_READONLY);

  void write_model_to_db(std::string model_id);

  AdjectiveResponseMap construct_adjective_response_map(
      std::mt19937 gen,
      std::uniform_real_distribution<double>& uni_dist,
      std::normal_distribution<double>& norm_dist,
      size_t n_kernels
  );

  /*
   ============================================================================
   Public: Profiling Delphi (in profiler.cpp)
   ============================================================================
  */

  void initialize_profiler(int res = 100,
                           int kde_kernels = 1000,
                           InitialBeta initial_beta = InitialBeta::ZERO,
                           InitialDerivative initial_derivative = InitialDerivative::DERI_ZERO,
                           bool use_continuous = true);

  void profile_mcmc(int run = 1, std::string file_name_prefix = "mcmc_timing");

  void profile_kde(int run = 1, std::string file_name_prefix = "kde_timing");

  void profile_prediction(int run = 1, int pred_timesteps = 24, std::string file_name_prefix = "prediction_timing");

  void profile_matrix_exponential(int run = 1,
                                  std::string file_name_prefix = "mat_exp_timing",
                                  std::vector<double> unique_gaps = {1, 2, 5},
                                  int repeat = 30,
                                  bool multi_threaded = false);

#ifdef TIME
  void set_timing_file_prefix(std::string tfp) {this->timing_file_prefix = tfp;}
  void create_mcmc_part_timing_file()
  {
      std::string filename = this->timing_file_prefix + "embeded_" +
                              std::to_string(this->num_nodes()) + "-" +
                              std::to_string(this->num_nodes()) + "_" +
                              std::to_string(this->timing_run_number) + "_" +
                              delphi::utils::get_timestamp() + ".csv";
      this->writer = CSVWriter(filename);
      std::vector<std::string> headings = {"Run", "Nodes", "Edges", "Wall Clock Time (ns)", "CPU Time (ns)", "Sample Type"};
      writer.write_row(headings.begin(), headings.end());
//      cout << filename << endl;
  }
  void set_timing_run_number(int run) {this->timing_run_number = run;}
#endif
};