.. _program_listing_file_lib_AnalysisGraph.hpp:

Program Listing for File AnalysisGraph.hpp
==========================================

|exhale_lsh| :ref:`Return to documentation for file <file_lib_AnalysisGraph.hpp>` (``lib/AnalysisGraph.hpp``)

.. |exhale_lsh| unicode:: U+021B0 .. UPWARDS ARROW WITH TIP LEFTWARDS

.. code-block:: cpp

#pragma once #include "definitions.h" #include #include #include #include #include #include #include #include #include "graphviz_interface.hpp" #include "DiGraph.hpp" #include "Tran_Mat_Cell.hpp" #include #include #ifdef MULTI_THREADING #include #endif #ifdef TIME #include "CSVWriter.hpp" #endif const double tuning_param = 1.0; enum InitialBeta { ZERO, ONE, HALF, MEAN, MEDIAN, PRIOR, RANDOM }; //enum class InitialBeta : char { ZERO, ONE, HALF, MEAN, MEDIAN, PRIOR, RANDOM }; enum InitialDerivative { DERI_ZERO, DERI_PRIOR }; enum HeadNodeModel { HNM_NAIVE, HNM_FOURIER }; typedef std::unordered_map> AdjectiveResponseMap; // This is a multimap to allow multiple observations per // time point per indicator. // Access (concept is a vertex in the CAG) // [ concept ][ indicator ][ epoch --> observation ] typedef std::vector>> ConceptIndicatorData; // Keeps the sequence of dates for which data points are available // Data points are sorted according to dates // Access: // [ concept ][ indicator ][epoch] typedef std::vector> ConceptIndicatorEpochs; // Access // [ timestep ][ concept ][ indicator ][ observation ] typedef std::vector>>> ObservedStateSequence; typedef std::vector>> PredictedObservedStateSequence; typedef std::pair, std::tuple> CausalFragment; // { concept_name --> (ind_name, [obs_0, obs_1, ... ])} typedef std::unordered_map>> ConceptIndicatorAlignedData; typedef std::tuple, std::vector, std::string> EventCollection; typedef std::pair CausalFragmentCollection; // Access // [ sample ][ time_step ]{ vertex_name --> { indicator_name --> pred}} // [ sample ][ time_step ][ vertex_name ][ indicator_name ] typedef std::vector>>> FormattedPredictionResult; // Access // [ vertex_name ][ timestep ][ sample ] typedef std::unordered_map>> FormattedProjectionResult; // Access // get<0>: // Training range // <, > // get<1>: // Sequence of prediction time steps // [yyyy-mm₀, yyyy-mm₁, yyyy-mm₂, yyyy-mm₃, .....] // get<2>: // Prediction results // [ sample ][ time_step ]{ vertex_name --> { indicator_name --> pred}} // [ sample ][ time_step ][ vertex_name ][ indicator_name ] typedef std::tuple, std::pair>, std::vector, FormattedPredictionResult> Prediction; // Format AnalysisGraph state to output // [ concept name ] --> [ ind1, ind2, ... ] typedef std::unordered_map> ConceptIndicators; // List of edges [(source, target), ...] typedef std::vector> Edges; // List of adjectives [(source, target), ...] typedef std::vector> Adjectives; // List of polarities [(source, target), ...] typedef std::vector> Polarities; // Vector of theta prior and sample pairs for each edge // Ordering is according to the order of edges in the Edges data vector // For each edge, there is a tuple of vectors // first element of the tuple is a vector of theta prior KDEs // second element of the tuple is a vector of sampled thetas // [([p1, p2, ...], [s1, s2, ...]), ... 
] // Each tuple: // TODO: remove dataset and convert this to a pair //typedef std::vector, std::vector>> Thetas; typedef std::vector, std::vector, std::vector>> Thetas; // Sampled Derivatives for each concept // Access // [ concept name ] --> [s_1, s_2, ..., s_res ] typedef std::unordered_map> Derivatives; // Data // Access // [ indicator name ] --> { // [ "Time Step" ] --> [ts1, ts2, ...] // [ "Data" ] --> [ d1, d2, ...] // } typedef std::unordered_map>> Data; // Predictions // Access // [ indicator name ] --> { // [ ts ] --> [p_1, p_2, p_3, ..., p_res] // } typedef std::unordered_map>> Predictions; typedef std::unordered_map>> CredibleIntervals; typedef std::tuple< ConceptIndicators, Edges, // List of edges [(source, target), ...] Adjectives, Polarities, // Theta priors and samples for each edge // [(priors, samples), ... ] Thetas, Derivatives, // Data year month range //std::vector, std::vector, // Data Data, // Prediction year month range //std::vector, std::vector, Predictions, CredibleIntervals, // Log likelihoods std::vector, int // Number of bins in theta prior distributions > CompleteState; // Access // [prediction time step] --> // get<0>: // concept name // get<1> // indicator name // get<2> // value typedef std::unordered_map>> ConstraintSchedule; typedef boost::graph_traits::edge_descriptor EdgeDescriptor; typedef boost::graph_traits::edge_iterator EdgeIterator; typedef std::multimap, std::pair>::iterator MMapIterator; AdjectiveResponseMap construct_adjective_response_map(size_t n_kernels); class AnalysisGraph { private: #ifdef TIME std::pair, std::vector> durations; std::pair, std::vector> mcmc_part_duration; CSVWriter writer; std::string timing_file_prefix = ""; int timing_run_number = 0; #endif // True only when Delphi is run through the CauseMos HMI. bool causemos_call = false; DiGraph graph; // Handle to the random number generator singleton object RNG* rng_instance = nullptr; std::mt19937 rand_num_generator; // Uniform distribution used by the MCMC sampler std::uniform_real_distribution uni_dist; // Normal distribution used to perturb β std::normal_distribution norm_dist; // Uniform discrete distributions used by the MCMC sampler // to perturb the initial latent state std::uniform_int_distribution uni_disc_dist; // to sample an edge std::uniform_int_distribution uni_disc_dist_edge; // Sampling resolution size_t res; // Number of KDE kernels size_t n_kde_kernels = 200; /* ============================================================================ Meta Data Structures ============================================================================ */ // Maps each concept name to the vertex id of the // vertex that concept is represented in the CAG // concept name --> CAG vertex id std::unordered_map name_to_vertex = {}; // Keeps track of indicators in CAG to ensure there are no duplicates. std::unordered_set indicators_in_CAG; // A_beta_factors is a 2D array (std::vector of std::vectors) that keeps track // of the β factors involved with each cell of the transition matrix A. // // According to our current model, which uses variables and their partial // derivatives with respect to each other ( x --> y, βxy = ∂y/∂x ), // at most half of the transition matrix cells can be affected by βs. // According to the way we organize the transition matrix, the cells // A[row][col] where row is an even index and col is an odd index // are such cells. 
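// // For example, for an edge x --> y (β_xy = ∂y/∂x), the direct contribution of // β_xy lives in the even-row, odd-column cell A[ 2 * y ][ 2 * x + 1 ] (the cell the // get_beta() accessor below reads), and any other such cell A[ 2 * i ][ 2 * j + 1 ] // depends on β_xy whenever some directed path from vertex j to vertex i passes // through the edge x --> y. Those dependencies are what A_beta_factors and // beta2cell record.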
// // Each cell of matrix A_beta_factors represents all the directed paths // starting at the vertex equal to the column index of the matrix and // ending at the vertex equal to the row index of the matrix. // // Each cell of matrix A_beta_factors is an object of Tran_Mat_Cell class. std::vector>> A_beta_factors; // A set of (row, column) numbers of the 2D matrix A_beta_factors // where the cell (row, column) depends on β factors. std::set> beta_dependent_cells; // Maps each β to all the transition matrix cells that are dependent on it. std::multimap, std::pair> beta2cell; std::unordered_set body_nodes = {}; std::unordered_set head_nodes = {}; std::vector generated_latent_sequence; int generated_concept; /* ============================================================================ Sampler Related Variables ============================================================================ */ // Used to check whether there is a trained model before calling // generate_prediction() bool trained = false; // training was stopped by user input bool stopped = false; int n_timesteps = 0; int pred_timesteps = 0; std::pair, std::pair> training_range; std::vector pred_range; long train_start_epoch = -1; long train_end_epoch = -1; double pred_start_timestep = -1; // Number of zero-based observation timesteps to each observation point. // When data aggregation level is MONTHLY, this is the number of months // When data aggregation level is YEARLY, this is the number of years std::vector observation_timesteps_sorted; std::vector modeling_timestep_gaps; std::vector observation_timestep_unique_gaps; // Currently Delphi can work either with monthly data or yearly data, // but not with some monthly and some yearly DataAggregationLevel model_data_agg_level = DataAggregationLevel::MONTHLY; #ifdef MULTI_THREADING // A future cannot be copied, so we need to provide a copy constructor and // copy assignment that do not copy this member for the code to compile std::vector> matrix_exponential_futures; #endif std::unordered_map e_A_ts; std::unordered_map previous_e_A_ts; // Computed matrix exponentials of A_fourier_base: // e^(A_fourier_base * gap) // for all the gaps by which we have to advance the system to evolve it to the time // steps where there are observations. // Access: [gap] --> e^(A_fourier_base * gap) std::unordered_map e_A_fourier_ts; long num_modeling_timesteps_per_one_observation_timestep = 1; std::unordered_map> external_concepts; std::vector concept_sample_pool; std::vector edge_sample_pool; double t = 0.0; double delta_t = 1.0; double log_likelihood = 0.0; double previous_log_likelihood = 0.0; double log_likelihood_MAP = 0.0; int MAP_sample_number = -1; std::vector log_likelihoods; // To decide whether to perturb a θ or a derivative // If coin_flip < coin_flip_thresh, perturb θ; else perturb the derivative double coin_flip = 0; double coin_flip_thresh = 0.5; // Remember the old θ, logpdf(θ) and the edge where we perturbed the θ. // We need this to revert the system to the previous state if the proposal // gets rejected. // Access: // edge, θ, logpdf(θ) std::tuple previous_theta; // Remember the old derivative and the concept whose derivative we perturbed int changed_derivative = 0; double previous_derivative = 0; // Latent state that is evolved by sampling. Eigen::VectorXd s0; Eigen::VectorXd s0_prev; double derivative_prior_variance = 0.1; // Transition matrix that is evolved by sampling. // Since variable A has already been used locally in other methods, // I chose to name this A_original. 
After refactoring the code, we could // rename this to A. Eigen::MatrixXd A_original; Eigen::MatrixXd previous_A_original; HeadNodeModel head_node_model = HeadNodeModel::HNM_NAIVE;//HeadNodeModel::HNM_FOURIER;// // Base transition matrix for the Fourier decomposition based head node model Eigen::MatrixXd A_fourier_base; // Initial state for the Fourier decomposition based head node model Eigen::VectorXd s0_fourier; // Determines whether to use the continuous version or the discretized // version of the solution for the system of differential equations. // // continuous = true: // Continuous version of the solution. We use the continuous form of the // transition matrix and matrix exponential. // // continuous = false: // Discretized version of the solution. We use the discretized version of // the transition matrix and repeated matrix multiplication. // // A_discretized = I + A_continuous * Δt bool continuous = true; // Access this as // current_latent_state Eigen::VectorXd current_latent_state; // Access this as // observed_state_sequence[ time step ][ vertex ][ indicator ] ObservedStateSequence observed_state_sequence; // Access this as // predicted_latent_state_sequences[ sample ][ time step ] std::vector> predicted_latent_state_sequences; // Access this as // predicted_observed_state_sequences // [ sample ][ time step ][ vertex ][ indicator ] std::vector predicted_observed_state_sequences; PredictedObservedStateSequence test_observed_state_sequence; // Implementing constraints or interventions. // ------------------------------------------------------------------------- // We are implementing two ways to constrain the model. // 1. One-off constraints. // A latent state is clamped to a constrained value just for the // specified time step and released to evolve from the subsequent time // step onward until the next constrained time step and so on. // E.g. Getting a one-time grant. // 2. Perpetual constraints // Once a latent state gets clamped at a value at a particular time // step, it stays clamped at that value in subsequent time steps until // another constraint overwrites the current constraint or the end of // the prediction time is reached. // NOTE: Currently we do not have a way to have a semi-perpetual // constraint: A constraint is applied perpetually for some number of // continuous time steps and then switched off. With a little bit of // work we can implement this. We just need a special constraint value // to signal the end of a constraint. One suggestion is to use NaN. // E.g. Maintaining the water level of a reservoir at a certain amount. // // NOTE: We either apply One-off or Perpetual constraints to all the // concepts. The current design does not permit applying mixed // constraints such that some concepts are constrained one-off while // some others are constrained perpetual. With a little bit more work, // we could also achieve this. Moving the constraint type into the // constraint information data structure would work for keeping track // of mixed constraint types: // std::unordered_map>> // Then we would have to update the constraint processing logic // accordingly. // ------------------------------------------------------------------------- // // NOTE: This implementation of the constraints does not work at all with // multiple indicators being attached to a single concept. Constraining the // concept affects all of its indicators, and we cannot target the constraint at a // particular indicator. In the current model we might achieve this by // constraining the scaling factor (which we incorrectly call the // indicator mean). // Currently we are doing: // constrained latent state = constrained indicator value / scaling factor // A constraining scheme that might work with multiple indicators per concept: // constrained scaling factor = constrained indicator value / latent state // ------------------------------------------------------------------------- // // Implementing the One-off constraints: // ------------------------------------------------------------------------- // To store constraints (or interventions) // For some time steps of the prediction range, latent state values could be // constrained to a value different from what the LDS predicts that value // should be. When prediction happens, if constraints are present at a time // step for some concepts, the predicted latent state values for those // concepts are overwritten by the constraints supplied in this data // structure. // Access // [ time step ] --> [(concept id, constrained value), ... ] // latent_state_constraints.at(time step) std::unordered_map>> one_off_constraints; std::unordered_map>> head_node_one_off_constraints; // // Implementing Perpetual constraints: // ------------------------------------------------------------------------- // Access // [ concept id ] --> constrained value // perpetual_constraints.at(concept id) std::unordered_map perpetual_constraints; // // Deciding which type of constraints to enforce // one_off_constraints is empty => unconstrained prediction // is_one_off_constraints = true => One-off constraints // is_one_off_constraints = false => Perpetual constraints bool is_one_off_constraints = true; // // Deciding whether to clamp the latent variable or the derivative // true => clamp at derivative // false => clamp at latent variable bool clamp_at_derivative = true; // // When we are clamping derivatives the clamp sticks since derivatives never // change in our current model. So for one-off clamping, we have to reset the // derivative back to its original value after the clamping step. This variable // remembers the time step to reset the clamped derivatives. 
int rest_derivative_clamp_ts = -1; std::vector transition_matrix_collection; std::vector initial_latent_state_collection; //std::vector> latent_mean_collection; //std::vector> latent_std_collection; // Access: // [sample][node id]{partition --> (mean, std)} //std::vector>>> latent_mean_std_collection; std::vector synthetic_latent_state_sequence; bool synthetic_data_experiment = false; /* ============================================================================ Private: Integration with Uncharted's CauseMos interface (in causemos_integration.cpp) ============================================================================ */ /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ training-progress ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ create-model ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ void extract_concept_indicator_mapping_and_observations_from_json( const nlohmann::json &json_indicators, ConceptIndicatorData &concept_indicator_data, ConceptIndicatorEpochs &concept_indicator_epochs); static double epoch_to_timestep(long epoch, long train_start_epoch, long modeling_frequency); void infer_modeling_period( const ConceptIndicatorEpochs & concept_indicator_observation_timesteps, long &shortest_gap, long &longest_gap, long &frequent_gap, int &highest_frequency); void infer_concept_period(const ConceptIndicatorEpochs &concept_indicator_epochs); void set_observed_state_sequence_from_json_dict(const nlohmann::json &json_indicators); /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ create-experiment ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ // Epoch --> (year, month, date) std::tuple epoch_to_year_month_date(long epoch); void extract_projection_constraints( const nlohmann::json &projection_constraints, long skip_steps); FormattedProjectionResult run_causemos_projection_experiment_from_json_dict( const nlohmann::json &json_data); FormattedProjectionResult format_projection_result(); void sample_transition_matrix_collection_from_prior(); /* ============================================================================ Private: Model serialization (in serialize.cpp) ============================================================================ */ void from_delphi_json_dict(const nlohmann::json &json_data, bool verbose); /* ============================================================================ Private: Utilities (in graph_utils.cpp) ============================================================================ */ void clear_state(); void initialize_random_number_generator(); void remove_node(int node_id); // Allocate a num_verts x num_verts 2D array (std::vector of std::vectors) void allocate_A_beta_factors(); void find_all_paths_between(int start, int end, int cutoff); void find_all_paths_between_util(int start, int end, std::vector& path, int cutoff); int calculate_num_timesteps(int start_year, int start_month, int end_year, int end_month); /* ============================================================================ Private: Subgraphs (in subgraphs.cpp) ============================================================================ */ void get_subgraph(int vert, std::unordered_set& vertices_to_keep, int cutoff, bool inward); void get_subgraph_between(int start, int end, std::vector& path, std::unordered_set& vertices_to_keep, int cutoff); /* ============================================================================ Private: Accessors ============================================================================ */ int num_nodes() 
{ return boost::num_vertices(graph); } int get_vertex_id(std::string concept) { using namespace fmt::literals; try { return this->name_to_vertex.at(concept); } catch (const std::out_of_range& oor) { throw std::out_of_range("Concept \"{}\" not in CAG!"_format(concept)); } } auto node_indices() const { return boost::make_iterator_range(boost::vertices(this->graph)); }; auto nodes() { using boost::adaptors::transformed; return this->node_indices() | transformed([&](int v) -> Node& { return (*this)[v]; }); }; auto node_names() { using boost::adaptors::transformed; return this->nodes() | transformed([&](auto node) -> std::string { return node.name; }); }; int get_degree(int vertex_id) { return boost::in_degree(vertex_id, this->graph) + boost::out_degree(vertex_id, this->graph); }; auto out_edges(int i) { return boost::make_iterator_range(boost::out_edges(i, graph)); } Node& source(EdgeDescriptor e) { return (*this)[boost::source(e, this->graph)]; }; Node& target(EdgeDescriptor e) { return (*this)[boost::target(e, this->graph)]; }; auto successors(int i) { return boost::make_iterator_range(boost::adjacent_vertices(i, this->graph)); } auto successors(std::string node_name) { return this->successors(this->name_to_vertex.at(node_name)); } std::vector get_successor_list(std::string node) { std::vector successors = {}; for (int successor : this->successors(node)) { successors.push_back((*this)[successor]); } return successors; } auto predecessors(int i) { return boost::make_iterator_range( boost::inv_adjacent_vertices(i, this->graph)); } auto predecessors(std::string node_name) { return this->predecessors(this->name_to_vertex.at(node_name)); } std::vector get_predecessor_list(std::string node) { std::vector predecessors = {}; for (int predecessor : this->predecessors(node)) { predecessors.push_back((*this)[predecessor]); } return predecessors; } double get_beta(std::string source_vertex_name, std::string target_vertex_name) { // This is ∂target / ∂source return this->A_original(2 * get_vertex_id(target_vertex_name), 2 * get_vertex_id(source_vertex_name) + 1); } /* ============================================================================ Private: Get Training Data Sequence (in train_model.cpp) ============================================================================ */ void set_observed_state_sequence_from_data(std::string country = "South Sudan", std::string state = "", std::string county = ""); std::vector>> get_observed_state_from_data(int year, int month, std::string country, std::string state = "", std::string county = ""); /* ============================================================================ Private: Initializing model parameters (in parameter_initialization.cpp) ============================================================================ */ void initialize_parameters(int res = 200, InitialBeta initial_beta = InitialBeta::ZERO, InitialDerivative initial_derivative = InitialDerivative::DERI_ZERO, bool use_heuristic = false, bool use_continuous = true); void set_indicator_means_and_standard_deviations(); void init_betas_to(InitialBeta ib = InitialBeta::MEAN); void construct_theta_pdfs(); /* ============================================================================ Private: Training by MCMC Sampling (in sampling.cpp) ============================================================================ */ void set_base_transition_matrix(); #ifdef MULTI_THREADING void compute_multiple_matrix_exponentials_parallelly( const Eigen::MatrixXd & A, std::vector> &me_futures); #endif // Sample 
elements of the stochastic transition matrix from the // prior distribution, based on gradable adjectives. void set_transition_matrix_from_betas(); void set_log_likelihood_helper(int ts); void set_log_likelihood(); void sample_from_posterior(); // TODO: Needs testing // TODO: Before calling sample_from_proposal() we must call // AnalysisGraph::find_all_paths() // TODO: Before calling sample_from_proposal(), we must assign initial βs and // run Tran_Mat_Cell::compute_cell() to initialize the first transition // matrix. // TODO: Update Tran_Mat_Cell::compute_cell() to calculate the proper value. // At the moment it just computes the sum of the lengths of all the paths related to // this cell void sample_from_proposal(); void update_transition_matrix_cells(EdgeDescriptor e); double calculate_delta_log_prior(); void revert_back_to_previous_state(); /* ============================================================================ Private: Modeling head nodes (in head_nodes.cpp) ============================================================================ */ void partition_data_and_calculate_mean_std_for_each_partition( Node& n, std::vector& latent_sequence); void apply_constraint_at(int ts, int node_id); void generate_head_node_latent_sequence(int node_id, int num_timesteps, bool sample, int seq_no = 0); void generate_head_node_latent_sequence_from_changes(Node &n, int num_timesteps, bool sample); void generate_head_node_latent_sequences(int samp, int num_timesteps); void update_head_node_latent_state_with_generated_derivatives( int ts_current, int ts_next, int concept_id, std::vector& latent_sequence); void update_latent_state_with_generated_derivatives(int ts_current, int ts_next); /* ============================================================================ Private: Prediction (in prediction.cpp) ============================================================================ */ void check_bounds(); void generate_latent_state_sequences(double initial_prediction_step); void perturb_predicted_latent_state_at(int timestep, int sample_number); void generate_observed_state_sequences(); std::vector> generate_observed_state(Eigen::VectorXd latent_state); FormattedPredictionResult format_prediction_result(); void run_model(int start_year, int start_month, int end_year, int end_month); void add_constraint(int step, std::string concept_name, std::string indicator_name, double indicator_clamp_value); /* ============================================================================ Private: Synthetic Data Experiment (in synthetic_data.cpp) ============================================================================ */ /* ============================================================================ Private: Graph Visualization (in graphviz.cpp) ============================================================================ */ std::pair to_agraph( bool simplified_labels = false, int label_depth = 1, std::string node_to_highlight = "", std::string rankdir = "TB"); /* ============================================================================ Private: Fourier decomposition based seasonal head node model (in fourier.cpp) ============================================================================ */ std::vector generate_frequencies_for_period(int period, int n_components); Eigen::MatrixXd evolve_LDS(const Eigen::MatrixXd &A_base, const Eigen::VectorXd &_s0, int n_modeling_time_steps, double step_size = 1); std::pair assemble_sinusoidal_generating_LDS(const std::vector &freqs); /* std::pair 
assemble_sinusoidal_generating_LDS(unsigned short components, unsigned short period); */ Eigen::MatrixXd generate_sinusoidal_values_for_bins(const Eigen::MatrixXd &A_sin_base, const Eigen::VectorXd &s0_sin, int period); // NOTE: This method could be made a method of the Node class. The best // architecture would be to make a subclass, HeadNode, of Node class and // include this method there. At the moment we incrementally create the // graph while identifying head nodes, we are using Node objects // everywhere. To follow the HeadNode subclass specialization route, we // either have to replace Node objects with HeadNode objects or do a // first pass through the input to identify head nodes and then create // the graph. void compute_fourier_coefficients_from_least_square_optimization( const Eigen::MatrixXd &sinusoidals, int n_components, std::vector &head_node_ids); std::pair assemble_head_node_modeling_LDS(const Eigen::MatrixXd &A_sin_base, const Eigen::VectorXd &s0_sin, int n_components, int n_concepts, std::unordered_map &hn_to_mat_row); bool determine_the_best_number_of_components( const Eigen::MatrixXd & A_hn_period_base, const Eigen::VectorXd & s0_hn_period, int period, int n_components, std::unordered_map &hn_to_mat_row); std::pair assemble_all_seasonal_head_node_modeling_LDS( std::unordered_set fourier_frequency_set, int n_concepts, std::unordered_map &hn_to_mat_row); void assemble_base_LDS(InitialDerivative id); void merge_concept_LDS_into_seasonal_head_node_modeling_LDS( const Eigen::MatrixXd &A_concept_base, const Eigen::VectorXd s0_concept); void predictions_to_csv(const Eigen::MatrixXd &A_base, const Eigen::VectorXd &_s0, int n_time_steps); std::pair fit_seasonal_head_node_model_via_fourier_decomposition(); public: AnalysisGraph() { one_off_constraints.clear(); perpetual_constraints.clear(); } ~AnalysisGraph() {} std::string id; std::string experiment_id = "experiment_id_not_set"; std::string to_json_string(int indent = 0); bool data_heuristic = false; // Set the sampling resolution. void set_res(size_t res); // Set the number of KDE kernels. void set_n_kde_kernels(size_t kde_kernels) {this->n_kde_kernels = kde_kernels;}; // Get the sampling resolution. 
size_t get_res(); // there may be a better place in this file for this prototype void from_causemos_json_dict(const nlohmann::json &json_data, double belief_score_cutoff, double grounding_score_cutoff); /* ============================================================================ Constructors from INDRA-exported JSON (in constructors.cpp) ============================================================================ */ static AnalysisGraph from_indra_statements_json_dict(nlohmann::json json_data, double belief_score_cutoff = 0.9, double grounding_score_cutoff = 0.0, std::string ontology = "WM"); static AnalysisGraph from_indra_statements_json_string(std::string json_string, double belief_score_cutoff = 0.9, double grounding_score_cutoff = 0.0, std::string ontology = "WM"); static AnalysisGraph from_indra_statements_json_file(std::string filename, double belief_score_cutoff = 0.9, double grounding_score_cutoff = 0.0, std::string ontology = "WM"); static AnalysisGraph from_causal_fragments(std::vector causal_fragments); static AnalysisGraph from_causal_fragments_with_data(std::pair, ConceptIndicatorAlignedData> cag_ind_data, int kde_kernels = 5); static AnalysisGraph from_json_string(std::string); AnalysisGraph(const AnalysisGraph& rhs); AnalysisGraph& operator=(AnalysisGraph rhs); /* ============================================================================ Public: Integration with Uncharted's CauseMos interface (in causemos_integration.cpp) ============================================================================ */ /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ training-progress ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ bool get_trained(){ return trained; } bool get_stopped() { return stopped; } double get_log_likelihood(){ return log_likelihood; } double get_previous_log_likelihood(){ return previous_log_likelihood; } double get_log_likelihood_MAP(){ return log_likelihood_MAP; } /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ create-model ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ static AnalysisGraph from_causemos_json_string(std::string json_string, double belief_score_cutoff = 0, double grounding_score_cutoff = 0, int kde_kernels = 4); static AnalysisGraph from_causemos_json_file(std::string filename, double belief_score_cutoff = 0, double grounding_score_cutoff = 0, int kde_kernels = 4); std::string generate_create_model_response(); /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ create-experiment ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ FormattedProjectionResult run_causemos_projection_experiment_from_json_string(std::string json_string); FormattedProjectionResult run_causemos_projection_experiment_from_json_file(std::string filename); /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ edit-weights ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ unsigned short freeze_edge_weight(std::string source, std::string target, double scaled_weight, int polarity); /* ============================================================================ Public: Model serialization (in serialize.cpp) ============================================================================ */ std::string serialize_to_json_string(bool verbose = true, bool compact = true); void export_create_model_json_string(); static AnalysisGraph deserialize_from_json_string(std::string json_string, bool verbose = true); static AnalysisGraph deserialize_from_json_file(std::string filename, bool verbose = true); /* ============================================================================ Public: 
Accessors ============================================================================ */ size_t num_vertices() { return boost::num_vertices(this->graph); } Node& operator[](std::string node_name) { return (*this)[this->get_vertex_id(node_name)]; } Node& operator[](int v) { return this->graph[v]; } size_t num_edges() { return boost::num_edges(this->graph); } auto edges() const { return boost::make_iterator_range(boost::edges(graph)); } Edge& edge(EdgeDescriptor e) { return this->graph[e]; } Edge& edge(int source, int target) { return this->graph[boost::edge(source, target, this->graph).first]; } Edge& edge(int source, std::string target) { return this->graph [boost::edge(source, this->get_vertex_id(target), this->graph).first]; } Edge& edge(std::string source, int target) { return this->graph [boost::edge(this->get_vertex_id(source), target, this->graph).first]; } Edge& edge(std::string source, std::string target) { return this->graph[boost::edge(this->get_vertex_id(source), this->get_vertex_id(target), this->graph) .first]; } boost::range_detail::integer_iterator begin() { return boost::vertices(this->graph).first; }; boost::range_detail::integer_iterator end() { return boost::vertices(this->graph).second; }; Eigen::VectorXd& get_initial_latent_state() { return this->s0; }; double get_MAP_log_likelihood() { return this->log_likelihood_MAP; }; /* ============================================================================ Public: Graph Building (in graph_building.cpp) ============================================================================ */ int add_node(std::string concept); bool add_edge(CausalFragment causal_fragment); void add_edge(CausalFragmentCollection causal_fragments); std::pair add_edge(int, int); std::pair add_edge(int, std::string); std::pair add_edge(std::string, int); std::pair add_edge(std::string, std::string); void remove_node(std::string concept); // Note: // Although just calling this->remove_node(concept) within the loop // for( std::string concept : concept_s ) // is sufficient to implement this method, it is not very efficient. // It re-calculates directed simple paths for each vertex removed. // // Therefore, the code in this->remove_node() has been duplicated with // slightly different flow to achieve a more efficient execution. void remove_nodes(std::unordered_set concepts); void remove_edge(std::string src, std::string tgt); void remove_edges(std::vector> edges); /* ============================================================================ Public: Subgraphs (in subgraphs.cpp) ============================================================================ */ // TODO Change the name of this function to something better, like // restrict_to_subgraph_for_concept, update docstring AnalysisGraph get_subgraph_for_concept(std::string concept, bool inward = false, int depth = -1); AnalysisGraph get_subgraph_for_concept_pair(std::string source_concept, std::string target_concept, int cutoff = -1); /* ============================================================================ Public: Graph Modification (in graph_modification.cpp) ============================================================================ */ void prune(int cutoff = 2); // Merge node n1 into node n2, with the option to specify relative polarity. 
// void // merge_nodes_old(std::string n1, std::string n2, bool same_polarity = true); void merge_nodes(std::string concept_1, std::string concept_2, bool same_polarity = true); void change_polarity_of_edge(std::string source_concept, int source_polarity, std::string target_concept, int target_polarity); /* ============================================================================ Public: Indicator Manipulation (in indicator_manipulation.cpp) ============================================================================ */ int set_indicator(std::string concept, std::string indicator, std::string source); void delete_indicator(std::string concept, std::string indicator); void delete_all_indicators(std::string concept); void map_concepts_to_indicators(int n = 1, std::string country = ""); /* ============================================================================ Public: Utilities (in graph_utils.cpp) ============================================================================ */ /* * Find all the simple paths between all the pairs of nodes of the graph */ void find_all_paths(); void set_random_seed(int seed); void set_derivative(std::string, double); /* ============================================================================ Public: Training the Model (in train_model.cpp) ============================================================================ */ void train_model(int start_year = 2012, int start_month = 1, int end_year = 2017, int end_month = 12, int res = 200, int burn = 10000, std::string country = "South Sudan", std::string state = "", std::string county = "", std::map units = {}, InitialBeta initial_beta = InitialBeta::ZERO, InitialDerivative initial_derivative = InitialDerivative::DERI_ZERO, bool use_heuristic = false, bool use_continuous = true); void run_train_model(int res = 200, int burn = 10000, HeadNodeModel head_node_model = HeadNodeModel::HNM_NAIVE, InitialBeta initial_beta = InitialBeta::ZERO, InitialDerivative initial_derivative = InitialDerivative::DERI_ZERO, bool use_heuristic = false, bool use_continuous = true, int train_start_timestep = 0, int train_timesteps = -1, std::unordered_map concept_periods = {}, std::unordered_map concept_center_measures = {}, std::unordered_map concept_models = {}, std::unordered_map concept_min_vals = {}, std::unordered_map concept_max_vals = {}, std::unordered_map > ext_concepts = {}); void run_train_model_2(int res = 200, int burn = 10000, InitialBeta initial_beta = InitialBeta::ZERO, InitialDerivative initial_derivative = InitialDerivative::DERI_ZERO, bool use_heuristic = false, bool use_continuous = true); /* ============================================================================ Public: Training by MCMC Sampling (in sampling.cpp) ============================================================================ */ void set_initial_latent_state(Eigen::VectorXd vec) { this->s0 = vec; }; void set_default_initial_state(InitialDerivative id = InitialDerivative::DERI_ZERO); static void check_multithreading(); /* ============================================================================ Public: Prediction (in prediction.cpp) ============================================================================ */ Prediction generate_prediction(int start_year, int start_month, int end_year, int end_month, ConstraintSchedule constraints = ConstraintSchedule(), bool one_off = true, bool clamp_deri = true); void generate_prediction(int pred_start_timestep, int pred_timesteps, ConstraintSchedule constraints = ConstraintSchedule(), bool one_off = true, bool clamp_deri = true); std::vector> prediction_to_array(std::string indicator); /* ============================================================================ Public: Synthetic data experiment (in synthetic_data.cpp) ============================================================================ */ static AnalysisGraph generate_random_CAG(unsigned int num_nodes, unsigned int num_extra_edges = 0); void generate_synthetic_data(unsigned int num_obs = 48, double noise_variance = 0.1, unsigned int kde_kernels = 1000, InitialBeta initial_beta = InitialBeta::PRIOR, InitialDerivative initial_derivative = InitialDerivative::DERI_PRIOR, bool use_continuous = false); void initialize_random_CAG(unsigned int num_obs, unsigned int kde_kernels, InitialBeta initial_beta, InitialDerivative initial_derivative, bool use_continuous); void interpolate_missing_months(std::vector &filled_months, Node &n); /* ============================================================================ Public: Graph Visualization (in graphviz.cpp) ============================================================================ */ std::string to_dot(); void to_png(std::string filename = "CAG.png", bool simplified_labels = false, int label_depth = 1, std::string node_to_highlight = "", std::string rankdir = "TB"); /* ============================================================================ Public: Printing (in printing.cpp) ============================================================================ */ void print_nodes(); void print_edges(); void print_name_to_vertex(); void print_indicators(); void print_A_beta_factors(); void print_latent_state(const Eigen::VectorXd&); /* * Prints the simple paths found between all pairs of nodes of the graph * Grouped according to the starting and ending vertex. * find_all_paths() should be called before this to populate the paths */ void print_all_paths(); // Given an edge (source, target vertex ids - i.e. a β ≡ ∂target/∂source), // print all the transition matrix cells that are dependent on it. 
void print_cells_affected_by_beta(int source, int target); void print_training_range(); void print_MAP_estimate(); /* ============================================================================ Public: Formatting output (in format_output.cpp) ============================================================================ */ CredibleIntervals get_credible_interval(Predictions preds); CompleteState get_complete_state(); /* ============================================================================ Public: Database interactions (in database.cpp) ============================================================================ */ sqlite3* open_delphi_db(int mode = SQLITE_OPEN_READONLY); void write_model_to_db(std::string model_id); AdjectiveResponseMap construct_adjective_response_map( std::mt19937 gen, std::uniform_real_distribution& uni_dist, std::normal_distribution& norm_dist, size_t n_kernels ); /* ============================================================================ Public: Profiling Delphi (in profiler.cpp) ============================================================================ */ void initialize_profiler(int res = 100, int kde_kernels = 1000, InitialBeta initial_beta = InitialBeta::ZERO, InitialDerivative initial_derivative = InitialDerivative::DERI_ZERO, bool use_continuous = true); void profile_mcmc(int run = 1, std::string file_name_prefix = "mcmc_timing"); void profile_kde(int run = 1, std::string file_name_prefix = "kde_timing"); void profile_prediction(int run = 1, int pred_timesteps = 24, std::string file_name_prefix = "prediction_timing"); void profile_matrix_exponential(int run = 1, std::string file_name_prefix = "mat_exp_timing", std::vector unique_gaps = {1, 2, 5}, int repeat = 30, bool multi_threaded = false); #ifdef TIME void set_timing_file_prefix(std::string tfp) {this->timing_file_prefix = tfp;} void create_mcmc_part_timing_file() { std::string filename = this->timing_file_prefix + "embeded_" + std::to_string(this->num_nodes()) + "-" + std::to_string(this->num_nodes()) + "_" + std::to_string(this->timing_run_number) + "_" + delphi::utils::get_timestamp() + ".csv"; this->writer = CSVWriter(filename); std::vector headings = {"Run", "Nodes", "Edges", "Wall Clock Time (ns)", "CPU Time (ns)", "Sample Type"}; writer.write_row(headings.begin(), headings.end()); // cout << filename << endl; } void set_timing_run_number(int run) {this->timing_run_number = run;} #endif };
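
The ``continuous`` flag documented in the listing selects between the continuous solution (matrix exponential) and the discretized transition matrix, related by ``A_discretized = I + A_continuous * Δt``. The following Eigen sketch only illustrates that relationship; ``step_latent_state`` is a hypothetical helper, not part of the Delphi API.

.. code-block:: cpp

   #include <Eigen/Dense>
   #include <unsupported/Eigen/MatrixFunctions> // provides the matrix exponential .exp()

   // Advance a latent state s by one step of size delta_t under a
   // continuous-time transition matrix A_continuous, using either the
   // continuous (matrix exponential) update or the discretized update.
   Eigen::VectorXd step_latent_state(const Eigen::MatrixXd& A_continuous,
                                     const Eigen::VectorXd& s,
                                     double delta_t,
                                     bool continuous) {
     if (continuous) {
       // s(t + Δt) = e^(A_continuous * Δt) * s(t)
       return (A_continuous * delta_t).exp() * s;
     }
     // A_discretized = I + A_continuous * Δt
     Eigen::MatrixXd A_discretized =
         Eigen::MatrixXd::Identity(A_continuous.rows(), A_continuous.cols()) +
         A_continuous * delta_t;
     return A_discretized * s;
   }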
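A minimal driver can also help when reading the public interface. The sketch below uses only entry points declared in this header; the file name ``model.json``, the sampling resolution, the burn-in, and the prediction range are illustrative placeholders.

.. code-block:: cpp

   #include <string>

   #include "AnalysisGraph.hpp"

   int main() {
     // Build a CAG from a CauseMos create-model JSON export (placeholder path).
     AnalysisGraph G = AnalysisGraph::from_causemos_json_file("model.json");

     // Train with a modest sampling resolution and burn-in period.
     G.run_train_model(200, /* burn = */ 1000);

     if (G.get_trained()) {
       // Visualize the CAG and run an unconstrained prediction over 2015-2016.
       G.to_png("CAG.png");
       Prediction preds = G.generate_prediction(2015, 1, 2016, 12);
     }

     // Persist the trained model as JSON.
     std::string serialized = G.serialize_to_json_string();
     return 0;
   }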