.. _program_listing_file_lib_to_json.cpp:

Program Listing for File to_json.cpp
====================================

|exhale_lsh| :ref:`Return to documentation for file <file_lib_to_json.cpp>` (``lib/to_json.cpp``)

.. |exhale_lsh| unicode:: U+021B0 .. UPWARDS ARROW WITH TIP LEFTWARDS

.. code-block:: cpp

   #include "AnalysisGraph.hpp"
   #include <fmt/format.h>

   using namespace std;
   using namespace fmt::literals;
   using namespace nlohmann;

   typedef vector<pair<tuple<string, int, string>, tuple<string, int, string>>>
       Evidence;

   // TODO: For debugging; remove later.
   using fmt::print;

   string AnalysisGraph::to_json_string(int indent) {
     nlohmann::json j;
     j["id"] = this->id;
     j["experiment_id"] = this->experiment_id;
     j["edges"] = {};
     vector<tuple<string, string, vector<double>>> data;

     for (auto e : this->edges()) {
       string source = (*this)[boost::source(e, this->graph)].name;
       string target = (*this)[boost::target(e, this->graph)].name;
       j["edges"].push_back({
           {"source", source},
           {"target", target},
           {"kernels", this->edge(e).kde.dataset},
       });
     }

     for (Node& n : this->nodes()) {
       // Just taking the first indicator for now; we will try to incorporate
       // multiple indicators per node later.
       if (n.indicators.size() == 0) {
         throw runtime_error("Node {} has no indicators!"_format(n.name));
       }
       Indicator& indicator = n.indicators.at(0);
       j["indicatorData"][n.name] = {
           {"name", indicator.name},
           {"mean", indicator.mean},
           {"source", indicator.source},
       };
     }
     return j.dump(indent);
   }

   string AnalysisGraph::serialize_to_json_string(bool verbose, bool compact) {
     nlohmann::json j;
     j["id"] = this->id;
     j["experiment_id"] = this->experiment_id;

     // This is an unordered_map:
     //     concept name → Boost graph vertex id
     // Indicator assignment and observations are recorded according to this
     // id. Concept ids are contiguous, running from 0 to
     // this->num_vertices() - 1. Edges are also described based on concept
     // ids.
     //j["concepts"] = this->name_to_vertex;

     // A more compressed version of concepts, where the array index is the
     // concept id.
     j["concepts"] = {};

     if (!verbose) {
       j["periods"] = {};
       j["has_min"] = {};
       j["min_val_obs"] = {};
       j["has_max"] = {};
       j["max_val_obs"] = {};
     }

     // Concept to indicator mapping. This is an array of arrays of objects.
     // The outer array runs from 0 to this->num_vertices() - 1 and is indexed
     // by concept id. Each inner array keeps all the indicator data for a
     // single concept, with each indicator represented by a single json
     // object. Indicator ids (iid) are used to index into observations.
     // Irrespective of the order in which we populate the outer array, the
     // json library takes care of storing entries ordered by concept id.
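     //
     // A hypothetical illustration of the compact (!verbose) layout (these
     // concept and indicator names are made up), for two concepts with one
     // indicator each:
     //
     //     "concepts":          ["rainfall", "crop production"],
     //     "conceptIndicators": [[{"indicator": "Precipitation", ...}],
     //                           [{"indicator": "Crop yield", ...}]]
     //
     // Concept id v indexes position v of "concepts", "periods", "has_min",
     // and the other parallel arrays.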
j["conceptIndicators"] = {}; for (int v = 0; v < this->num_vertices(); v++) { Node &n = (*this)[v]; if (verbose) { j["concepts"].push_back( {{"concept", n.name}, {"cid", this->name_to_vertex.at(n.name)}, {"period", n.period}, {"has_min", n.has_min}, {"min_val_obs", n.min_val_obs}, {"has_max", n.has_max}, {"max_val_obs", n.max_val_obs}}); for (Indicator &ind : n.indicators) { j["conceptIndicators"][this->name_to_vertex.at(n.name)].push_back( //j["conceptIndicators"][n.name].push_back( { {"indicator", ind.name}, {"iid", n.nameToIndexMap.at(ind.name)}, {"source", ind.source}, {"func", ind.aggregation_method}, {"unit", ind.unit} } ); } } else { // This is a more compressed way to store concept information where // array index keeps track of the concept id j["concepts"][this->name_to_vertex.at(n.name)] = n.name; j["periods"][this->name_to_vertex.at(n.name)] = n.period; j["has_min"][this->name_to_vertex.at(n.name)] = n.has_min; j["min_val_obs"][this->name_to_vertex.at(n.name)] = n.min_val_obs; j["has_max"][this->name_to_vertex.at(n.name)] = n.has_max; j["max_val_obs"][this->name_to_vertex.at(n.name)] = n.max_val_obs; for (Indicator &ind : n.indicators) { // This is a more compressed representation. We do not store iid // separately. iid is the index at which this indicator information // object is stored at. j["conceptIndicators"] [this->name_to_vertex.at(n.name)] [n.nameToIndexMap.at(ind.name)] = { {"indicator", ind.name}, {"source", ind.source}, {"func", ind.aggregation_method}, {"unit", ind.unit} }; } } } // To test how things get ordered in the json side // We do not need to insert nodes according to the order of their ids. // json object created orders them properly. /* j["conceptIndicators"][8].push_back( { {"indicator", "eight"}, {"iid", 8}, {"source", "src"}, {"func", "func"}, {"unit", "mm"} } ); j["conceptIndicators"][5].push_back( { {"indicator", "five"}, {"iid", 5}, {"source", "src"}, {"func", "func"}, {"unit", "mm"} } ); */ // Serialize the edges. // Edges are identified by the source and target vertex ids. j["edges"] = {}; for (const auto e : this->edges()) { const Node &source = (*this)[boost::source(e, this->graph)]; const Node &target = (*this)[boost::target(e, this->graph)]; int num_evidence = this->edge(e).evidence.size(); Evidence evidence = Evidence(num_evidence); for (int evid = 0; evid < num_evidence; evid++) { const Statement &stmt = this->edge(e).evidence[evid]; const Event &subj = stmt.subject; const Event &obj = stmt.object; evidence[evid] = {{subj.adjective, subj.polarity, subj.concept_name}, {obj.adjective, obj.polarity, obj.concept_name}}; } if (verbose) { j["edges"].push_back({{"source", source.name}, {"target", target.name}, {"kernels", compact ? vector() : this->edge(e).kde.dataset}, {"evidence", evidence}, {"thetas", this->edge(e).sampled_thetas}, {"log_prior_hist", compact ? vector() : this->edge(e).kde.log_prior_hist}, {"n_bins", this->edge(e).kde.n_bins}, {"theta", this->edge(e).get_theta()}, {"is_frozen", this->edge(e).is_frozen()}}); } else { // This is a more compressed version of edges. We do not utilize space // for key names. Maybe once we have debugged the whole process, we // might be able to go for this. j["edges"].push_back(make_tuple(name_to_vertex.at(source.name), name_to_vertex.at(target.name), evidence, this->edge(e).sampled_thetas, compact ? vector() : this->edge(e).kde.dataset, compact ? 
         j["edges"].push_back(
             make_tuple(name_to_vertex.at(source.name),
                        name_to_vertex.at(target.name),
                        evidence,
                        this->edge(e).sampled_thetas,
                        compact ? vector<double>()
                                : this->edge(e).kde.dataset,
                        compact ? vector<double>()
                                : this->edge(e).kde.log_prior_hist,
                        this->edge(e).kde.n_bins,
                        this->edge(e).get_theta(),
                        this->edge(e).is_frozen()));
       }
     }

     if (verbose) {
       j["start_year"] = this->training_range.first.first;
       j["start_month"] = this->training_range.first.second;
       j["end_year"] = this->training_range.second.first;
       j["end_month"] = this->training_range.second.second;
     } else {
       // This is a pair of pairs, where the first pair is
       // <start year, start month> and the second pair is
       // <end year, end month>.
       j["training_range"] = this->training_range;
     }

     j["num_modeling_timesteps_per_one_observation_timestep"] =
         this->num_modeling_timesteps_per_one_observation_timestep;
     j["train_start_epoch"] = this->train_start_epoch;
     j["train_end_epoch"] = this->train_end_epoch;
     j["train_timesteps"] = this->n_timesteps;
     j["modeling_timestep_gaps"] = this->modeling_timestep_gaps;
     j["observation_timesteps_sorted"] = this->observation_timesteps_sorted;
     j["model_data_agg_level"] = this->model_data_agg_level;

     // This contains all the observations. Indexing goes by
     //     [ timestep ][ concept ][ indicator ][ observation ]
     // Concept and indicator indexes are according to the concept and
     // indicator ids mentioned above.
     j["observations"] = this->observed_state_sequence;

     j["trained"] = this->trained;

     if (this->trained) {
       // Serialize the sampled transition matrices and initial latent states,
       // along with the parameters related to training.
       // For transition matrices, we only need to serialize odd column
       // positions on even rows (even, odd), as all the other positions
       // remain constant.
       // For initial latent states, we only need to serialize odd positions,
       // as those are the sampled derivatives. All the even positions stay
       // constant at 1.
       j["res"] = this->res;
       j["kde_kernels"] = this->n_kde_kernels;
       j["continuous"] = this->continuous;
       j["data_heuristic"] = this->data_heuristic;
       j["causemos_call"] = this->causemos_call;
       j["MAP_sample_number"] = this->MAP_sample_number;
       j["log_likelihood_MAP"] = this->log_likelihood_MAP;
       j["head_node_model"] = this->head_node_model;
       j["log_likelihoods"] = compact ? vector<double>()
                                      : this->log_likelihoods;

       int num_verts = this->num_vertices();
       int num_els_per_mat = num_verts * num_verts;

       // Instead of serializing these as sequences of matrices and vectors,
       // we flatten them into single long vectors. Here we set the last
       // element of each vector to a dummy value, just to make the json
       // library allocate all the memory required to store these vectors.
       // The dummy values get overwritten with actual data.
       j["matrices"][num_els_per_mat * this->res - 1] = 11111111111111;
       j["S0s"][num_verts * this->res - 1] = 11111111111111;

       for (int samp = 0; samp < this->res; samp++) {
         for (int row = 0; row < num_verts; row++) {
           j["S0s"][samp * num_verts + row] =
               this->initial_latent_state_collection[samp](row * 2 + 1);

           for (int col = 0; col < num_verts; col++) {
             j["matrices"][samp * num_els_per_mat + row * num_verts + col] =
                 this->transition_matrix_collection[samp](row * 2, col * 2 + 1);
           }
         }
       }
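       // A worked example of the flattened indexing above (hypothetical
       // sizes): with num_verts = 3 (so num_els_per_mat = 9) and res = 200,
       // j["matrices"] holds 9 * 200 = 1800 values, and the (row 1, col 2)
       // cell of sample 4 lands at index 4 * 9 + 1 * 3 + 2 = 41; j["S0s"]
       // holds 3 * 200 = 600 values, and the sampled derivative of vertex 1
       // in sample 4 lands at index 4 * 3 + 1 = 13.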

       if (this->head_node_model == HNM_FOURIER) {
         /*
          * Transition matrix
          * b ............ b 0 ............. 0
          * b ............ b 0 ............. 0
          *
          * h ............ h c ............. c
          * h ............ h c ............. c
          * 0 ............ 0 s s 0 ......... 0
          * 0 ............ 0 s s 0 ......... 0
          * 0 ................ 0 s s 0 ..... 0
          * 0 ................ 0 s s 0 ..... 0
          *
          * 0 .......................... 0 s s
          * 0 .......................... 0 s s
          *
          * b - Body node rows
          * h - Seasonal head node rows
          * c - Fourier coefficient rows
          * s - Sinusoidal generating rows
          *
          * Rows come in pairs: first and second derivative rows.
          *
          * Here we are not saving the block of b and h values. Those are the
          * sampled transition matrices.
          *
          * sinusoidal_rows = # sinusoidal generating rows
          *                 = # Fourier coefficient columns
          *
          * # coefficient rows per head node = 2
          * # coefficients per head node = 2 * sinusoidal_rows
          * # coefficients for all head nodes
          *     = 2 * sinusoidal_rows * # head nodes
          *
          * max # non-zero values per sinusoidal row = 2
          * max # non-zero sinusoidal values = 2 * sinusoidal_rows
          *
          * Total number of values to save
          *     = 2 * sinusoidal_rows * # head nodes + 2 * sinusoidal_rows
          *     = 2 * sinusoidal_rows * (# head nodes + 1)
          *
          * Initial state
          * # rows per head node = 2
          * # for all head nodes = 2 * # head nodes
          *
          * # non-zero values per frequency = 1
          * # non-zero values for all frequencies = sinusoidal_rows / 2
          * (Since we are using 0 radians as the initial angle, the initial
          * state for sine curves = sin(0) = 0, so we do not need to save
          * that.)
          *
          * Total number of values to save
          *     = sinusoidal_rows / 2 + 2 * # head nodes
          */
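         // For instance (hypothetical sizes), with sinusoidal_rows = 4 and
         // 3 seasonal head nodes, A_fourier_base stores
         // 2 * 4 * (3 + 1) = 32 values and s0_fourier stores
         // 4 / 2 + 2 * 3 = 8 values.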

         // The first diagonal block, of sinusoidal_start_idx by
         // sinusoidal_start_idx, contains the portion of the transition
         // matrix that describes the relationships between concepts.
         int sinusoidal_start_idx = 2 * num_verts;

         // The next diagonal block, of sinusoidal_rows by sinusoidal_rows,
         // contains the transition matrix part that generates sinusoidals of
         // different frequencies.
         int sinusoidal_rows =
             this->A_fourier_base.rows() - sinusoidal_start_idx;

         vector<int> head_node_ids_sorted(this->head_nodes.begin(),
                                          this->head_nodes.end());
         sort(head_node_ids_sorted.begin(), head_node_ids_sorted.end());

         // Set the last element to a dummy value to allocate memory.
         j["A_fourier_base"]
          [sinusoidal_rows * 2 * (this->head_nodes.size() + 1) - 1] = 111111;
         j["s0_fourier"]
          [sinusoidal_rows / 2 + 2 * this->head_nodes.size() - 1] = 111111;

         j["head_node_ids"] = head_node_ids_sorted;
         j["sinusoidal_rows"] = sinusoidal_rows;

         int n_hn = head_node_ids_sorted.size();
         int fouri_val_idx = 0; // To keep track of the next index to insert at

         // Save the Fourier coefficients of the seasonal head nodes.
         for (int hn_idx = 0; hn_idx < n_hn; hn_idx++) {
           int hn_id = head_node_ids_sorted[hn_idx];
           int dot_row = 2 * hn_id;
           int dot_dot_row = dot_row + 1;

           // Save coefficients for the derivative row and the second
           // derivative row.
           for (int row : {dot_row, dot_dot_row}) {
             // Save coefficients for one row.
             for (int col = 0; col < sinusoidal_rows; col++) {
               j["A_fourier_base"][fouri_val_idx++] =
                   this->A_fourier_base(row, sinusoidal_start_idx + col);
             }
           }

           // Save the initial value and derivative for head node hn_id.
           // For the state vector, dot_row is the value row and dot_dot_row
           // is the dot row (we are just reusing the same indexes).
           j["s0_fourier"][2 * hn_idx] = this->s0_fourier(dot_row);
           j["s0_fourier"][2 * hn_idx + 1] = this->s0_fourier(dot_dot_row);
         }

         // Save the portions that generate the sinusoidal curves of
         // different frequencies.
         for (int row = 0; row < sinusoidal_rows; row += 2) {
           int dot_row = sinusoidal_start_idx + row;

           if (this->continuous) {
             // There are only two non-zero values for a pair of rows, and
             // this->A_fourier_base(dot_row, dot_row + 1) is always 1, so we
             // do not need to save it.
             j["A_fourier_base"][fouri_val_idx++] =
                 this->A_fourier_base(dot_row + 1, dot_row);
           } else {
             // Extract a 2 x 2 block of non-zero values.
             for (int r = 0; r < 2; r++) {
               for (int c = 0; c < 2; c++) {
                 j["A_fourier_base"][fouri_val_idx++] =
                     this->A_fourier_base(dot_row + r, dot_row + c);
               }
             }
           }

           // Save the initial state for the cosine curves.
           // For the state vector, dot_dot_row is the dot row (we are just
           // reusing the same index).
           j["s0_fourier"][2 * n_hn + row] = this->s0_fourier(dot_row + 1);
         }
       }
     }

     return j.dump(4);
   }

   void AnalysisGraph::export_create_model_json_string() {
     nlohmann::json j;
     j["id"] = this->id;
     j["experiment_id"] = this->experiment_id;
     j["statements"] = {};
     j["statements"].push_back({{"belief", 1}});
     j["conceptIndicators"] = {};

     cout << j.dump(4) << endl;
   }
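
A minimal usage sketch of the two serializers above (hypothetical driver code, not part of ``lib/to_json.cpp``; it assumes an already-constructed ``AnalysisGraph`` instance):

.. code-block:: cpp

   #include "AnalysisGraph.hpp"
   #include <iostream>

   void dump_model(AnalysisGraph& G) {
     // Human-readable summary, indented by 4 spaces.
     std::cout << G.to_json_string(4) << std::endl;

     // Full serialization (verbose = true); compact = true drops the bulky
     // kernel, log-prior-histogram, and log-likelihood arrays.
     std::cout << G.serialize_to_json_string(true, true) << std::endl;
   }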