Program Listing for File to_json.cpp
↰ Return to documentation for file (lib/to_json.cpp)
#include "AnalysisGraph.hpp"
#include <fmt/format.h>
using namespace std;
using namespace fmt::literals;
using namespace nlohmann;
typedef vector<pair<tuple<string, int, string>, tuple<string, int, string>>>
Evidence;
// TODO: For debugging remove later.
using fmt::print;
string AnalysisGraph::to_json_string(int indent) {
nlohmann::json j;
j["id"] = this->id;
j["experiment_id"] = this->experiment_id;
j["edges"] = {};
vector<tuple<string, string, vector<double>>> data;
for (auto e : this->edges()) {
string source = (*this)[boost::source(e, this->graph)].name;
string target = (*this)[boost::target(e, this->graph)].name;
j["edges"].push_back({
{"source", source},
{"target", target},
{"kernels", this->edge(e).kde.dataset},
});
}
for (Node& n : this->nodes()) {
// Just taking the first indicator for now, will try to incorporate multiple
// indicators per node later.
if (n.indicators.size() == 0) {
throw runtime_error("Node {} has no indicators!"_format(n.name));
}
Indicator& indicator = n.indicators.at(0);
j["indicatorData"][n.name] = {
{"name", indicator.name},
{"mean", indicator.mean},
{"source", indicator.source},
};
}
return j.dump(indent);
}
string AnalysisGraph::serialize_to_json_string(bool verbose, bool compact) {
nlohmann::json j;
j["id"] = this->id;
j["experiment_id"] = this->experiment_id;
// This is an unordered_map:
// concept name → Boost graph vertex id
// Indicator assignment and observations are recorded according to this id.
// Concept ids are continuous between 0 and this->num_vertices()
// Edges are also described based on concept ids.
//j["concepts"] = this->name_to_vertex;
// To go for a more compressed version of concepts where array index is the
// concept id
j["concepts"] = {};
if (!verbose) {
j["periods"] = {};
j["has_min"] = {};
j["min_val_obs"] = {};
j["has_max"] = {};
j["max_val_obs"] = {};
}
// Concept to indicator mapping. This is an array of array of objects
// Outer array goes from 0 to this->num_vertices() - 1 and it is indexed by
// concept id. Inner array keeps all the indicator data for a single
// concept. Each indicator is represented by a single json object.
// Indicator ids (iid) are used to index into observations.
// Irrespective of the order at which we populate the outer array, the json
// library takes care of storing them ordered by the concept id.
j["conceptIndicators"] = {};
for (int v = 0; v < this->num_vertices(); v++) {
Node &n = (*this)[v];
if (verbose) {
j["concepts"].push_back(
{{"concept", n.name},
{"cid", this->name_to_vertex.at(n.name)},
{"period", n.period},
{"has_min", n.has_min},
{"min_val_obs", n.min_val_obs},
{"has_max", n.has_max},
{"max_val_obs", n.max_val_obs}});
for (Indicator &ind : n.indicators) {
j["conceptIndicators"][this->name_to_vertex.at(n.name)].push_back(
//j["conceptIndicators"][n.name].push_back(
{
{"indicator", ind.name},
{"iid", n.nameToIndexMap.at(ind.name)},
{"source", ind.source},
{"func", ind.aggregation_method},
{"unit", ind.unit}
}
);
}
} else {
// This is a more compressed way to store concept information where
// array index keeps track of the concept id
j["concepts"][this->name_to_vertex.at(n.name)] = n.name;
j["periods"][this->name_to_vertex.at(n.name)] = n.period;
j["has_min"][this->name_to_vertex.at(n.name)] = n.has_min;
j["min_val_obs"][this->name_to_vertex.at(n.name)] = n.min_val_obs;
j["has_max"][this->name_to_vertex.at(n.name)] = n.has_max;
j["max_val_obs"][this->name_to_vertex.at(n.name)] = n.max_val_obs;
for (Indicator &ind : n.indicators) {
// This is a more compressed representation. We do not store iid
// separately. iid is the index at which this indicator information
// object is stored at.
j["conceptIndicators"]
[this->name_to_vertex.at(n.name)]
[n.nameToIndexMap.at(ind.name)] =
{
{"indicator", ind.name},
{"source", ind.source},
{"func", ind.aggregation_method},
{"unit", ind.unit}
};
}
}
}
// To test how things get ordered in the json side
// We do not need to insert nodes according to the order of their ids.
// json object created orders them properly.
/*
j["conceptIndicators"][8].push_back(
{
{"indicator", "eight"},
{"iid", 8},
{"source", "src"},
{"func", "func"},
{"unit", "mm"}
}
);
j["conceptIndicators"][5].push_back(
{
{"indicator", "five"},
{"iid", 5},
{"source", "src"},
{"func", "func"},
{"unit", "mm"}
}
);
*/
// Serialize the edges.
// Edges are identified by the source and target vertex ids.
j["edges"] = {};
for (const auto e : this->edges()) {
const Node &source = (*this)[boost::source(e, this->graph)];
const Node &target = (*this)[boost::target(e, this->graph)];
int num_evidence = this->edge(e).evidence.size();
Evidence evidence = Evidence(num_evidence);
for (int evid = 0; evid < num_evidence; evid++) {
const Statement &stmt = this->edge(e).evidence[evid];
const Event &subj = stmt.subject;
const Event &obj = stmt.object;
evidence[evid] = {{subj.adjective, subj.polarity, subj.concept_name},
{obj.adjective, obj.polarity, obj.concept_name}};
}
if (verbose) {
j["edges"].push_back({{"source", source.name},
{"target", target.name},
{"kernels", compact ? vector<double>()
: this->edge(e).kde.dataset},
{"evidence", evidence},
{"thetas", this->edge(e).sampled_thetas},
{"log_prior_hist",
compact ? vector<double>()
: this->edge(e).kde.log_prior_hist},
{"n_bins", this->edge(e).kde.n_bins},
{"theta", this->edge(e).get_theta()},
{"is_frozen", this->edge(e).is_frozen()}});
}
else {
// This is a more compressed version of edges. We do not utilize space
// for key names. Maybe once we have debugged the whole process, we
// might be able to go for this.
j["edges"].push_back(make_tuple(name_to_vertex.at(source.name),
name_to_vertex.at(target.name),
evidence,
this->edge(e).sampled_thetas,
compact ? vector<double>()
: this->edge(e).kde.dataset,
compact ? vector<double>()
: this->edge(e).kde.log_prior_hist,
this->edge(e).kde.n_bins,
this->edge(e).get_theta(),
this->edge(e).is_frozen()));
}
}
if (verbose) {
j["start_year"] = this->training_range.first.first;
j["start_month"] = this->training_range.first.second;
j["end_year"] = this->training_range.second.first;
j["end_month"] = this->training_range.second.second;
} else {
// This is a pair of pairs where the first pair is <start_year,
// start_month> and the second pair is <end_year, end_month>
j["training_range"] = this->training_range;
}
j["num_modeling_timesteps_per_one_observation_timestep"] = this->num_modeling_timesteps_per_one_observation_timestep;
j["train_start_epoch"] = this->train_start_epoch;
j["train_end_epoch"] = this->train_end_epoch;
j["train_timesteps"] = this->n_timesteps;
j["modeling_timestep_gaps"] = this->modeling_timestep_gaps;
j["observation_timesteps_sorted"] = this->observation_timesteps_sorted;
j["model_data_agg_level"] = this->model_data_agg_level;
// This contains all the observations. Indexing goes by
// [ timestep ][ concept ][ indicator ][ observation ]
// Concept and indicator indexes are according to the concept and indicator
// ids mentioned above.
j["observations"] = this->observed_state_sequence;
j["trained"] = this->trained;
if (this->trained) {
// Serialize the sampled transition matrices and initial latent states
// along with on the parameters related to training.
// For transition matrices, we only need to serialize odd column
// positions on even rows (even, odd) as all the other positions remain
// constant.
// For initial letter states, we only need to serialize odd positions
// as those are the sampled derivatives. All the even positions stay
// constant at 1.
j["res"] = this->res;
j["kde_kernels"] = this->n_kde_kernels;
j["continuous"] = this->continuous;
j["data_heuristic"] = this->data_heuristic;
j["causemos_call"] = this->causemos_call;
j["MAP_sample_number"] = this->MAP_sample_number;
j["log_likelihood_MAP"] = this->log_likelihood_MAP;
j["head_node_model"] = this->head_node_model;
j["log_likelihoods"] = compact ? vector<double>()
: this->log_likelihoods;
int num_verts = this->num_vertices();
int num_els_per_mat = num_verts * num_verts;
// Instead of serializing things as sequences of matrices and vectors
// we flatten them into one single long vectors. Here we are setting
// the last element of each vector to a dummy value just to make the
// json library allocate all the memory required to store these
// vectors. These dummy values get overwritten with actual data.
j["matrices"][num_els_per_mat * this->res - 1] = 11111111111111;
j["S0s"][num_verts * this->res - 1] = 11111111111111;
for (int samp = 0; samp < this->res; samp++) {
for (int row = 0; row < num_verts; row++) {
j["S0s"][samp * num_verts + row] = this->initial_latent_state_collection[samp](row * 2 + 1);
for (int col = 0; col < num_verts; col++) {
j["matrices"][samp * num_els_per_mat + row * num_verts + col] = this->transition_matrix_collection[samp](row * 2, col * 2 + 1);
}
}
}
if (this->head_node_model == HNM_FOURIER) {
/*
* Transition matrix
* b ............ b 0 ............. 0
* b ............ b 0 ............. 0
*
* h ............ h c ............. c
* h ............ h c ............. c
* 0 ............ 0 s s 0 ......... 0
* 0 ............ 0 s s 0 ......... 0
* 0 ................ 0 s s 0 ..... 0
* 0 ................ 0 s s 0 ..... 0
*
* 0 .......................... 0 s s
* 0 .......................... 0 s s
*
* b - Body node rows
* h - Seasonal head node rows
* c - Fourier coefficient rows
* s - Sinusoidal generating rows
*
* Rows come in pairs: first and second derivative rows.
*
* Here we are not saving the block of b and h values. Those
* are the sampled transition matrices.
*
* sinusoidal_rows = # sinusoidal generating rows
* = # Fourier coefficient columns
*
* # coefficient rows per head node = 2
* # coefficients per head node = 2 * sinusoidal_rows
* # coefficients for all head nodes = 2 * sinusoidal_rows * # head nodes
*
* max # non-zero values per sinusoidal row = 2
* max # non-zero sinusoidal values = 2 * sinusoidal_rows
*
* Total number of values to save
* = 2 * sinusoidal_rows * # head nodes + 2 * sinusoidal_rows
* = 2 * sinusoidal_rows * (# head nodes + 1)
*
* Initial state
* # rows per head node = 2
* # for all head nodes = 2 * # head nodes
*
* # non-zero values per frequency = 1
* # non-zero values for all frequencies = sinusoidal_rows / 2
* (Since we are using 0 radians as the initial angle, the
* initial state for sine curves = sin(0) = 0. So we do not
* need to save that)
*
* Total number of values to save
* = sinusoidal_rows / 2 + 2 * # head nodes
*
*/
// The first diagonal block of sinusoidal_start_idx by
// sinusoidal_start_idx contains the transition matrix portion that
// describes the relationships between concepts.
int sinusoidal_start_idx = 2 * num_verts;
// The next diagonal block of sinusoidal_rows by sinusoidal_rows
// contains the transition matrix part that generates sinusoidals
// of different frequencies.
int sinusoidal_rows = this->A_fourier_base.rows() -
sinusoidal_start_idx;
vector<int> head_node_ids_sorted(this->head_nodes.begin(),
this->head_nodes.end());
sort(head_node_ids_sorted.begin(), head_node_ids_sorted.end());
// Set the last element with a dummy value to allocate memory
j["A_fourier_base"][sinusoidal_rows * 2 *
(this->head_nodes.size() + 1) - 1] = 111111;
j["s0_fourier"][sinusoidal_rows / 2 +
2 * this->head_nodes.size() - 1] = 111111;
j["head_node_ids"] = head_node_ids_sorted;
j["sinusoidal_rows"] = sinusoidal_rows;
int n_hn = head_node_ids_sorted.size();
int fouri_val_idx = 0; // To keep track of the next index to insert
// Save Fourier coefficients of seasonal head nodes
for (int hn_idx = 0; hn_idx < n_hn; hn_idx++) {
int hn_id = head_node_ids_sorted[hn_idx];
int dot_row = 2 * hn_id;
int dot_dot_row = dot_row + 1;
// Save coefficients for derivative row and second derivative
// row.
for (int row: {dot_row, dot_dot_row}) {
// Save coefficients for one row
for (int col = 0; col < sinusoidal_rows; col++) {
j["A_fourier_base"][fouri_val_idx++] = this->A_fourier_base(row,
sinusoidal_start_idx + col);
}
}
// Saving initial value and derivative for head node hn_id
// For the state vector, dot_row is the value row and
// dot_dot_row is the dot row (We are just reusing the same
// indexes)
j["s0_fourier"][2 * hn_idx] = this->s0_fourier(dot_row);
j["s0_fourier"][2 * hn_idx + 1] = this->s0_fourier(dot_dot_row);
}
// Saving different frequency sinusoidal curves generating portions
for (int row = 0; row < sinusoidal_rows; row += 2) {
int dot_row = sinusoidal_start_idx + row;
if (this->continuous) {
// There is only two non-zero values for a pair of rows.
// this->A_fourier_base(dot_row, dot_row + 1) is always 1,
// and so we do not need to save it.
j["A_fourier_base"][fouri_val_idx++] =
this->A_fourier_base(dot_row + 1, dot_row);
} else {
// Extract a 2 x 2 block of non-zero values.
for (int r = 0; r < 2; r++) {
for (int c = 0; c < 2; c++) {
j["A_fourier_base"][fouri_val_idx++] =
this->A_fourier_base(dot_row + r,
dot_row + c);
}
}
}
// Saving the initial state for cosine curves
// For the state vector, dot_dot_row is the dot row (We are just
// reusing the same index)
j["s0_fourier"][2 * n_hn + row] = this->s0_fourier(dot_row + 1);
}
}
}
return j.dump(4);
}
void AnalysisGraph::export_create_model_json_string() {
nlohmann::json j;
j["id"] = this->id;
j["experiment_id"] = this->experiment_id;
j["statements"] = {};
j["statements"].push_back({{"belief", 1}});
j["conceptIndicators"] = {};
cout << j.dump(4) << endl;
}