Program Listing for File causemos_integration.cpp
↰ Return to documentation for file ``lib/causemos_integration.cpp``
// CauseMos integration methods
#include "AnalysisGraph.hpp"
#include "ExperimentStatus.hpp"
#include "utils.hpp"
#include <fmt/format.h>
#include <fstream>
#include <range/v3/all.hpp>
#include <time.h>
#include <limits.h>
using namespace std;
using namespace delphi::utils;
using namespace fmt::literals;
using fmt::print;
/*
============================================================================
Private: Integration with Uncharted's CauseMos interface
============================================================================
*/
/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
create-model (private)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
// Extracts the concept → indicator mapping and every indicator observation
// from the "conceptIndicators" section of a CauseMos create-model request.
//
// json_indicators          : JSON dict keyed by concept name.
// concept_indicator_data   : [out] per concept → per indicator → a multimap
//                            of epoch → observation (multiple observations
//                            per epoch are allowed).
// concept_indicator_epochs : [out] per concept → the sorted list of epochs
//                            carrying at least one observation. Used later to
//                            assess the observation frequency.
//
// Side effects:
//   - this->train_start_epoch / train_end_epoch become the earliest / latest
//     observation epoch across all indicators.
//   - Every node gets exactly one indicator attached (a synthetic
//     "Qualitative measure of <concept>" placeholder when the JSON carries no
//     usable indicator for it).
//   - Node metadata (min/max bounds, period, aggregation level) and
//     this->model_data_agg_level are filled in from the JSON when present.
void AnalysisGraph::extract_concept_indicator_mapping_and_observations_from_json(
    const nlohmann::json &json_indicators,
    ConceptIndicatorData &concept_indicator_data,
    ConceptIndicatorEpochs &concept_indicator_epochs) {
  int num_verts = this->num_vertices();

  // Start from an impossible range; every observation epoch tightens it.
  this->train_start_epoch = LONG_MAX;
  this->train_end_epoch = 0;

  long epoch;
  // Aggregation level of the first indicator that declares a resolution;
  // used only to warn about mixed-resolution input.
  DataAggregationLevel first_resolution = DataAggregationLevel::NONE;

  for (int v = 0; v < num_verts; v++) {
    Node& n = (*this)[v];

    // At the moment we are only attaching one indicator per node
    // when Analysis graph is called through CauseMos
    string indicator_name = "Qualitative measure of {}"_format(n.name);
    string indicator_source = "Delphi";

    if (!json_indicators.contains(n.name)) {
      // In this case we do not have any observation data to train the model.
      // Attach the placeholder indicator with a fixed mean of 1.0 instead.
      this->set_indicator(n.name, indicator_name, indicator_source);
      n.get_indicator(indicator_name).set_mean(1.0);
      continue;
    }

    // TODO: At the moment the json file specifies one indicator per
    // concept. At a later time if we update the json file to specify
    // multiple indicators per concept, this is one place we have to
    // update. Instead of a single indicator, we might get a list of
    // indicators here. Rest of the code would have to be updated
    // according to whatever the updated json file format we come up with.
    auto indicator = json_indicators[n.name];

    if (!indicator["source"].is_null()) {
      indicator_source = indicator["source"].get<string>();
    }

    if (indicator["name"].is_null()) {
      // This case there could be observations. However we are discarding
      // them. Why?
      this->set_indicator(n.name, indicator_name, indicator_source);
      n.get_indicator(indicator_name).set_mean(1.0);
      continue;
    }

    indicator_name = indicator["name"].get<string>();

    // Optional observation bounds for this indicator.
    if (!indicator["minValue"].is_null()) {
      n.has_min = true;
      n.min_val_obs = indicator["minValue"].get<double>();
    }
    if (!indicator["maxValue"].is_null()) {
      n.has_max = true;
      n.max_val_obs = indicator["maxValue"].get<double>();
    }

    // Optional seasonal period for this concept.
    if (!indicator["period"].is_null()) {
      n.period = indicator["period"].get<int>();
    }

    // Optional data resolution; only "month" and "year" are recognized.
    if (!indicator["resolution"].is_null()) {
      string resolution = indicator["resolution"].get<string>();
      if (resolution.compare("month") == 0) {
        n.agg_level = DataAggregationLevel::MONTHLY;
      }
      else if (resolution.compare("year") == 0) {
        n.agg_level = DataAggregationLevel::YEARLY;
      }
    }

    if (first_resolution == DataAggregationLevel::NONE) {
      // The first indicator seen decides the model-wide aggregation level.
      first_resolution = n.agg_level;
      this->model_data_agg_level = n.agg_level;
    }
    else if (first_resolution != n.agg_level) {
      // Mixed resolutions are accepted but flagged; predictions degrade.
      cout << "\n* * * WARNING: Mixed resolution observations! * * *\n";
      cout << "\t" << n.name << " has resolution " <<
        (n.agg_level == DataAggregationLevel::YEARLY ? "yearly" : "monthly") << endl;
      cout << "\tDelphi is currently not designed to model mixed resolution data\n";
      cout << "\tThe predictions will be unintuitive\n\n";
    }

    int ind_idx = this->set_indicator(n.name, indicator_name, indicator_source);

    if (ind_idx == -1) {
      // This is a duplicate indicator, and will not be added again.
      // TODO: Decide how to handle this situation and inform the user at
      // the CauseMos HMI end.
      continue;
    }

    // [ epoch --→ observation ]
    multimap<long, double> indicator_data;

    // Accumulate which epochs data points are available for
    // The idea is to use this to assess the frequency of the data.
    set<long> epochs;

    for (auto& data_point : indicator["values"]) {
      if (data_point["value"].is_null()) {
        // This is a missing data point
        continue;
      }

      double observation = data_point["value"].get<double>();

      // A value without a timestamp cannot be placed on the timeline.
      if (data_point["timestamp"].is_null()) {continue;}

      // Note: HMI sends epochs as unix epoch * 1000
      epoch = data_point["timestamp"].get<long>();

      // Keep track of multiple observations for each epoch
      indicator_data.insert(make_pair(epoch, observation));

      // Record the epochs where observations are available for this
      // indicator. This data is used to assess the observation
      // frequency.
      epochs.insert(epoch);

      // Find the start epoch of observations. When observation
      // sequences are not aligned:
      //   start epoch => earliest observation among all the
      //                  observation sequences.
      if (this->train_start_epoch > epoch) {
        this->train_start_epoch = epoch;
      }

      // Find the end epoch of observations. When observation
      // sequences are not aligned:
      //   end epoch => latest observation among all the observation
      //                sequences.
      if (this->train_end_epoch < epoch) {
        this->train_end_epoch = epoch;
      }
    }

    // Add this indicator observations to the concept. The data structure
    // has provision to assign multiple indicator observation sequences for
    // a single concept.
    concept_indicator_data[v].push_back(indicator_data);

    // To assess the frequency of the data. std::set iterates in ascending
    // order, so this vector comes out sorted.
    concept_indicator_epochs[v] = vector<long>(epochs.begin(), epochs.end());
  }
}
// Converts an epoch into a (possibly fractional) model timestep: the number
// of modeling_frequency-sized intervals elapsed since train_start_epoch.
double AnalysisGraph::epoch_to_timestep(long epoch, long train_start_epoch,
                                        long modeling_frequency) {
  long elapsed = epoch - train_start_epoch;
  return static_cast<double>(elapsed) / modeling_frequency;
}
// Derives the modeling timestep layout from the union of the observation
// timesteps of all indicators, and reports gap statistics between
// consecutive observations.
//
// concept_indicator_observation_timesteps : per concept, the observation
//     timesteps (already converted from epochs) that carry data.
// shortest_gap / longest_gap [out] : extreme gaps between consecutive
//     observation timesteps over the union of all indicators.
// frequent_gap [out]      : the most frequent gap; ties broken toward the
//     shortest gap with the highest frequency.
// highest_frequency [out] : number of occurrences of frequent_gap.
//
// Side effects: fills this->observation_timesteps_sorted, pins
// this->num_modeling_timesteps_per_one_observation_timestep to 1 (see the
// long design note below), and fills this->modeling_timestep_gaps.
void AnalysisGraph::infer_modeling_period(
    const ConceptIndicatorEpochs & concept_indicator_observation_timesteps,
    long &shortest_gap,
    long &longest_gap,
    long &frequent_gap,
    int &highest_frequency) {
  // Union of the observation timesteps of every indicator.
  unordered_set<long> observation_timesteps_all;

  for (vector<long> ind_observation_timesteps : concept_indicator_observation_timesteps) {
    observation_timesteps_all.insert(ind_observation_timesteps.begin(),
                                     ind_observation_timesteps.end());
  }

  this->observation_timesteps_sorted = vector<long>(observation_timesteps_all.begin(),
                                                    observation_timesteps_all.end());

  sort(this->observation_timesteps_sorted.begin(),
       this->observation_timesteps_sorted.end());

  // adjacent_difference copies the first element unchanged into slot 0;
  // slots 1.. hold the gaps between consecutive observation timesteps.
  vector<long> observation_timestep_gaps(this->observation_timesteps_sorted.size());
  adjacent_difference(this->observation_timesteps_sorted.begin(),
                      this->observation_timesteps_sorted.end(),
                      observation_timestep_gaps.begin());

  // Compute number of epochs between data points
  // (histogram: gap length → number of occurrences).
  unordered_map<long, int> gap_frequencies;
  unordered_map<long, int>::iterator itr;

  // Start at index 1: slot 0 is the first timestep itself, not a gap.
  for (int gap = 1; gap < observation_timestep_gaps.size(); gap++) {
    // Check whether two adjacent data points with the same gap of
    // epochs in between is already found.
    itr = gap_frequencies.find(observation_timestep_gaps[gap]);

    if (itr != gap_frequencies.end()) {
      // There were previous adjacent pairs of data points with
      // gap epochs in between. Now we have found one more
      // so increase the number of data points at this frequency.
      itr->second++;
    } else {
      // This is the first data point that is gap epochs
      // away from its previous data point. Start recording this new
      // frequency.
      gap_frequencies.insert(make_pair(observation_timestep_gaps[gap], 1));
    }
  }

  // Find the smallest gap and most frequent gap
  shortest_gap = LONG_MAX;
  longest_gap = 0;
  frequent_gap = LONG_MAX;
  highest_frequency = 0;

  // Epochs statistics for individual indicator time series for debugging purposes.
  for(auto const& [gap, freq] : gap_frequencies){
    if (shortest_gap > gap) {
      shortest_gap = gap;
    }
    if (longest_gap < gap) {
      longest_gap = gap;
    }
    if (highest_frequency == freq) {
      // In case of multiple observation_timestep_gaps having the same highest frequency,
      // note down the shortest highest frequency gap
      if (frequent_gap > gap) {
        frequent_gap = gap;
      }
    } else if (highest_frequency < freq) {
      highest_frequency = freq;
      frequent_gap = gap;
    }
  }

  // num_modeling_timesteps_per_one_observation_timestep is the number of
  // observation timesteps taken as one modeling timestep.
  // Let us assume we have observations at observation timesteps
  //      0, 4, 10, 14, 22
  // Then, to advance the system from one observation timestep to the next
  // we have to use following observation timestep gaps:
  //      4, 6, 4, 8
  // If we use num_modeling_timesteps_per_one_observation_timestep = 1, we
  // have to calculate matrix exponentials for 4, 6 and 8 to advance the system.
  // However, if we instead use
  // num_modeling_timesteps_per_one_observation_timestep = gcd(4, 6, 8) = 2,
  // we could compute matrix exponentials for 2, 3, 4.
  // But, then if we later (during projection) have to advance the system by 1
  // observation timestep, we have to compute a matrix exponential for 0.5.
  // If we use
  // num_modeling_timesteps_per_one_observation_timestep = min(4, 6, 8) = 4,
  // we have to compute matrix exponentials for 1, 1.5, 2 and to advance the
  // system by 1 observation timestep we have to compute a matrix exponential
  // for 0.25.
  // In general, to advance the system by t observation timesteps, we have to
  // compute a matrix exponential for
  //      t/num_modeling_timesteps_per_one_observation_timestep.
  // For the current naive seasonal head node model to work,
  // num_modeling_timesteps_per_one_observation_timestep **MUST** be 1
  this->num_modeling_timesteps_per_one_observation_timestep = 1; // one timestep //frequent_gap;

  vector<double> modeling_timesteps(this->observation_timesteps_sorted.size());
  transform(this->observation_timesteps_sorted.begin(),
            this->observation_timesteps_sorted.end(),
            modeling_timesteps.begin(),
            [&](long observation_timestep) {
              //return this->epoch_to_timestep(observation_timestep, this->train_start_epoch,
              //                               this->num_modeling_timesteps_per_one_observation_timestep);
              //return this->epoch_to_timestep(
              //    observation_timestep, 0,
              //    this->num_modeling_timesteps_per_one_observation_timestep);
              // NOTE(review): if the divisor member is an integral type this
              // is integer division before the conversion to double — exact
              // only while the divisor is pinned to 1. Confirm the member's
              // declared type before ever reinstating frequent_gap above.
              return observation_timestep / this->num_modeling_timesteps_per_one_observation_timestep;
            });

  this->modeling_timestep_gaps.clear();
  this->modeling_timestep_gaps = vector<double>(modeling_timesteps.size());
  // Again slot 0 holds the first timestep itself; slots 1.. hold the gaps.
  adjacent_difference(modeling_timesteps.begin(),
                      modeling_timesteps.end(),
                      this->modeling_timestep_gaps.begin());
}
// Infers a seasonal period for each concept from the smallest day-gap
// between consecutive observations of its indicator, then folds the period
// into num_modeling_timesteps_per_one_observation_timestep via lcm.
//
// NOTE(review): the only visible call site (in
// set_observed_state_sequence_from_json_dict) is commented out, so this
// method appears dormant/experimental; it also prints every observation date
// to stdout as debug output.
// NOTE(review): if an indicator has zero observations, the
// `year_month_dates.size() - 1` expressions below wrap around (size_t
// underflow) — confirm callers guarantee at least one observation per
// concept before re-enabling this.
void AnalysisGraph::infer_concept_period(const ConceptIndicatorEpochs &concept_indicator_epochs) {
  double milliseconds_per_day = 24 * 60 * 60 * 1000.0;
  int min_days_global = INT32_MAX;
  int start_day = INT32_MAX;
  this->num_modeling_timesteps_per_one_observation_timestep = 1;

  for (int concept_id = 0; concept_id < concept_indicator_epochs.size();
       concept_id++) {
    // Work on a sorted copy of this concept's observation epochs.
    vector<long> ind_epochs = concept_indicator_epochs[concept_id];
    sort(ind_epochs.begin(), ind_epochs.end());

    // Epochs expressed as whole days since the unix epoch.
    vector<long> ind_days(ind_epochs.size());
    transform(ind_epochs.begin(), ind_epochs.end(), ind_days.begin(),
              [&](long epoch) {
                return round(epoch / milliseconds_per_day);
              });

    // Epochs expressed as (year, month, day) tuples in GMT.
    vector<tuple<int, int, int>> year_month_dates(ind_epochs.size());
    transform(ind_epochs.begin(), ind_epochs.end(),
              year_month_dates.begin(),
              [&](long epoch) {
                return this->epoch_to_year_month_date(epoch);
              });

    // Gaps between consecutive observations, measured in months.
    vector<int> gaps_in_months(year_month_dates.size() - 1);
    int shortest_monthly_gap_ind = INT32_MAX;
    for (int ts = 0; ts < year_month_dates.size() - 1; ts++) {
      int observation_timesteps = delphi::utils::observation_timesteps_between(
          year_month_dates[ts], year_month_dates[ts + 1]);
      gaps_in_months[ts] = observation_timesteps;
      if (observation_timesteps < shortest_monthly_gap_ind) {
        shortest_monthly_gap_ind = observation_timesteps;
      }
    }

    // vector<long> gaps(ind_epochs.size());
    // adjacent_difference(ind_epochs.begin(), ind_epochs.end(), gaps.begin());
    //
    // vector<int> days_between(gaps.size() - 1);
    // transform(gaps.begin() + 1, gaps.end(), days_between.begin(),
    //           [&](long gap) {
    //             return round(gap / milliseconds_per_day);
    //           });

    // Gaps between consecutive observations, measured in days. Slot 0 of the
    // adjacent_difference output is the first day itself, so the minimum scan
    // below starts at index 1.
    vector<int> days_between(ind_days.size());
    adjacent_difference(ind_days.begin(), ind_days.end(), days_between.begin());

    int min_days = INT32_MAX;
    // for (int days : days_between) {
    //   if (days < min_days) {
    //     min_days = days;
    //   }
    // }
    for (int idx = 1; idx < days_between.size(); idx++) {
      int days = days_between[idx];
      if (days < min_days) {
        min_days = days;
      }
    }

    if (ind_days[0] < start_day) {
      start_day = ind_days[0];
    }
    if (min_days < min_days_global) {
      min_days_global = min_days;
    }

    // Map the smallest day-gap to a seasonal period (number of observations
    // per year). Gaps of a year or longer fall through to period = 1.
    int period = 1;
    if (min_days == 1) {
      // Daily
      period = 365;
    } else if (min_days == 7) {
      // Weekly
      period = 52;
    } else if (28 <= min_days && min_days <= 31) {
      // Monthly
      period = 12;
    } else if (59 <= min_days && min_days <= 62) {
      // 2 Months
      period = 6;
    } else if (89 <= min_days && min_days <= 92) {
      // 3 Months
      period = 4;
    } else if (120 <= min_days && min_days <= 123) {
      // 4 Months
      period = 3;
    } else if (181 <= min_days && min_days <= 184) {
      // 6 Months
      period = 2;
    }
    /*
    else if (365 <= min_days && min_days <= 366) {
      // Yearly
    }
    else if (730 <= min_days && min_days <= 731) {
      // 2 Years
    } else if (1095 <= min_days && min_days <= 1096) {
      // 3 Years
    } else if (1460 <= min_days && min_days <= 1461) {
      // 4 Years
    } else if (1825 <= min_days && min_days <= 1827) {
      // 5 Years
    }
    */
    this->graph[concept_id].period = period;

    // The model-wide modeling granularity must accommodate every concept's
    // period, hence the running lcm.
    this->num_modeling_timesteps_per_one_observation_timestep = lcm(this->num_modeling_timesteps_per_one_observation_timestep, period);

    // Debug output: the full observation date sequence for this concept.
    for (auto yyyy_mm_dd : year_month_dates) {
      cout << "(" << get<0>(yyyy_mm_dd) << "-" << get<1>(yyyy_mm_dd) << "-" << get<2>(yyyy_mm_dd) << "), ";
    }
    cout << endl;
  }
}
// Builds this->observed_state_sequence from the "conceptIndicators" section
// of a CauseMos create-model request.
//
// Pipeline:
//   1. Extract the concept → indicator mapping and raw observations
//      (also establishes train_start_epoch / train_end_epoch).
//   2. Convert each observation epoch into an observation timestep counted
//      from the training start date, at the node's aggregation level.
//   3. Derive the modeling period / gap statistics.
//   4. Lay the observations into the
//      [ timestep ][ concept ][ indicator ][ observation ] sequence.
void
AnalysisGraph::set_observed_state_sequence_from_json_dict(
    const nlohmann::json &json_indicators) {
  int num_verts = this->num_vertices();

  // This is a multimap to keep provision to have multiple observations per
  // time point per indicator.
  // Access (concept is a vertex in the CAG)
  // [ concept ][ indicator ][ epoch --→ observation ]
  ConceptIndicatorData concept_indicator_data(num_verts);

  // Keeps the sequence of epochs for which data points are available
  // Data points are sorted according to epochs
  // Access:
  // [ concept ][ indicator ][epoch]
  ConceptIndicatorEpochs concept_indicator_epochs(num_verts);

  this->extract_concept_indicator_mapping_and_observations_from_json(
      json_indicators, concept_indicator_data, concept_indicator_epochs);

  // Decide the data frequency.
  long shortest_gap = LONG_MAX;
  long longest_gap = 0;
  long frequent_gap = LONG_MAX;
  int highest_frequency = 0;

  this->n_timesteps = 0;

  // Maps an observation timestep back to the set of raw epochs that fall on
  // it, so observations can be looked up when filling the sequence below.
  unordered_map<int, unordered_set<long>> observation_timestep_to_epochs;

  // train_start_epoch > train_end_epoch means no observation was extracted.
  if (this->train_start_epoch <= this->train_end_epoch) {
    // Convert epochs to observation timesteps making train_start_epoch = 0
    tuple<int, int, int> train_start_date =
        this->epoch_to_year_month_date(this->train_start_epoch);
    tuple<int, int, int> train_end_date =
        this->epoch_to_year_month_date(this->train_end_epoch);

    for (int v = 0; v < num_verts; v++) {
      Node& n = (*this)[v];
      // int shortest_monthly_gap_ind = INT32_MAX;
      for (int obs = 0; obs < concept_indicator_epochs[v].size(); obs++) {
        long epoch = concept_indicator_epochs[v][obs];
        tuple<int, int, int> obs_date =
            this->epoch_to_year_month_date(epoch);
        int observation_timestep = delphi::utils::observation_timesteps_between(
            train_start_date, obs_date, n.agg_level);
        // We reuse this variable to store concept_indicator_timesteps
        concept_indicator_epochs[v][obs] = observation_timestep;
        observation_timestep_to_epochs[observation_timestep].insert(epoch);
        /*
        // TODO: There are cases this period estimation goes wrong. Need revision.
        // e.g. observation months: 1, 3, 5, 8, 10, 12
        // The good solution is to get the gcd of all the observation gaps for a indicator
        if (obs > 0) {
          // TODO: Since we don't sort concept_indicator_epochs[v] this calculation is wrong
          int gap = concept_indicator_epochs[v][obs] -
                    concept_indicator_epochs[v][obs - 1];
          if (gap < shortest_monthly_gap_ind) {
            shortest_monthly_gap_ind = gap;
          }
        }
        */
      }
      /*
      int period = 1;
      if (shortest_monthly_gap_ind == 1) {
        // Monthly
        period = 12;
      } else if (shortest_monthly_gap_ind == 2) {
        // 2 Months
        period = 6;
      } else if (shortest_monthly_gap_ind == 3) {
        // 3 Months
        period = 4;
      } else if (shortest_monthly_gap_ind == 4) {
        // 4 Months
        period = 3;
      } else if (shortest_monthly_gap_ind == 6) {
        // 6 Months
        period = 2;
      }
      this->graph[v].period = period;
      */
    }
    // cout << "Inferring periods\n";
    // this->infer_concept_period(concept_indicator_epochs);
    // Some training data has been provided
    // NOTE: concept_indicator_epochs now holds observation timesteps, not
    // raw epochs (see the reuse comment above).
    this->infer_modeling_period(concept_indicator_epochs, shortest_gap,
                                longest_gap, frequent_gap, highest_frequency);
    this->n_timesteps = this->observation_timesteps_sorted.size();
  }

  this->observed_state_sequence.clear();

  // Access (concept is a vertex in the CAG)
  // [ timestep ][ concept ][ indicator ][ observation ]
  this->observed_state_sequence = ObservedStateSequence(this->n_timesteps);

  // Fill in observed state sequence
  // NOTE: This code is very similar to the implementations in
  // set_observed_state_sequence_from_data and get_observed_state_from_data
  for (int ts = 0; ts < this->n_timesteps; ts++) {
    this->observed_state_sequence[ts] = vector<vector<vector<double>>>(num_verts);

    for (int v = 0; v < num_verts; v++) {
      Node& n = (*this)[v];

      if (concept_indicator_data[v].empty()) {
        // This concept has no indicator specified in the create-model
        // call
        continue;
      }

      this->observed_state_sequence[ts][v] = vector<vector<double>>(n.indicators.size());

      for (int i = 0; i < n.indicators.size(); i++) {
        this->observed_state_sequence[ts][v][i] = vector<double>();

        // Gather every observation whose epoch maps onto this timestep.
        for (long epochs_in_ts :
             observation_timestep_to_epochs[this->observation_timesteps_sorted[ts]]) {
          pair<multimap<long, double>::iterator,
               multimap<long, double>::iterator>
              obs = concept_indicator_data[v][i].equal_range(epochs_in_ts);

          for (auto it = obs.first; it != obs.second; it++) {
            this->observed_state_sequence[ts][v][i].push_back(
                it->second);
          }
        }
      }
    }
  }
}
// Populates this model from a parsed CauseMos create-model request.
//
// json_data              : the full create-model JSON payload.
// belief_score_cutoff    : statements with a missing or lower belief score
//                          are dropped.
// grounding_score_cutoff : statements whose subject or object concept score
//                          is lower are dropped.
//
// For each surviving statement, one edge is added per evidence item (so
// repeated evidence strengthens an edge's KDE). Self loops are skipped.
// Finally the indicator observations, if provided, are attached via
// set_observed_state_sequence_from_json_dict().
void AnalysisGraph::from_causemos_json_dict(const nlohmann::json &json_data,
                                            double belief_score_cutoff,
                                            double grounding_score_cutoff
                                            ) {
  this->causemos_call = true;

  // TODO: If model id is not present, we might want to not create the model
  // and send a failure response. At the moment we just create a blank model,
  // which could lead to future bugs that are hard to debug.
  if (!json_data.contains("id")){return;}
  this->id = json_data["id"].get<string>();

  if (!json_data.contains("statements")) {return;}

  if(json_data.contains("experiment_id")){
    this->experiment_id = json_data["experiment_id"].get<string>();
  }

  auto statements = json_data["statements"];

  for (auto stmt : statements) {
    // Filter out low-belief (or belief-less) statements.
    if (!stmt.contains("belief") or
        stmt["belief"].get<double>() < belief_score_cutoff) {
      continue;
    }

    // A statement without evidence contributes no edges.
    auto evidence = stmt["evidence"];
    if (evidence.is_null()) {
      continue;
    }

    auto subj = stmt["subj"];
    auto obj = stmt["obj"];
    if (subj.is_null() or obj.is_null()) {
      continue;
    }

    // Filter out poorly grounded concepts.
    auto subj_score_json = subj["concept_score"];
    auto obj_score_json = obj["concept_score"];
    if (subj_score_json.is_null() or obj_score_json.is_null()) {
      continue;
    }
    double subj_score = subj_score_json.get<double>();
    double obj_score = obj_score_json.get<double>();
    if (subj_score < grounding_score_cutoff or
        obj_score < grounding_score_cutoff) {
      continue;
    }

    auto subj_concept_json = subj["concept"];
    auto obj_concept_json = obj["concept"];
    if (subj_concept_json.is_null() or obj_concept_json.is_null()) {
      continue;
    }
    string subj_name = subj_concept_json.get<string>();
    string obj_name = obj_concept_json.get<string>();

    if (subj_name.compare(obj_name) == 0) { // Guard against self loops
      continue;
    }

    // Add the nodes to the graph if they are not in it already
    this->add_node(subj_name);
    this->add_node(obj_name);

    // Add the edge to the graph if it is not in it already
    // (one add_edge call per evidence item).
    for (auto evid : evidence) {
      if (!evid.contains("evidence_context")) {
        continue;
      }

      auto evid_context = evid["evidence_context"];

      auto subj_polarity_json = evid_context["subj_polarity"];
      auto obj_polarity_json = evid_context["obj_polarity"];

      // We set polarities to 1 (positive) by default if they are not specified.
      int subj_polarity_val = 1;
      int obj_polarity_val = 1;
      if (!subj_polarity_json.is_null()) {
        subj_polarity_val = subj_polarity_json.get<int>();
      }
      if (!obj_polarity_json.is_null()) {
        obj_polarity_val = obj_polarity_json.get<int>();
      }

      // Default adjective is "None"; only the first adjective is used.
      auto subj_adjectives_json = evid_context["subj_adjectives"];
      auto obj_adjectives_json = evid_context["obj_adjectives"];

      vector<string> subj_adjectives{"None"};
      vector<string> obj_adjectives{"None"};

      if(!subj_adjectives_json.is_null()){
        if(subj_adjectives_json.size() > 0){
          subj_adjectives = subj_adjectives_json.get<vector<string>>();
        }
      }
      if(!obj_adjectives_json.is_null()){
        if(obj_adjectives_json.size() > 0){
          obj_adjectives = obj_adjectives_json.get<vector<string>>();
        }
      }

      auto causal_fragment =
          CausalFragment({subj_adjectives[0], subj_polarity_val, subj_name},
                         {obj_adjectives[0], obj_polarity_val, obj_name});
      this->add_edge(causal_fragment);
    }
  }

  if (!json_data.contains("conceptIndicators")) {
    // No indicator data provided.
    // TODO: What is the best action here?
    //throw runtime_error("No indicator information provided");
    // Maybe this is acceptable since there is another call: edit-indicators,
    // which is not yet implemented. An analyst can create a CAG structure
    // without any indicators and then later attach indicators one by one.
  } else {
    this->set_observed_state_sequence_from_json_dict(json_data["conceptIndicators"]);
  }
}
/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
create-experiment (private)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
// Converts a CauseMos epoch (milliseconds since the unix epoch) into a
// (year, month, day-of-month) tuple, interpreted in GMT.
//
// epoch   : unix epoch * 1000 (the HMI protocol sends milliseconds).
// Returns : {year, month (1-12), day-of-month (1-31)}.
//
// Per the protocol, observation timestamps are expected to land on the first
// of the month; anything else triggers a warning but is still returned.
std::tuple<int, int, int>
AnalysisGraph::epoch_to_year_month_date(long epoch) {
  // The HMI uses milliseconds. So they multiply time-stamps by 1000.
  // Before converting them back to year and month, we have to divide
  // by 1000.
  epoch /= 1000;

  // Convert the time-step to year and month according to GMT.
  // gmtime() returns a pointer into shared static storage and is not
  // reentrant; use gmtime_r() with a caller-owned buffer so concurrent
  // conversions cannot clobber each other's results. This also avoids
  // punning a long* as a time_t*.
  time_t epoch_secs = epoch;
  struct tm time_parts;
  gmtime_r(&epoch_secs, &time_parts);

  int year = 1900 + time_parts.tm_year;  // tm_year counts from 1900
  int month = 1 + time_parts.tm_mon;     // tm_mon is 0-based
  int date = time_parts.tm_mday;

  if (date > 1) {
    print("* * * * * WARNING: Observation timestamp {0}-{1}-{2} does not adhere to the protocol!\n", year, month, date);
  }

  return make_tuple(year, month, date);
}
// Reads the "constraints" array of a projection request and registers each
// (step, value) clamp on its concept. Constraints landing before the first
// valid prediction step (i.e. within the first skip_steps steps) are
// silently dropped.
void AnalysisGraph::extract_projection_constraints(
    const nlohmann::json &projection_constraints, long skip_steps) {
  for (const auto &constraint : projection_constraints) {
    if (constraint["concept"].is_null()) {
      continue;
    }
    string concept = constraint["concept"].get<string>();

    for (const auto &clamp_point : constraint["values"]) {
      // Both the step and the value are required to apply a constraint;
      // checking here keeps the bookkeeping minimal.
      if (clamp_point["step"].is_null()) {
        continue;
      }
      if (clamp_point["value"].is_null()) {
        continue;
      }
      int clamp_step = clamp_point["step"].get<int>();
      double clamp_value = clamp_point["value"].get<double>();

      // NOTE: Delphi is capable of attaching multiple indicators to a
      // single concept. Since we are constraining the latent state, we can
      // constrain (or intervene) based on only one of those indicators.
      // Passing an empty indicator name selects the first indicator attached
      // to the concept, which is present whether the concept has one or many
      // indicators.
      //
      // When the requested prediction start epoch precedes the training
      // start epoch, the first usable prediction epoch is skip_steps after
      // the requested start; constraints inside those skipped steps cannot
      // be applied and are ignored.
      if (clamp_step >= skip_steps) {
        this->add_constraint(clamp_step - skip_steps, concept, "", clamp_value);
      }
    }
  }
}
// Runs a CauseMos projection experiment described by json_data.
//
// Validates the experiment parameters (startTime, endTime, numTimesteps),
// sets up the prediction window relative to the training data, applies any
// projection constraints, generates the latent/observed prediction
// sequences, and returns them formatted for the HMI.
//
// Throws BadCausemosInputException on missing/inconsistent parameters.
FormattedProjectionResult
AnalysisGraph::run_causemos_projection_experiment_from_json_dict(
    const nlohmann::json &json_data) {
  // NOTE(review): es is unused below — presumably its constructor/destructor
  // report experiment status as a side effect (RAII); confirm in
  // ExperimentStatus.hpp.
  ExperimentStatus es(this->id, this->experiment_id);

  if (json_data["experimentParam"].is_null()) {
    throw BadCausemosInputException("Experiment parameters null");
  }

  auto projection_parameters = json_data["experimentParam"];

  if (projection_parameters["startTime"].is_null()) {
    throw BadCausemosInputException("Projection start time null");
  }
  long proj_start_epoch = projection_parameters["startTime"].get<long>();

  if (projection_parameters["endTime"].is_null()) {
    throw BadCausemosInputException("Projection end time null");
  }
  long proj_end_epoch = projection_parameters["endTime"].get<long>();

  if (projection_parameters["numTimesteps"].is_null()) {
    throw BadCausemosInputException("Projection number of time steps null");
  }
  this->pred_timesteps = projection_parameters["numTimesteps"].get<int>();

  if(proj_start_epoch > proj_end_epoch) {
    throw BadCausemosInputException("Projection end epoch is before projection start epoch");
  }

  // Number of leading requested steps that fall before the training window
  // and therefore cannot be predicted (currently always 0 — see the
  // commented-out computation below).
  int skip_steps = 0;

  if (this->observed_state_sequence.empty()) {
    // No training data has been provided
    // "Train" (more like derive) a model based on prior distributions
    cout << "\nNOTE:\n\t\"Training\" a model, without any training observations, using only prior distributions!\n";
    this->train_start_epoch = -1;
    this->pred_start_timestep = 0;
    this->delta_t = 1;
    this->pred_timesteps++;
  } else {
    /*
    this->pred_start_timestep = this->epoch_to_timestep(
        proj_start_epoch, this->train_start_epoch, this->num_modeling_timesteps_per_one_observation_timestep);
    double pred_end_timestep = this->epoch_to_timestep(
        proj_end_epoch, this->train_start_epoch, this->num_modeling_timesteps_per_one_observation_timestep);
    this->delta_t = (pred_end_timestep - this->pred_start_timestep) /
                    (this->pred_timesteps - 1.0);
    */
    // The prediction start timestep is counted in observation timesteps from
    // the training start date, at the model-wide aggregation level.
    tuple<int, int, int> train_start_date =
        this->epoch_to_year_month_date(this->train_start_epoch);
    tuple<int, int, int> pred_start_date =
        this->epoch_to_year_month_date(proj_start_epoch);
    this->pred_start_timestep = delphi::utils::observation_timesteps_between(
        train_start_date, pred_start_date, this->model_data_agg_level);
    this->delta_t = 1;
    tuple<int, int, int> pred_end_date =
        this->epoch_to_year_month_date(proj_end_epoch);
    //int num_pred_months = delphi::utils::observation_timesteps_between(pred_start_date, pred_end_date);
    //cout << "(" << get<0>(pred_end_date) << "-" << get<1>(pred_end_date) << "-" << get<2>(pred_end_date) << "), ";
    //cout << "(" << get<0>(pred_start_date) << "-" << get<1>(pred_start_date) << "-" << get<2>(pred_start_date) << "), ";

    // To help clamping we predict one additional timestep
    // (format_projection_result() later strips this extra leading step).
    this->pred_timesteps++;
    this->pred_start_timestep -= this->delta_t;

    // Prevent predicting for timesteps earlier than training start time
    if (this->pred_start_timestep < 0) {
      cout << "\nNOTE:\n\t\"Predicting\" in the past\n";
      /*
      skip_steps = ceil(abs(this->pred_start_timestep) / this->delta_t);
      this->pred_start_timestep += skip_steps;
      this->pred_timesteps -= skip_steps;
      */
    }
    /*
    if (this->pred_start_timestep > pred_end_timestep) {
      throw BadCausemosInputException(
          "Projection end epoch is before projection start epoch");
    }
    */
  }

  this->extract_projection_constraints(projection_parameters["constraints"], skip_steps);

  this->generate_latent_state_sequences(this->pred_start_timestep);
  this->generate_observed_state_sequences();
  return this->format_projection_result();
}
// Packages the predicted observed-state sequences for the HMI.
// Access pattern of the result: [ vertex_name ][ timestep ][ sample ],
// with the samples at each timestep sorted ascending.
FormattedProjectionResult AnalysisGraph::format_projection_result() {
  FormattedProjectionResult result;

  // Prediction starts one timestep before the requested window to facilitate
  // clamping derivatives; drop that extra leading timestep here.
  const int num_response_timesteps = this->pred_timesteps - 1;

  for (const auto &name_and_id : this->name_to_vertex) {
    const string &concept_name = name_and_id.first;
    const int concept_id = name_and_id.second;

    result[concept_name] = vector<vector<double>>(
        num_response_timesteps, vector<double>(this->res));

    for (int ts = 0; ts < num_response_timesteps; ts++) {
      for (int samp = 0; samp < this->res; samp++) {
        // ts + 1 skips the extra clamping timestep.
        result[concept_name][ts][samp] =
            this->predicted_observed_state_sequences[samp][ts + 1][concept_id][0];
      }
      ranges::sort(result[concept_name][ts]);
    }
  }
  return result;
}
// Fills this->transition_matrix_collection with this->res transition
// matrices, each built from edge θ values drawn fresh from the edges'
// KDE-estimated prior distributions.
void AnalysisGraph::sample_transition_matrix_collection_from_prior() {
  this->transition_matrix_collection.clear();
  this->transition_matrix_collection = vector<Eigen::MatrixXd>(this->res);

  for (int samp = 0; samp < this->res; samp++) {
    // Draw one θ per edge from its prior.
    for (auto edge : this->edges()) {
      this->graph[edge].set_theta(this->graph[edge].kde.resample(
          1, this->rand_num_generator, this->uni_dist, this->norm_dist)[0]);
    }

    // Build this->A_original from the sampled βs and keep a copy of it.
    this->set_transition_matrix_from_betas();
    this->transition_matrix_collection[samp] = this->A_original;
  }
}
/*
============================================================================
Public: Integration with Uncharted's CauseMos interface
============================================================================
*/
/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
create-model (public)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
// Builds an AnalysisGraph from a CauseMos create-model request given as a
// raw JSON string. See from_causemos_json_dict for the filtering semantics
// of the two cutoffs.
AnalysisGraph AnalysisGraph::from_causemos_json_string(string json_string,
                                                       double belief_score_cutoff,
                                                       double grounding_score_cutoff,
                                                       int kde_kernels
                                                       ) {
  auto parsed_request = nlohmann::json::parse(json_string);

  AnalysisGraph model;
  model.n_kde_kernels = kde_kernels;
  model.from_causemos_json_dict(parsed_request, belief_score_cutoff,
                                grounding_score_cutoff);
  return model;
}
// Builds an AnalysisGraph from a CauseMos create-model request stored in a
// JSON file. See from_causemos_json_dict for the filtering semantics of the
// two cutoffs.
AnalysisGraph AnalysisGraph::from_causemos_json_file(string filename,
                                                     double belief_score_cutoff,
                                                     double grounding_score_cutoff,
                                                     int kde_kernels
                                                     ) {
  auto parsed_request = load_json(filename);

  AnalysisGraph model;
  model.n_kde_kernels = kde_kernels;
  model.from_causemos_json_dict(parsed_request, belief_score_cutoff,
                                grounding_score_cutoff);
  return model;
}
// Builds the JSON response to a CauseMos create-model call:
//   - "status": "ready" once trained, otherwise "training".
//   - "relations": one entry per edge with a single scalar weight.
//   - "conceptIndicators": a null-valued stub per concept.
//
// Edge weight encoding: θ ∈ (-π/2, π/2) is mapped to (θ + π/2)·(2/π) − 1,
// i.e. onto (-1, 1) (M_2_PI is the constant 2/π). Trained edges report the
// mean of their sampled θs (or the frozen θ); untrained, unfrozen edges
// report a neutral placeholder of 0.5.
string AnalysisGraph::generate_create_model_response() {
  using nlohmann::json, ranges::max;

  json j;
  j["status"] = this->trained? "ready" : "training";
  j["relations"] = {};
  j["conceptIndicators"] = {};

  for (auto e : this->edges()) {
    /*
    vector<double> weights;
    if (this->trained) {
      weights = vector<double>(this->graph[e].sampled_thetas.size());
      transform(this->graph[e].sampled_thetas.begin(),
                this->graph[e].sampled_thetas.end(),
                weights.begin(),
                [&](double theta) {
                  return (double)tan(theta);
                });
    } else {
      weights = vector<double>{0.5};
    }
    */
    json edge_json = {{"source", this->source(e).name},
                      {"target", this->target(e).name},
                      {"weights", this->trained
                                  ? vector<double>{((this->graph[e].is_frozen()
                                                     ? this->graph[e].get_theta()
                                                     : mean(this->graph[e].sampled_thetas))
                                                    + M_PI_2) * M_2_PI - 1
                                                  }
                                  : this->graph[e].is_frozen()
                                    ? vector<double>{(this->graph[e].get_theta()
                                                      + M_PI_2) * M_2_PI - 1}
                                    : vector<double>{0.5}}};
    j["relations"].push_back(edge_json);
  }

  int num_verts = this->num_vertices();
  // Indicator scaling metadata is not computed here; emit null stubs that
  // the HMI can fill in later.
  for (int v = 0; v < num_verts; v++) {
    Node& n = (*this)[v];
    j["conceptIndicators"][n.name] = {{"initialValue", nullptr},
                                      {"scalingFactor", nullptr},
                                      {"scalingBias", nullptr}};
  }

  return j.dump();
}
/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
create-experiment (public)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
// Runs a projection experiment described by a raw JSON string.
// Returns an empty FormattedProjectionResult when the projection parameters
// are invalid (the error is also logged to stdout).
FormattedProjectionResult
AnalysisGraph::run_causemos_projection_experiment_from_json_string(
    string json_string) {
  using namespace fmt::literals;
  using nlohmann::json;

  // Theta PDFs were constructed during the create-model call and serialized
  // with the model, so they are already loaded when the model is recreated.
  // Flagging this as a CauseMos call keeps initialize_parameters() from
  // spending cycles rebuilding them.
  // NOTE: In hindsight, it may be possible to stop train_model() from
  // calling initialize_parameters() at all for CauseMos-driven training;
  // that needs a review of the call ordering first.
  this->causemos_call = true;

  auto parsed_request = nlohmann::json::parse(json_string);

  try {
    return run_causemos_projection_experiment_from_json_dict(parsed_request);
  }
  catch (BadCausemosInputException& e) {
    cout << e.what() << endl;
    // An empty prediction signals that the projection parameters were bad.
    return FormattedProjectionResult();
  }
}
// Runs a projection experiment described by a JSON file.
// Returns an empty FormattedProjectionResult when the projection parameters
// are invalid (the error is also logged to stdout).
FormattedProjectionResult
AnalysisGraph::run_causemos_projection_experiment_from_json_file(
    string filename) {
  using namespace fmt::literals;
  using nlohmann::json;

  // Theta PDFs were constructed during the create-model call and serialized
  // with the model, so they are already loaded when the model is recreated.
  // Flagging this as a CauseMos call keeps initialize_parameters() from
  // spending cycles rebuilding them.
  // NOTE: In hindsight, it may be possible to stop train_model() from
  // calling initialize_parameters() at all for CauseMos-driven training;
  // that needs a review of the call ordering first.
  this->causemos_call = true;

  auto parsed_request = load_json(filename);

  try {
    return run_causemos_projection_experiment_from_json_dict(parsed_request);
  }
  catch (BadCausemosInputException& e) {
    cout << e.what() << endl;
    // An empty prediction signals that the projection parameters were bad.
    return FormattedProjectionResult();
  }
}
/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
edit-weights
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
unsigned short AnalysisGraph::freeze_edge_weight(std::string source_name,
std::string target_name,
double scaled_weight,
int polarity) {
int source_id = -1;
int target_id = -1;
try {
source_id = this->name_to_vertex.at(source_name);
} catch(const out_of_range &e) {
// Source concept does not exist
return 2;
}
try {
target_id = this->name_to_vertex.at(target_name);
} catch(const out_of_range &e) {
// Target concept does not exist
return 4;
}
pair<EdgeDescriptor, bool> edg = boost::edge(source_id, target_id,
this->graph);
if (!edg.second) {
// There is no edge from source concept to target concept
return 8;
}
if (polarity == 0) {
this->graph[edg.first].unfreeze();
// Prevent initializing the model parameters to the MAP estimate of the
// previous training run to give a warm start to training
this->MAP_sample_number = -1;
return 0;
}
if (scaled_weight < 0 || scaled_weight >= 1) {
return 1;
}
double theta = polarity / abs(polarity) * scaled_weight * M_PI_2;
this->graph[edg.first].unfreeze();
this->graph[edg.first].set_theta(theta);
this->graph[edg.first].freeze();
this->trained = false;
return 0;
}