.. _program_listing_file_lib_causemos_integration.cpp: Program Listing for File causemos_integration.cpp ================================================= |exhale_lsh| :ref:`Return to documentation for file ` (``lib/causemos_integration.cpp``) .. |exhale_lsh| unicode:: U+021B0 .. UPWARDS ARROW WITH TIP LEFTWARDS .. code-block:: cpp // CauseMos integration methods #include "AnalysisGraph.hpp" #include "ExperimentStatus.hpp" #include "utils.hpp" #include #include #include #include #include using namespace std; using namespace delphi::utils; using namespace fmt::literals; using fmt::print; /* ============================================================================ Private: Integration with Uncharted's CauseMos interface ============================================================================ */ /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ create-model (private) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ void AnalysisGraph::extract_concept_indicator_mapping_and_observations_from_json( const nlohmann::json &json_indicators, ConceptIndicatorData &concept_indicator_data, ConceptIndicatorEpochs &concept_indicator_epochs) { int num_verts = this->num_vertices(); this->train_start_epoch = LONG_MAX; this->train_end_epoch = 0; long epoch; DataAggregationLevel first_resolution = DataAggregationLevel::NONE; for (int v = 0; v < num_verts; v++) { Node& n = (*this)[v]; // At the moment we are only attaching one indicator per node // when Analysis graph is called through CauseMos string indicator_name = "Qualitative measure of {}"_format(n.name); string indicator_source = "Delphi"; if (!json_indicators.contains(n.name)) { // In this case we do not have any observation data to train the model this->set_indicator(n.name, indicator_name, indicator_source); n.get_indicator(indicator_name).set_mean(1.0); continue; } // TODO: At the moment the json file specifies one indicator per // concept. 
At a later time if we update the json file to specify // multiple indicators per concept, this is one place we have to // update. Instead of a single indicator, we might get a list of // indicators here. Rest of the code would have to be updated // according to whatever the updated json file format we come up with. auto indicator = json_indicators[n.name]; if (!indicator["source"].is_null()) { indicator_source = indicator["source"].get<string>(); } if (indicator["name"].is_null()) { // In this case there could be observations. However we are discarding // them. Why? this->set_indicator(n.name, indicator_name, indicator_source); n.get_indicator(indicator_name).set_mean(1.0); continue; } indicator_name = indicator["name"].get<string>(); if (!indicator["minValue"].is_null()) { n.has_min = true; n.min_val_obs = indicator["minValue"].get<double>(); } if (!indicator["maxValue"].is_null()) { n.has_max = true; n.max_val_obs = indicator["maxValue"].get<double>(); } if (!indicator["period"].is_null()) { n.period = indicator["period"].get<int>(); } if (!indicator["resolution"].is_null()) { string resolution = indicator["resolution"].get<string>(); if (resolution.compare("month") == 0) { n.agg_level = DataAggregationLevel::MONTHLY; } else if (resolution.compare("year") == 0) { n.agg_level = DataAggregationLevel::YEARLY; } } if (first_resolution == DataAggregationLevel::NONE) { first_resolution = n.agg_level; this->model_data_agg_level = n.agg_level; } else if (first_resolution != n.agg_level) { cout << "\n* * * WARNING: Mixed resolution observations! * * *\n"; cout << "\t" << n.name << " has resolution " << (n.agg_level == DataAggregationLevel::YEARLY ? "yearly" : "monthly") << endl; cout << "\tDelphi is currently not designed to model mixed resolution data\n"; cout << "\tThe predictions will be unintuitive\n\n"; } int ind_idx = this->set_indicator(n.name, indicator_name, indicator_source); if (ind_idx == -1) { // This is a duplicate indicator, and will not be added again. 
// TODO: Decide how to handle this situation and inform the user at // the CauseMos HMI end. continue; } // [ epoch --→ observation ] multimap indicator_data; // Accumulate which epochs data points are available for // The idea is to use this to assess the frequency of the data. set epochs; for (auto& data_point : indicator["values"]) { if (data_point["value"].is_null()) { // This is a missing data point continue; } double observation = data_point["value"].get(); if (data_point["timestamp"].is_null()) {continue;} // Note: HMI sends epochs as unix epoch * 1000 epoch = data_point["timestamp"].get(); // Keep track of multiple observations for each epoch indicator_data.insert(make_pair(epoch, observation)); // Record the epochs where observations are available for this // indicator. This data is used to assess the observation // frequency. epochs.insert(epoch); // Find the start epoch of observations. When observation // sequences are not aligned: // start epoch => earliest observation among all the // observation sequences. if (this->train_start_epoch > epoch) { this->train_start_epoch = epoch; } // Find the end epoch of observations. When observation // sequences are not aligned: // end epoch => latest observation among all the observation // sequences. if (this->train_end_epoch < epoch) { this->train_end_epoch = epoch; } } // Add this indicator observations to the concept. The data structure // has provision to assign multiple indicator observation sequences for // a single concept. 
concept_indicator_data[v].push_back(indicator_data); // To assess the frequency of the data concept_indicator_epochs[v] = vector(epochs.begin(), epochs.end()); } } double AnalysisGraph::epoch_to_timestep(long epoch, long train_start_epoch, long modeling_frequency) { return (epoch - train_start_epoch) / double(modeling_frequency); } void AnalysisGraph::infer_modeling_period( const ConceptIndicatorEpochs & concept_indicator_observation_timesteps, long &shortest_gap, long &longest_gap, long &frequent_gap, int &highest_frequency) { unordered_set observation_timesteps_all; for (vector ind_observation_timesteps : concept_indicator_observation_timesteps) { observation_timesteps_all.insert(ind_observation_timesteps.begin(), ind_observation_timesteps.end()); } this->observation_timesteps_sorted = vector(observation_timesteps_all.begin(), observation_timesteps_all.end()); sort(this->observation_timesteps_sorted.begin(), this->observation_timesteps_sorted.end()); vector observation_timestep_gaps(this->observation_timesteps_sorted.size()); adjacent_difference(this->observation_timesteps_sorted.begin(), this->observation_timesteps_sorted.end(), observation_timestep_gaps.begin()); // Compute number of epochs between data points unordered_map gap_frequencies; unordered_map::iterator itr; for (int gap = 1; gap < observation_timestep_gaps.size(); gap++) { // Check whether two adjacent data points with the same gap of // epochs in between is already found. itr = gap_frequencies.find(observation_timestep_gaps[gap]); if (itr != gap_frequencies.end()) { // There were previous adjacent pairs of data points with // gap epochs in between. Now we have found one more // so increase the number of data points at this frequency. itr->second++; } else { // This is the first data point that is gap epochs // away from its previous data point. Start recording this new // frequency. 
gap_frequencies.insert(make_pair(observation_timestep_gaps[gap], 1)); } } // Find the smallest gap and most frequent gap shortest_gap = LONG_MAX; longest_gap = 0; frequent_gap = LONG_MAX; highest_frequency = 0; // Epochs statistics for individual indicator time series for debugging purposes. for(auto const& [gap, freq] : gap_frequencies){ if (shortest_gap > gap) { shortest_gap = gap; } if (longest_gap < gap) { longest_gap = gap; } if (highest_frequency == freq) { // In case of multiple observation_timestep_gaps having the same highest frequency, // note down the shortest highest frequency gap if (frequent_gap > gap) { frequent_gap = gap; } } else if (highest_frequency < freq) { highest_frequency = freq; frequent_gap = gap; } } // num_modeling_timesteps_per_one_observation_timestep is the number of // observation timesteps taken as one modeling timestep. // Let us assume we have observations at observation timesteps // 0, 4, 10, 14, 22 // Then, to advance the system from one observation timestep to the next // we have to use following observation timestep gaps: // 4, 6, 4, 8 // If we use num_modeling_timesteps_per_one_observation_timestep = 1, we // have to calculate matrix exponentials for 4, 6 and 8 to advance the system. // However, if we instead use // num_modeling_timesteps_per_one_observation_timestep = gcd(4, 6, 8) = 2, // we could compute matrix exponentials for 2, 3, 4. // But, then if we later (during projection) have to advance the system by 1 // observation timestep, we have to compute a matrix exponential for 0.5. // If we use // num_modeling_timesteps_per_one_observation_timestep = min(4, 6, 8) = 4, // we have to compute matrix exponentials for 1, 1.5, 2 and to advance the // system by 1 observation timestep we have to compute a matrix exponential // for 0.25. // In general, to advance the system by t observation timesteps, we have to // compute a matrix exponential for // t/num_modeling_timesteps_per_one_observation_timestep. 
// For the current naive seasonal head node model to work, // num_modeling_timesteps_per_one_observation_timestep **MUST** be 1 this->num_modeling_timesteps_per_one_observation_timestep = 1; // one timestep //frequent_gap; vector modeling_timesteps(this->observation_timesteps_sorted.size()); transform(this->observation_timesteps_sorted.begin(), this->observation_timesteps_sorted.end(), modeling_timesteps.begin(), [&](long observation_timestep) { //return this->epoch_to_timestep(observation_timestep, this->train_start_epoch, // this->num_modeling_timesteps_per_one_observation_timestep); //return this->epoch_to_timestep( // observation_timestep, 0, // this->num_modeling_timesteps_per_one_observation_timestep); return observation_timestep / this->num_modeling_timesteps_per_one_observation_timestep; }); this->modeling_timestep_gaps.clear(); this->modeling_timestep_gaps = vector(modeling_timesteps.size()); adjacent_difference(modeling_timesteps.begin(), modeling_timesteps.end(), this->modeling_timestep_gaps.begin()); } void AnalysisGraph::infer_concept_period(const ConceptIndicatorEpochs &concept_indicator_epochs) { double milliseconds_per_day = 24 * 60 * 60 * 1000.0; int min_days_global = INT32_MAX; int start_day = INT32_MAX; this->num_modeling_timesteps_per_one_observation_timestep = 1; for (int concept_id = 0; concept_id < concept_indicator_epochs.size(); concept_id++) { vector ind_epochs = concept_indicator_epochs[concept_id]; sort(ind_epochs.begin(), ind_epochs.end()); vector ind_days(ind_epochs.size()); transform(ind_epochs.begin(), ind_epochs.end(), ind_days.begin(), [&](long epoch) { return round(epoch / milliseconds_per_day); }); vector> year_month_dates(ind_epochs.size()); transform(ind_epochs.begin(), ind_epochs.end(), year_month_dates.begin(), [&](long epoch) { return this->epoch_to_year_month_date(epoch); }); vector gaps_in_months(year_month_dates.size() - 1); int shortest_monthly_gap_ind = INT32_MAX; for (int ts = 0; ts < year_month_dates.size() - 1; ts++) 
{ int observation_timesteps = delphi::utils::observation_timesteps_between( year_month_dates[ts], year_month_dates[ts + 1]); gaps_in_months[ts] = observation_timesteps; if (observation_timesteps < shortest_monthly_gap_ind) { shortest_monthly_gap_ind = observation_timesteps; } } // vector gaps(ind_epochs.size()); // adjacent_difference(ind_epochs.begin(), ind_epochs.end(), gaps.begin()); // // vector days_between(gaps.size() - 1); // transform(gaps.begin() + 1, gaps.end(), days_between.begin(), // [&](long gap) { // return round(gap / milliseconds_per_day); // }); vector days_between(ind_days.size()); adjacent_difference(ind_days.begin(), ind_days.end(), days_between.begin()); int min_days = INT32_MAX; // for (int days : days_between) { // if (days < min_days) { // min_days = days; // } // } for (int idx = 1; idx < days_between.size(); idx++) { int days = days_between[idx]; if (days < min_days) { min_days = days; } } if (ind_days[0] < start_day) { start_day = ind_days[0]; } if (min_days < min_days_global) { min_days_global = min_days; } int period = 1; if (min_days == 1) { // Daily period = 365; } else if (min_days == 7) { // Weekly period = 52; } else if (28 <= min_days && min_days <= 31) { // Monthly period = 12; } else if (59 <= min_days && min_days <= 62) { // 2 Months period = 6; } else if (89 <= min_days && min_days <= 92) { // 3 Months period = 4; } else if (120 <= min_days && min_days <= 123) { // 4 Months period = 3; } else if (181 <= min_days && min_days <= 184) { // 6 Months period = 2; } /* else if (365 <= min_days && min_days <= 366) { // Yearly } else if (730 <= min_days && min_days <= 731) { // 2 Years } else if (1095 <= min_days && min_days <= 1096) { // 3 Years } else if (1460 <= min_days && min_days <= 1461) { // 4 Years } else if (1825 <= min_days && min_days <= 1827) { // 5 Years } */ this->graph[concept_id].period = period; this->num_modeling_timesteps_per_one_observation_timestep = lcm(this->num_modeling_timesteps_per_one_observation_timestep, 
period); for (auto yyyy_mm_dd : year_month_dates) { cout << "(" << get<0>(yyyy_mm_dd) << "-" << get<1>(yyyy_mm_dd) << "-" << get<2>(yyyy_mm_dd) << "), "; } cout << endl; } } void AnalysisGraph::set_observed_state_sequence_from_json_dict( const nlohmann::json &json_indicators) { int num_verts = this->num_vertices(); // This is a multimap to keep provision to have multiple observations per // time point per indicator. // Access (concept is a vertex in the CAG) // [ concept ][ indicator ][ epoch --→ observation ] ConceptIndicatorData concept_indicator_data(num_verts); // Keeps the sequence of epochs for which data points are available // Data points are sorted according to epochs // Access: // [ concept ][ indicator ][epoch] ConceptIndicatorEpochs concept_indicator_epochs(num_verts); this->extract_concept_indicator_mapping_and_observations_from_json( json_indicators, concept_indicator_data, concept_indicator_epochs); // Decide the data frequency. long shortest_gap = LONG_MAX; long longest_gap = 0; long frequent_gap = LONG_MAX; int highest_frequency = 0; this->n_timesteps = 0; unordered_map> observation_timestep_to_epochs; if (this->train_start_epoch <= this->train_end_epoch) { // Convert epochs to observation timesteps making train_start_epoch = 0 tuple train_start_date = this->epoch_to_year_month_date(this->train_start_epoch); tuple train_end_date = this->epoch_to_year_month_date(this->train_end_epoch); for (int v = 0; v < num_verts; v++) { Node& n = (*this)[v]; // int shortest_monthly_gap_ind = INT32_MAX; for (int obs = 0; obs < concept_indicator_epochs[v].size(); obs++) { long epoch = concept_indicator_epochs[v][obs]; tuple obs_date = this->epoch_to_year_month_date(epoch); int observation_timestep = delphi::utils::observation_timesteps_between( train_start_date, obs_date, n.agg_level); // We reuse this variable to store concept_indicator_timesteps concept_indicator_epochs[v][obs] = observation_timestep; 
observation_timestep_to_epochs[observation_timestep].insert(epoch); /* // TODO: There are cases this period estimation goes wrong. Need revision. // e.g. observation months: 1, 3, 5, 8, 10, 12 // The good solution is to get the gcd of all the observation gaps for a indicator if (obs > 0) { // TODO: Since we don't sort concept_indicator_epochs[v] this calculation is wrong int gap = concept_indicator_epochs[v][obs] - concept_indicator_epochs[v][obs - 1]; if (gap < shortest_monthly_gap_ind) { shortest_monthly_gap_ind = gap; } } */ } /* int period = 1; if (shortest_monthly_gap_ind == 1) { // Monthly period = 12; } else if (shortest_monthly_gap_ind == 2) { // 2 Months period = 6; } else if (shortest_monthly_gap_ind == 3) { // 3 Months period = 4; } else if (shortest_monthly_gap_ind == 4) { // 4 Months period = 3; } else if (shortest_monthly_gap_ind == 6) { // 6 Months period = 2; } this->graph[v].period = period; */ } // cout << "Inferring periods\n"; // this->infer_concept_period(concept_indicator_epochs); // Some training data has been provided this->infer_modeling_period(concept_indicator_epochs, shortest_gap, longest_gap, frequent_gap, highest_frequency); this->n_timesteps = this->observation_timesteps_sorted.size(); } this->observed_state_sequence.clear(); // Access (concept is a vertex in the CAG) // [ timestep ][ concept ][ indicator ][ observation ] this->observed_state_sequence = ObservedStateSequence(this->n_timesteps); // Fill in observed state sequence // NOTE: This code is very similar to the implementations in // set_observed_state_sequence_from_data and get_observed_state_from_data for (int ts = 0; ts < this->n_timesteps; ts++) { this->observed_state_sequence[ts] = vector>>(num_verts); for (int v = 0; v < num_verts; v++) { Node& n = (*this)[v]; if (concept_indicator_data[v].empty()) { // This concept has no indicator specified in the create-model // call continue; } this->observed_state_sequence[ts][v] = vector>(n.indicators.size()); for (int i = 0; i < 
n.indicators.size(); i++) { this->observed_state_sequence[ts][v][i] = vector(); for (long epochs_in_ts : observation_timestep_to_epochs[this->observation_timesteps_sorted[ts]]) { pair::iterator, multimap::iterator> obs = concept_indicator_data[v][i].equal_range(epochs_in_ts); for (auto it = obs.first; it != obs.second; it++) { this->observed_state_sequence[ts][v][i].push_back( it->second); } } } } } } void AnalysisGraph::from_causemos_json_dict(const nlohmann::json &json_data, double belief_score_cutoff, double grounding_score_cutoff ) { this->causemos_call = true; // TODO: If model id is not present, we might want to not create the model // and send a failure response. At the moment we just create a blank model, // which could lead to future bugs that are hard to debug. if (!json_data.contains("id")){return;} this->id = json_data["id"].get(); if (!json_data.contains("statements")) {return;} if(json_data.contains("experiment_id")){ this->experiment_id = json_data["experiment_id"].get(); } auto statements = json_data["statements"]; for (auto stmt : statements) { if (!stmt.contains("belief") or stmt["belief"].get() < belief_score_cutoff) { continue; } auto evidence = stmt["evidence"]; if (evidence.is_null()) { continue; } auto subj = stmt["subj"]; auto obj = stmt["obj"]; if (subj.is_null() or obj.is_null()) { continue; } auto subj_score_json = subj["concept_score"]; auto obj_score_json = obj["concept_score"]; if (subj_score_json.is_null() or obj_score_json.is_null()) { continue; } double subj_score = subj_score_json.get(); double obj_score = obj_score_json.get(); if (subj_score < grounding_score_cutoff or obj_score < grounding_score_cutoff) { continue; } auto subj_concept_json = subj["concept"]; auto obj_concept_json = obj["concept"]; if (subj_concept_json.is_null() or obj_concept_json.is_null()) { continue; } string subj_name = subj_concept_json.get(); string obj_name = obj_concept_json.get(); if (subj_name.compare(obj_name) == 0) { // Guard against self loops // 
Add the nodes to the graph if they are not in it already continue; } this->add_node(subj_name); this->add_node(obj_name); // Add the edge to the graph if it is not in it already for (auto evid : evidence) { if (!evid.contains("evidence_context")) { continue; } auto evid_context = evid["evidence_context"]; auto subj_polarity_json = evid_context["subj_polarity"]; auto obj_polarity_json = evid_context["obj_polarity"]; // We set polarities to 1 (positive) by default if they are not specified. int subj_polarity_val = 1; int obj_polarity_val = 1; if (!subj_polarity_json.is_null()) { subj_polarity_val = subj_polarity_json.get(); } if (!obj_polarity_json.is_null()) { obj_polarity_val = obj_polarity_json.get(); } auto subj_adjectives_json = evid_context["subj_adjectives"]; auto obj_adjectives_json = evid_context["obj_adjectives"]; vector subj_adjectives{"None"}; vector obj_adjectives{"None"}; if(!subj_adjectives_json.is_null()){ if(subj_adjectives_json.size() > 0){ subj_adjectives = subj_adjectives_json.get>(); } } if(!obj_adjectives_json.is_null()){ if(obj_adjectives_json.size() > 0){ obj_adjectives = obj_adjectives_json.get>(); } } auto causal_fragment = CausalFragment({subj_adjectives[0], subj_polarity_val, subj_name}, {obj_adjectives[0], obj_polarity_val, obj_name}); this->add_edge(causal_fragment); } } if (!json_data.contains("conceptIndicators")) { // No indicator data provided. // TODO: What is the best action here? //throw runtime_error("No indicator information provided"); // Maybe this is acceptable since there is another call: edit-indicators, // which is not yet implemented. An analyst can create a CAG structure // without any indicators and then later attach indicators one by one. 
} else { this->set_observed_state_sequence_from_json_dict(json_data["conceptIndicators"]); } } /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ create-experiment (private) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ std::tuple<int, int, int> AnalysisGraph::epoch_to_year_month_date(long epoch) { // The HMI uses milliseconds. So they multiply time-stamps by 1000. // Before converting them back to year and month, we have to divide // by 1000. epoch /= 1000; // Convert the time-step to year and month. // We are converting it according to GMT. struct tm *ptm = gmtime(&epoch); int year = 1900 + ptm->tm_year; int month = 1 + ptm->tm_mon; int date = ptm->tm_mday; if (date > 1) { print("* * * * * WARNING: Observation timestamp {0}-{1}-{2} does not adhere to the protocol!\n", year, month, date); } return make_tuple(year, month, date); } void AnalysisGraph::extract_projection_constraints( const nlohmann::json &projection_constraints, long skip_steps) { for (auto constraint : projection_constraints) { if (constraint["concept"].is_null()) {continue;} string concept_name = constraint["concept"].get<string>(); for (auto values : constraint["values"]) { // We need both the step and the value to proceed. Thus checking // again to reduce bookkeeping. if (values["step"].is_null()) {continue;} if (values["value"].is_null()) {continue;} int step = values["step"].get<int>(); double ind_value = values["value"].get<double>(); // NOTE: Delphi is capable of attaching multiple indicators to a // single concept. Since we are constraining the latent state, // we can constrain (or intervene) based on only one of those // indicators. By passing an empty indicator name, we choose // to constrain the first indicator attached to a concept // which should be present irrespective of whether this // concept has one or more indicators attached to it. // // Requested prediction start epoch is earlier than the training // start epoch. 
The first requested prediction epoch after the // training start epoch is skip_steps after the requested prediction // start epoch and the constraints within those skipped epochs // cannot be applied and hence are ignored. if(step >= skip_steps){ this->add_constraint(step-skip_steps, concept_name, "", ind_value); } } } } FormattedProjectionResult AnalysisGraph::run_causemos_projection_experiment_from_json_dict( const nlohmann::json &json_data) { ExperimentStatus es(this->id, this->experiment_id); if (json_data["experimentParam"].is_null()) { throw BadCausemosInputException("Experiment parameters null"); } auto projection_parameters = json_data["experimentParam"]; if (projection_parameters["startTime"].is_null()) { throw BadCausemosInputException("Projection start time null"); } long proj_start_epoch = projection_parameters["startTime"].get<long>(); if (projection_parameters["endTime"].is_null()) { throw BadCausemosInputException("Projection end time null"); } long proj_end_epoch = projection_parameters["endTime"].get<long>(); if (projection_parameters["numTimesteps"].is_null()) { throw BadCausemosInputException("Projection number of time steps null"); } this->pred_timesteps = projection_parameters["numTimesteps"].get<int>(); if(proj_start_epoch > proj_end_epoch) { throw BadCausemosInputException("Projection end epoch is before projection start epoch"); } int skip_steps = 0; if (this->observed_state_sequence.empty()) { // No training data has been provided // "Train" (more like derive) a model based on prior distributions cout << "\nNOTE:\n\t\"Training\" a model, without any training observations, using only prior distributions!\n"; this->train_start_epoch = -1; this->pred_start_timestep = 0; this->delta_t = 1; this->pred_timesteps++; } else { /* this->pred_start_timestep = this->epoch_to_timestep( proj_start_epoch, this->train_start_epoch, this->num_modeling_timesteps_per_one_observation_timestep); double pred_end_timestep = this->epoch_to_timestep( proj_end_epoch, 
this->train_start_epoch, this->num_modeling_timesteps_per_one_observation_timestep); this->delta_t = (pred_end_timestep - this->pred_start_timestep) / (this->pred_timesteps - 1.0); */ tuple train_start_date = this->epoch_to_year_month_date(this->train_start_epoch); tuple pred_start_date = this->epoch_to_year_month_date(proj_start_epoch); this->pred_start_timestep = delphi::utils::observation_timesteps_between( train_start_date, pred_start_date, this->model_data_agg_level); this->delta_t = 1; tuple pred_end_date = this->epoch_to_year_month_date(proj_end_epoch); //int num_pred_months = delphi::utils::observation_timesteps_between(pred_start_date, pred_end_date); //cout << "(" << get<0>(pred_end_date) << "-" << get<1>(pred_end_date) << "-" << get<2>(pred_end_date) << "), "; //cout << "(" << get<0>(pred_start_date) << "-" << get<1>(pred_start_date) << "-" << get<2>(pred_start_date) << "), "; // To help clamping we predict one additional timestep this->pred_timesteps++; this->pred_start_timestep -= this->delta_t; // Prevent predicting for timesteps earlier than training start time if (this->pred_start_timestep < 0) { cout << "\nNOTE:\n\t\"Predicting\" in the past\n"; /* skip_steps = ceil(abs(this->pred_start_timestep) / this->delta_t); this->pred_start_timestep += skip_steps; this->pred_timesteps -= skip_steps; */ } /* if (this->pred_start_timestep > pred_end_timestep) { throw BadCausemosInputException( "Projection end epoch is before projection start epoch"); } */ } this->extract_projection_constraints(projection_parameters["constraints"], skip_steps); this->generate_latent_state_sequences(this->pred_start_timestep); this->generate_observed_state_sequences(); return this->format_projection_result(); } FormattedProjectionResult AnalysisGraph::format_projection_result() { // Access // [ vertex_name ][ timestep ][ sample ] FormattedProjectionResult result; // To facilitate clamping derivative we start predicting one timestep before // the requested prediction start 
timestep. We are removing that timestep when // returning the results. for (auto [vert_name, vert_id] : this->name_to_vertex) { result[vert_name] = vector>(this->pred_timesteps - 1, vector(this->res)); for (int ts = 0; ts < this->pred_timesteps - 1; ts++) { for (int samp = 0; samp < this->res; samp++) { result[vert_name][ts][samp] = this->predicted_observed_state_sequences[samp][ts + 1][vert_id][0]; } ranges::sort(result[vert_name][ts]); } } return result; } void AnalysisGraph::sample_transition_matrix_collection_from_prior() { this->transition_matrix_collection.clear(); this->transition_matrix_collection = vector(this->res); for (int sample = 0; sample < this->res; sample++) { for (auto e : this->edges()) { this->graph[e].set_theta(this->graph[e].kde.resample( 1, this->rand_num_generator, this->uni_dist, this->norm_dist)[0]); } // Create this->A_original based on the sampled β and remember it this->set_transition_matrix_from_betas(); this->transition_matrix_collection[sample] = this->A_original; } } /* ============================================================================ Public: Integration with Uncharted's CauseMos interface ============================================================================ */ /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ create-model (public) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ AnalysisGraph AnalysisGraph::from_causemos_json_string(string json_string, double belief_score_cutoff, double grounding_score_cutoff, int kde_kernels ) { AnalysisGraph G; G.n_kde_kernels = kde_kernels; auto json_data = nlohmann::json::parse(json_string); G.from_causemos_json_dict(json_data, belief_score_cutoff, grounding_score_cutoff); return G; } AnalysisGraph AnalysisGraph::from_causemos_json_file(string filename, double belief_score_cutoff, double grounding_score_cutoff, int kde_kernels ) { AnalysisGraph G; G.n_kde_kernels = kde_kernels; auto json_data = load_json(filename); G.from_causemos_json_dict(json_data, belief_score_cutoff, 
grounding_score_cutoff); return G; } string AnalysisGraph::generate_create_model_response() { using nlohmann::json, ranges::max; json j; j["status"] = this->trained? "ready" : "training"; j["relations"] = {}; j["conceptIndicators"] = {}; for (auto e : this->edges()) { /* vector weights; if (this->trained) { weights = vector(this->graph[e].sampled_thetas.size()); transform(this->graph[e].sampled_thetas.begin(), this->graph[e].sampled_thetas.end(), weights.begin(), [&](double theta) { return (double)tan(theta); }); } else { weights = vector{0.5}; } */ json edge_json = {{"source", this->source(e).name}, {"target", this->target(e).name}, {"weights", this->trained ? vector{((this->graph[e].is_frozen() ? this->graph[e].get_theta() : mean(this->graph[e].sampled_thetas)) + M_PI_2) * M_2_PI - 1 } : this->graph[e].is_frozen() ? vector{(this->graph[e].get_theta() + M_PI_2) * M_2_PI - 1} : vector{0.5}}}; j["relations"].push_back(edge_json); } int num_verts = this->num_vertices(); for (int v = 0; v < num_verts; v++) { Node& n = (*this)[v]; j["conceptIndicators"][n.name] = {{"initialValue", nullptr}, {"scalingFactor", nullptr}, {"scalingBias", nullptr}}; } return j.dump(); } /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ create-experiment (public) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ FormattedProjectionResult AnalysisGraph::run_causemos_projection_experiment_from_json_string( string json_string) { using namespace fmt::literals; using nlohmann::json; // During the create-model call we called construct_theta_pdfs() and // serialized them to json. When we recreate the model we load them. So to // prevent Delphi putting cycle to compute them again when we call // initialize_parameters() below, let us inform Delphi that this is a // CauseMos call. // NOTE: In hindsight, I might be able to prevent Delphi calling // initialize_parameters() within train_model() when we train due to // a CauseMos call. I need to revise it to see whether anything is // called out of order. 
this->causemos_call = true; auto json_data = nlohmann::json::parse(json_string); try { return run_causemos_projection_experiment_from_json_dict(json_data); } catch (BadCausemosInputException& e) { cout << e.what() << endl; // Just a dummy empty prediction to signal that there is an error in // projection parameters. return FormattedProjectionResult(); } } FormattedProjectionResult AnalysisGraph::run_causemos_projection_experiment_from_json_file( string filename) { using namespace fmt::literals; using nlohmann::json; // During the create-model call we called construct_theta_pdfs() and // serialized them to json. When we recreate the model we load them. So to // prevent Delphi putting cycle to compute them again when we call // initialize_parameters() below, let us inform Delphi that this is a // CauseMos call. // NOTE: In hindsight, I might be able to prevent Delphi calling // initialize_parameters() within train_model() when we train due to // a CauseMos call. I need to revise it to see whether anything is // called out of order. this->causemos_call = true; auto json_data = load_json(filename); try { return run_causemos_projection_experiment_from_json_dict(json_data); } catch (BadCausemosInputException& e) { cout << e.what() << endl; // Just a dummy empty prediction to signal that there is an error in // projection parameters. 
return FormattedProjectionResult(); } } /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ edit-weights ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ unsigned short AnalysisGraph::freeze_edge_weight(std::string source_name, std::string target_name, double scaled_weight, int polarity) { int source_id = -1; int target_id = -1; try { source_id = this->name_to_vertex.at(source_name); } catch(const out_of_range &e) { // Source concept does not exist return 2; } try { target_id = this->name_to_vertex.at(target_name); } catch(const out_of_range &e) { // Target concept does not exist return 4; } pair edg = boost::edge(source_id, target_id, this->graph); if (!edg.second) { // There is no edge from source concept to target concept return 8; } if (polarity == 0) { this->graph[edg.first].unfreeze(); // Prevent initializing the model parameters to the MAP estimate of the // previous training run to give a warm start to training this->MAP_sample_number = -1; return 0; } if (scaled_weight < 0 || scaled_weight >= 1) { return 1; } double theta = polarity / abs(polarity) * scaled_weight * M_PI_2; this->graph[edg.first].unfreeze(); this->graph[edg.first].set_theta(theta); this->graph[edg.first].freeze(); this->trained = false; return 0; }