.. _program_listing_file_lib_parameter_initialization.cpp: Program Listing for File parameter_initialization.cpp ===================================================== |exhale_lsh| :ref:`Return to documentation for file ` (``lib/parameter_initialization.cpp``) .. |exhale_lsh| unicode:: U+021B0 .. UPWARDS ARROW WITH TIP LEFTWARDS .. code-block:: cpp #include "AnalysisGraph.hpp" #include using namespace std; using namespace delphi::utils; /* ============================================================================ Private: Initializing model parameters ============================================================================ */ void AnalysisGraph::initialize_parameters(int res, InitialBeta initial_beta, InitialDerivative initial_derivative, bool use_heuristic, bool use_continuous) { this->initialize_random_number_generator(); this->uni_disc_dist = uniform_int_distribution (0, this->concept_sample_pool.size() - 1); this->uni_disc_dist_edge = uniform_int_distribution (0, this->edge_sample_pool.size() - 1); this->res = res; this->continuous = use_continuous; this->data_heuristic = use_heuristic; this->find_all_paths(); this->construct_theta_pdfs(); this->init_betas_to(initial_beta); this->set_indicator_means_and_standard_deviations(); this->assemble_base_LDS(initial_derivative); if (this->head_node_model == HeadNodeModel::HNM_NAIVE) { this->generate_head_node_latent_sequences( -1, accumulate(this->modeling_timestep_gaps.begin() + 1, this->modeling_timestep_gaps.end(), 0) + 1); } this->log_likelihood = 0; this->coin_flip = 0; // This initializes the previous_log_likelihood to 0 and sets the // log_likelihood for the current initialization of the parameters. Since // we have initialized coin_flip = 0, log likelihood is computed as if a // θ has been sampled. Therefore, this call also computes the matrix // exponentials for the initial transition matrix that defines the // relationships between concepts in turn initializing the e_A_ts map that // holds computed matrix exponentials for different gaps we have to advance // the system to reach all the modeling time steps where there is data. this->set_log_likelihood(); this->log_likelihood_MAP = -(DBL_MAX - 1); this->transition_matrix_collection.clear(); this->initial_latent_state_collection.clear(); this->transition_matrix_collection = vector(this->res); this->initial_latent_state_collection = vector(this->res); } void AnalysisGraph::init_betas_to(InitialBeta ib) { if (this->MAP_sample_number > -1) { // Warm start using the MAP estimate of the previous training run for (EdgeDescriptor e : this->edges()) { this->graph[e].set_theta(this->graph[e].sampled_thetas[this->MAP_sample_number]); this->graph[e].compute_logpdf_theta(); } return; } switch (ib) { // Initialize the initial β for this edge // Note: I am repeating the loop within each case for efficiency. // If we embed the switch within the for loop, there will be less code // but we will evaluate the switch for each iteration through the loop case InitialBeta::ZERO: for (EdgeDescriptor e : this->edges()) { // β = tan(0.0) = 0 this->graph[e].set_theta(0.0); this->graph[e].compute_logpdf_theta(); } break; case InitialBeta::ONE: for (EdgeDescriptor e : this->edges()) { // θ = atan(1) = Π/4 // β = tan(atan(1)) = 1 this->graph[e].set_theta(std::atan(1)); this->graph[e].compute_logpdf_theta(); } break; case InitialBeta::HALF: for (EdgeDescriptor e : this->edges()) { // β = tan(atan(0.5)) = 0.5 this->graph[e].set_theta(std::atan(0.5)); this->graph[e].compute_logpdf_theta(); } break; case InitialBeta::MEAN: for (EdgeDescriptor e : this->edges()) { this->graph[e].set_theta(this->graph[e].kde.mu); this->graph[e].compute_logpdf_theta(); } break; case InitialBeta::MEDIAN: for (EdgeDescriptor e : this->edges()) { this->graph[e].set_theta(median(this->graph[e].kde.dataset)); this->graph[e].compute_logpdf_theta(); } break; case InitialBeta::PRIOR: for (EdgeDescriptor e : this->edges()) { this->graph[e].set_theta(this->graph[e].kde.most_probable_theta); this->graph[e].compute_logpdf_theta(); } break; case InitialBeta::RANDOM: for (EdgeDescriptor e : this->edges()) { // this->uni_dist() gives a random number in range [0, 1] // Multiplying by M_PI scales the range to [0, M_PI] this->graph[e].set_theta(this->uni_dist(this->rand_num_generator) * M_PI); this->graph[e].compute_logpdf_theta(); } break; } } void AnalysisGraph::set_indicator_means_and_standard_deviations() { int num_verts = this->num_vertices(); vector mean_sequence; vector std_sequence; vector ts_sequence; if (this->observed_state_sequence.empty()) { return; } for (int v = 0; v < num_verts; v++) { Node &n = (*this)[v]; n.clear_state(); for (int i = 0; i < n.indicators.size(); i++) { Indicator &ind = n.indicators[i]; double mean = 0.0001; // The (v, i) pair uniquely identifies an indicator. It is the i th // indicator of v th concept. // For each indicator, there could be multiple observations per // time step. Aggregate those observations to create a single // observation time series for the indicator (v, i). // We also ignore the missing values. mean_sequence.clear(); std_sequence.clear(); ts_sequence.clear(); for (int ts = 0; ts < this->n_timesteps; ts++) { if (this->observed_state_sequence[ts][v].empty()) { // This concept has no indicator specified in the create-model // call continue; } vector &obs_at_ts = this->observed_state_sequence[ts][v][i]; if (!obs_at_ts.empty()) { // There is an observation for indicator (v, i) at ts. ts_sequence.push_back(this->observation_timesteps_sorted[ts]); mean_sequence.push_back(delphi::utils::mean(obs_at_ts)); std_sequence.push_back(delphi::utils::standard_deviation( mean_sequence.back(), obs_at_ts)); if (i == 0 && n.period > 1 && this->head_nodes.find(v) != this->head_nodes.end()) { // Only use observations of the 1st indicator attached to // each concept. Seasonal Fourier model is applied only to // head nodes with period > 1. // For all the concepts ts = 0 is the start int partition = this->observation_timesteps_sorted[ts] % n.period; // Note: if there are multiple observations for some // time steps, the length of time step vector and the // observation vector will be different. n.partitioned_data[partition].first.push_back( this->observation_timesteps_sorted[ts]); // Partition the raw observations for now. // Since we fit the seasonal Fourier model for the latent // variables, we have to divide these raw observations by // the scaling factor (for now we are using the mean) of // the first indicator, which we do not know at the moment // (we are in the process of computing the mean) before // fitting the fourier curve. n.partitioned_data[partition].second.insert( n.partitioned_data[partition].second.end(), obs_at_ts.begin(), obs_at_ts.end()); n.tot_observations += obs_at_ts.size(); } } //else { // This is a missing observation //} } if (mean_sequence.empty()) { continue; } // Set the indicator standard deviation // NOTE: This is what we have been doing earlier for delphi local // run: //stdev = 0.1 * abs(indicator.get_mean()); //stdev = stdev == 0 ? 1 : stdev; //indicator.set_stdev(stdev); // Surprisingly, I did not see indicator standard deviation being set // when delphi is called from CauseMos HMI and in the Indicator class // the default standard deviation was set to be 0. This could be an // omission. I updated Indicator class so that the default indicator // standard deviation is 1. double max_std = ranges::max(std_sequence); if(!isnan(max_std)) { // For indicator (v, i), at least one time step had // more than one observation. // We can use that to assign the indicator standard deviation. // TODO: Is that a good idea? ind.set_stdev(max_std); } // else { // All the time steps had either 0 or 1 observation. // In this case, indicator standard deviation defaults to 1 // TODO: Is this a good idea? //} // Set the indicator mean string aggregation_method = ind.get_aggregation_method(); // TODO: Instead of comparing text, it would be better to define an // enumerated type say AggMethod and use it. Such an enumerated type // needs to be shared between AnalysisGraph and Indicator classes. if (aggregation_method.compare("first") == 0) { if (ts_sequence[0] == 0) { // The first observation is not missing mean = mean_sequence[0]; } else { // First observation is missing // TODO: Decide what to do // I feel getting a decaying weighted average of existing // observations with earlier the observation, higher the // weight is a good idea. TODO: If so how to calculate // weights? We need i < j => w_i > w_j and sum of w's = 1. // For the moment as a placeholder, average of the earliest // available observation is used to set the indicator mean. mean = mean_sequence[0]; } } else if (aggregation_method.compare("last") == 0) { int last_obs_idx = this->n_timesteps - 1; if (ts_sequence.back() == last_obs_idx) { // The first observation is not missing mean = mean_sequence.back(); } else { // First observation is missing // TODO: Similar to "first" decide what to do. // For the moment the observation closest to the final // time step is used to set the indicator mean. ind.set_mean(mean_sequence.back()); } } else if (aggregation_method.compare("min") == 0) { mean = ranges::min(mean_sequence); } else if (aggregation_method.compare("max") == 0) { mean = ranges::max(mean_sequence); } else if (aggregation_method.compare("mean") == 0) { mean = delphi::utils::mean(mean_sequence); } else if (aggregation_method.compare("median") == 0) { mean = delphi::utils::median(mean_sequence); } if (mean != 0) { ind.set_mean(mean); } else { // To avoid division by zero error later on ind.set_mean(0.0001); } // Set mean and standard deviation of the concept based on the mean // and the standard deviation of the first indicator attached to it. if (i == 0) { if (n.has_min) { if (n.indicators[0].mean > 0) { n.min_val = n.min_val_obs / n.indicators[0].mean; } else if (n.indicators[0].mean < 0) { n.max_val = n.min_val_obs / n.indicators[0].mean; } } if (n.has_max) { if (n.indicators[0].mean > 0) { n.max_val = n.max_val_obs / n.indicators[0].mean; } else if (n.indicators[0].mean < 0) { n.min_val = n.max_val_obs / n.indicators[0].mean; } } for (auto& [parititon, obs_in_parititon]: n.partitioned_data) { transform(obs_in_parititon.second.begin(), obs_in_parititon.second.end(), obs_in_parititon.second.begin(), [&](double obs) {return obs / n.indicators[0].mean;}); } transform(mean_sequence.begin(), mean_sequence.end(), mean_sequence.begin(), [&](double obs_mean) {return obs_mean / n.indicators[0].mean;}); // Computing the midpoints if (this->head_node_model == HNM_FOURIER && n.period > 1 && this->head_nodes.find(v) != this->head_nodes.end()) { // Midpoints are only needed for head nodes with period > 1 to // fit the Fourier decomposition based seasonal model. n.linear_interpolate_between_bin_midpoints(ts_sequence, mean_sequence); } else { n.compute_bin_centers_and_spreads(ts_sequence, mean_sequence); } n.mean = delphi::utils::mean(mean_sequence); if (mean_sequence.size() > 1) { n.std = delphi::utils::standard_deviation(n.mean, mean_sequence); } } } } } void AnalysisGraph::construct_theta_pdfs() { // The choice of sigma_X and sigma_Y is somewhat arbitrary here - we need to // come up with a principled way to select this value, or infer it from data. double sigma_X = 1.0; double sigma_Y = 1.0; AdjectiveResponseMap adjective_response_map = this->construct_adjective_response_map( this->rand_num_generator, this->uni_dist, this->norm_dist, this->n_kde_kernels); vector marginalized_responses; for (auto [adjective, responses] : adjective_response_map) { for (auto response : responses) { marginalized_responses.push_back(response); } } marginalized_responses = KDE(marginalized_responses) .resample(this->n_kde_kernels, this->rand_num_generator, this->uni_dist, this->norm_dist); for (auto e : this->edges()) { if (!this->edge(e).kde.log_prior_hist.empty()) { continue; } vector all_thetas = {}; for (Statement stmt : this->graph[e].evidence) { Event subject = stmt.subject; Event object = stmt.object; string subj_adjective = subject.adjective; string obj_adjective = object.adjective; auto subj_responses = lmap( [&](auto x) { return x * subject.polarity; }, get(adjective_response_map, subj_adjective, marginalized_responses)); auto obj_responses = lmap( [&](auto x) { return x * object.polarity; }, get(adjective_response_map, obj_adjective, marginalized_responses)); for (auto [x, y] : ranges::views::cartesian_product(subj_responses, obj_responses)) { all_thetas.push_back(atan((sigma_Y * y) / (sigma_X * x))); } } // TODO: Using n_kde_kernels is a quick hack. Should define another // variable like n_bins to make the number of bins independent from the // number of kernels this->graph[e].kde = KDE(all_thetas, this->n_kde_kernels); // this->graph[e].kde.dataset = all_thetas; } }