.. _program_listing_file_lib_fourier.cpp: Program Listing for File fourier.cpp ==================================== |exhale_lsh| :ref:`Return to documentation for file ` (``lib/fourier.cpp``) .. |exhale_lsh| unicode:: U+021B0 .. UPWARDS ARROW WITH TIP LEFTWARDS .. code-block:: cpp #include "AnalysisGraph.hpp" #include "CSVWriter.hpp" using namespace std; vector AnalysisGraph::generate_frequencies_for_period(int period, int n_components) { vector freqs(n_components); // λ is the amount we have to stretch/shrink pure sinusoidal curves to make // one sine cycle = period radians double lambda = 2.0 * M_PI / period; // ω is the frequency of each stretched/shrunk sinusoidal curve. // With respect to trigonometric function differentiation, the effective // frequency of pure sinusoidal curves is λω for (int omega = 1; omega <= n_components; omega++) { freqs[omega - 1] = lambda * omega; } return freqs; } pair AnalysisGraph::assemble_sinusoidal_generating_LDS(const vector &freqs) { int lds_size = freqs.size() * 2; // For each effective frequency, there are two rows in the transition matrix: // 1) for first derivative and // 2) for second derivative // In the state vector these rows are: // 1) the value // 2) the first derivative--- Eigen::MatrixXd A_sin = Eigen::MatrixXd::Zero(lds_size, lds_size); Eigen::VectorXd s0_sin = Eigen::VectorXd::Zero(lds_size); for (int i = 0; i < freqs.size(); i++) { int i2 = i * 2; // first derivative matrix row int i2p1 = i2 + 1; // second derivative matrix row // Assembling a block diagonal matrix // with each 2 x 2 block having the format // columns -> 2i 2i+1 // _________________ // row 2i | 0 1 | // row 2i+1 | -(λω)^2 0 | // ω = i+1 & λ = 2π/period A_sin(i2, i2p1) = 1; A_sin(i2p1, i2) = -freqs[i] * freqs[i]; // Considering t₀ = 0 radians, the initial state of the sinusoidal // generating LDS is: // row 2i | sin(λω 0) | = | 0 | // row 2i+1 | λω cos(λω 0) | | λω | // ω = i+1 & λ = 2π/period s0_sin(i2p1) = freqs[i]; } return make_pair(A_sin, s0_sin); } /* // 1087 pair AnalysisGraph::assemble_sinusoidal_generating_LDS(unsigned short components, unsigned short period) { unsigned short comps_2 = components * 2; Eigen::MatrixXd A_sin_k = Eigen::MatrixXd::Zero(comps_2, comps_2); Eigen::VectorXd s0_sin_k = Eigen::VectorXd::Zero(comps_2); // Assembling a block diagonal matrix // with each 2 x 2 block having the format (ω = i + 1) // colunms 2i 2i+1 // _____________________________ // row 2i | 0 1 | // row 2i+1 | -(2πω / period)^2 0 | for (int i = 0; i < components; i++) { int i2 = i * 2; int i2p1 = i2 + 1; int ip1 = i + 1; double combined_frequency = 2.0 * M_PI * ip1 / period; A_sin_k(i2, i2p1) = 1; A_sin_k(i2p1, i2) = -combined_frequency * combined_frequency; // Considering t0 = 0 radians, the initial state of the sinusoidal // generating LDS is: // row 2i | sin(0) | = | 0 | // row 2i+1 | (2πω / period) cos(0) | | (2πω / period) | // s0_sin_k(i2p1) = ip1 * cos(ip1 * M_PI); //#(-1)**(ip1)*ip1 s0_sin_k(i2p1) = combined_frequency; } return make_pair(A_sin_k, s0_sin_k); } */ Eigen::MatrixXd AnalysisGraph::evolve_LDS(const Eigen::MatrixXd &A_base, const Eigen::VectorXd &_s0, int n_modeling_time_steps, double step_size) { int tot_steps = int(n_modeling_time_steps / step_size); int lds_size = _s0.size(); // Transition matrix to advance the system one step_size forward Eigen::MatrixXd A_step = (A_base * step_size).exp(); // A matrix to accumulate predictions of the system Eigen::MatrixXd preds = Eigen::MatrixXd::Zero(lds_size, tot_steps); preds.col(0) = _s0; // Evolve the LDS one step_size at a time for desired number of steps for (int col = 1; col < tot_steps; col++) { preds.col(col) = A_step * preds.col(col - 1); } return preds; } Eigen::MatrixXd AnalysisGraph::generate_sinusoidal_values_for_bins( const Eigen::MatrixXd &A_sin_base, const Eigen::VectorXd &s0_sin, int period) { // Evolve the sinusoidal generating LDS one step at a time for a whole // period to generate all the sinusoidal values required for the Fourier // reconstruction of the seasonal time series. Column t provides // sinusoidal values required for bin t. Eigen::MatrixXd sinusoidals = this->evolve_LDS(A_sin_base, s0_sin, period, 1); // Transpose the sinusoidal matrix so that row t contains the sinusoidal // values for bin t. sinusoidals.transposeInPlace(); return sinusoidals; } // NOTE: This method could be made a method of the Node class. The best // architecture would be to make a subclass, HeadNode, of Node class and // include this method there. At the moment we incrementally create the // graph while identifying head nodes, we are using Node objects // everywhere. To follow the HeadNode subclass specialization route, we // either have to replace Node objects with HeadNode objects or do a first // pass through the input to identify head nodes and then create the graph. void AnalysisGraph::compute_fourier_coefficients_from_least_square_optimization( const Eigen::MatrixXd &sinusoidals, int n_components, vector &head_node_ids) { int tot_sinusoidal_rows = 2 * n_components; for (auto hn_id: head_node_ids) { Node& hn = (*this)[hn_id]; /* Setting up the linear system Ux = y to solve for the * Fourier coefficients using the least squares optimization */ // Adding one additional column for cos(0) term Eigen::MatrixXd U = Eigen::MatrixXd::Zero(hn.tot_observations, tot_sinusoidal_rows + 1); // Setting the coefficient for cos(0) term (α₀). In the traditional // Fourier decomposition this is 0.5 and when α₀ is used, we have to // divide it by 2. // To avoid this additional division, here we use 1 instead. U.col(0) = Eigen::VectorXd::Ones(hn.tot_observations); // ≡ cos(0) Eigen::VectorXd y = Eigen::VectorXd::Zero(hn.tot_observations); int row = 0; // Iterate through all the bins (partitions) for (auto [bin, data] : hn.partitioned_data) { // Iterate through all the observations in one bin for (double obs : data.second) { // Only the first tot_sinusoidal_rows columns of the // sinusoidals are used. U.block(row, 1, 1, tot_sinusoidal_rows) = sinusoidals.block(bin, 0, 1, tot_sinusoidal_rows); y(row) = obs; row++; } } hn.fourier_coefficients = U.bdcSvd(Eigen::ComputeThinU | Eigen::ComputeThinV).solve(y); } } pair AnalysisGraph::assemble_head_node_modeling_LDS( const Eigen::MatrixXd &A_sin_base, const Eigen::VectorXd &s0_sin, int n_components, int n_concepts, unordered_map &hn_to_mat_row) { int tot_concept_rows = 2 * n_concepts; int tot_sinusoidal_rows = 2 * n_components; int lds_size = tot_concept_rows + tot_sinusoidal_rows; unordered_map frequency_to_idx; for (int i = 0; i < tot_sinusoidal_rows; i += 2) { // Since we are using t₀ = 0 radians as the initial time point, the // odd rows of the initial state of the sinusoidal generating LDS, // s0_sin, contains all the effective frequencies (λω) used to generate // the sinusoidal curves of different effective frequencies. Here we // are extracting those and assign them to the rows of the complete LDS. // The first tot_concept_rows of the system are used to model the actual // concepts. The latter tot_sinusoidal_rows are used to generate the // sinusoidal curves of all the effective frequencies used to model the // seasonal head nodes. frequency_to_idx[s0_sin(i + 1)] = tot_concept_rows + i; } Eigen::MatrixXd A_complete_base = Eigen::MatrixXd::Zero(lds_size, lds_size); // TODO: We do not have to do this within this method, when assembling the // final LDS, we cold do this to the matrix and initial state returned by // this method. //A_complete_base.topLeftCorner(tot_concept_rows, tot_concept_rows) = // A_concept_base; A_complete_base.bottomRightCorner(tot_sinusoidal_rows, tot_sinusoidal_rows) = A_sin_base.topLeftCorner(tot_sinusoidal_rows, tot_sinusoidal_rows); Eigen::VectorXd s0_complete = Eigen::VectorXd::Zero(lds_size); s0_complete.tail(tot_sinusoidal_rows) = s0_sin.head(tot_sinusoidal_rows); for (auto [hn_id, dot_row]: hn_to_mat_row) { Node& hn = (*this)[hn_id]; int dot_dot_row = dot_row + 1; // Sinusoidal coefficient vector to calculate initial value for concept v Eigen::VectorXd v0 = Eigen::VectorXd::Zero(hn.fourier_coefficients.size()); // Coefficient for cos(0) term (α₀). In the traditional Fourier // decomposition this is 0.5. When we compute α₀ we include this factor // into α₀ (Instead of computing α₀ as in the traditional Fourier series, // we compute α₀/2 straightaway). v0(0) = 1; for (int concept_freq_idx = 0; concept_freq_idx < hn.n_components; concept_freq_idx++) { double concept_freq = hn.fourier_freqs[concept_freq_idx]; // λω double concept_freq_squared = concept_freq * concept_freq; int concept_freq_idx_2 = concept_freq_idx * 2; int beta_omega_idx = concept_freq_idx_2 + 1; int alpha_omega_idx = concept_freq_idx_2 + 2; // Coefficient for sin(λω t) terms ≡ β_ω double beta_omega = hn.fourier_coefficients[beta_omega_idx]; // Coefficient for λω cos(λω t) terms ≡ α_ω double alpha_omega = hn.fourier_coefficients[alpha_omega_idx]; int sin_idx = frequency_to_idx[concept_freq]; int cos_idx = sin_idx + 1; // Setting coefficients of the first derivative of the head node // hn_id. They are in row 2 * hn_id in the transition matrix. // Setting coefficients for sin terms: -freq^2 * cos_coefficient A_complete_base(dot_row, sin_idx) = -concept_freq_squared * alpha_omega; // Setting coefficients for cos terms: sin_coefficient A_complete_base(dot_row, cos_idx) = beta_omega; // Setting coefficients of the second derivative of the head node // hn_id. They are in row 2 * hn_id + 1 in the transition matrix. // Setting coefficients for sin terms: -freq^2 * sin_coefficient A_complete_base(dot_dot_row, sin_idx) = -concept_freq_squared * beta_omega; // Setting coefficients for cos terms: -freq^2 * cos_coefficient A_complete_base(dot_dot_row, cos_idx) = -concept_freq_squared * alpha_omega; // Populating the sinusoidal coefficient vector to compute the // initial value for the head node hn_id. v0(beta_omega_idx) = s0_complete(sin_idx); v0(alpha_omega_idx) = s0_complete(cos_idx); } // Setting the initial value for head node hn_id. s0_complete(dot_row) = hn.fourier_coefficients.dot(v0); // Setting the initial derivative for head node hn_id. s0_complete(dot_dot_row) = A_complete_base.row(dot_row).dot(s0_complete); } return make_pair(A_complete_base, s0_complete); } bool AnalysisGraph::determine_the_best_number_of_components( const Eigen::MatrixXd & A_hn_period_base, const Eigen::VectorXd & s0_hn_period, int period, int n_components, unordered_map &hn_to_mat_row) { Eigen::MatrixXd A_till_first_midpoint = (A_hn_period_base * 0.5).exp(); // Evolve the system for 1 period of time steps at between bin midpoints. // Column b of preds contain the predictions for the midpoint between bin // b and bin (b + 1) % period. Eigen::MatrixXd preds = this->evolve_LDS(A_hn_period_base, A_till_first_midpoint * s0_hn_period, period, 1); // Track whether the rmse for at least one head node got reduced for this // number of components. This means we have to check the rmse for // n_components + 1. Monitoring this allows us to stop training when rmses // are not reducing for all the head nodes with the specified period. bool rmses_are_reducing = false; for (auto [hn_id, hn_preds_row]: hn_to_mat_row) { // NOTE: This for loop could be made a method of the Node class. The // best architecture would be to make a subclass, HeadNode, of // Node class and include this for loop as a method there. At the // moment we incrementally create the graph while identifying head // nodes, we are using Node objects everywhere. To follow the // HeadNode subclass specialization route, we either have to // replace Node objects with HeadNode objects or do a first pass // through the input to identify head nodes and then create the // graph. Node& hn = (*this)[hn_id]; vector errors; // TODO: Just for debugging delete //for (auto [midpoint, vals] : hn.partitioned_data) { //for (double val : vals.second) { // Iterate through all the midpoint bins for (auto [midpoint, vals] : hn.between_bin_midpoints) { // Iterate through all the linear interpolated midpoints in the // current midpoint bin. for (double val : vals) { errors.push_back(val - preds(hn_preds_row, midpoint)); } } double rmse = -1; if (errors.size() > 0) { rmse = sqrt(inner_product(errors.begin(), errors.end(), errors.begin(), 0.0) / errors.size()); } // cout << n_components << " : " << rmse << endl; if (hn.rmse_is_reducing) { // RMSE for this head node got reduced for this head node for // n_components - 1 components. hn.rmse_is_reducing = (rmse < hn.best_rmse); if (hn.rmse_is_reducing) { // RMSE for this head node got reduced this time as well for // n_components components. // Remember that rmse for at least one head node got reduced // during this run. So, we need to check again for // n_components + 1 components. rmses_are_reducing = true; // Remember the best model we have so far. hn.best_rmse = rmse; hn.best_n_components = n_components; hn.best_fourier_coefficients = hn.fourier_coefficients; } } } return rmses_are_reducing; } pair AnalysisGraph::assemble_all_seasonal_head_node_modeling_LDS( unordered_set fourier_frequency_set, int n_concepts, unordered_map &hn_to_mat_row) { // Prepare all the sinusoidal frequencies needed to model all the // seasonal head nodes possibly with various periods. vector all_freqs(fourier_frequency_set.begin(), fourier_frequency_set.end()); // This is not as essential step. But it makes the transition matrix more // ordered, easily inspectable and helps debugging. sort(all_freqs.begin(), all_freqs.end()); auto [A_sin_all_base, s0_sin_all] = this->assemble_sinusoidal_generating_LDS(all_freqs); auto [A_concept_full_base, s0_concept_full] = this->assemble_head_node_modeling_LDS(A_sin_all_base, s0_sin_all, all_freqs.size(), n_concepts, hn_to_mat_row); return make_pair(A_concept_full_base, s0_concept_full); } void AnalysisGraph::merge_concept_LDS_into_seasonal_head_node_modeling_LDS( const Eigen::MatrixXd &A_concept_base, const Eigen::VectorXd s0_concept) { // Merge the initial state for concepts with the initial state for // Fourier decomposition based seasonal head node model. this->current_latent_state = this->s0_fourier; this->current_latent_state.head(s0_concept.size()) += s0_concept; // Merge the transition matrix for concepts with the transition matrix // for Fourier decomposition based seasonal head node model. this->A_fourier_base.topLeftCorner(A_concept_base.rows(), A_concept_base.cols()) = A_concept_base; } void AnalysisGraph::predictions_to_csv(const Eigen::MatrixXd &A_base, const Eigen::VectorXd &_s0, int n_time_steps) { CSVWriter writer("head_node_predictions_" + to_string(_s0.size() / 2) + ".csv"); int lds_size = _s0.size(); // Evolve the LDS one step_size at a time for desired number of steps Eigen::MatrixXd preds = this->evolve_LDS(A_base, _s0, n_time_steps, 0.25); int tot_steps = preds.cols(); // Transpose the prediction matrix so that // col 2i - Predictions for variable i in the system // col 2i+1 - Derivatives for variable i in the system // Each row is a time step // This has tot_step rows and lds_size columns. preds.transposeInPlace(); // Output the whole prediction matrix, including derivatives, into a csv // file. for (int step = 0; step < tot_steps; step++) { vector preds_at_step(lds_size); Eigen::VectorXd::Map(&preds_at_step[0], lds_size) = preds.row(step); writer.write_row(preds_at_step.begin(), preds_at_step.end()); } } std::pair AnalysisGraph::fit_seasonal_head_node_model_via_fourier_decomposition() { // Group seasonal head nodes according to their seasonality. unordered_map> period_to_head_nodes; for (int head_node_id: this->head_nodes) { Node& hn = (*this)[head_node_id]; period_to_head_nodes[hn.period].push_back(head_node_id); } std::unordered_set fourier_frequency_set; for (auto [period, hn_ids]: period_to_head_nodes) { // The maximum number of components we are going to evaluate in search // of the best number of components to be used. // max_k < period / 2 (Nyquist theorem) int max_k = period / 2; // Generate the maximum number of sinusoidal frequencies needed for the // period. By the Nyquist theorem this number is floor(period / 2) // The actual number of frequencies used to model a concept could be // less than this, which is decided by computing the root mean squared // error of the predictions for each number of components vector period_freqs = this->generate_frequencies_for_period(period, max_k); // Assemble the sinusoidal generating LDS with the maximum number of // components. This includes all the sinusoidals needed for every lesser // number of components we are going to try out. auto [A_sin_max_k_base, s0_sin_max_k] = this->assemble_sinusoidal_generating_LDS(period_freqs); Eigen::MatrixXd sinusoidals = this->generate_sinusoidal_values_for_bins( A_sin_max_k_base, s0_sin_max_k, period); // Assign transition matrix rows to head nodes with this period. // NOTE: At this moment we do not need to reformat the head node vector // this way. We could just use the head node vector as it is and // compute the transition matrix row based on the index at which // each head node is at. // I am doing this to make assemble_head_node_modeling_LDS() // method more generalized so that I could reuse it to assemble // the final complete LDS with all the nodes (seasonal head nodes // with different periods and body nodes). // At the moment, in the complete system, head nodes does not // occupy a contiguous range of rows in the transition matrix. unordered_map hn_to_mat_row; for (int v = 0; v < hn_ids.size(); v++) { hn_to_mat_row[hn_ids[v]] = 2 * v; // Again in the light of reusing the LDS assembly code to assemble // the complete LDS Node& hn = (*this)[hn_ids[v]]; hn.fourier_freqs = period_freqs; } for (int components = 0; components <= max_k; components++) { for (auto hn_id: hn_ids) { // Again in the light of reusing the LDS assembly code to assemble // the complete LDS Node& hn = (*this)[hn_id]; hn.n_components = components; } this->compute_fourier_coefficients_from_least_square_optimization( sinusoidals, components, hn_ids); // Assemble the LDS that generates the head nodes with this // period using this number of components. auto [A_concept_period_base, s0_concept_period] = this->assemble_head_node_modeling_LDS(A_sin_max_k_base, s0_sin_max_k, components, hn_to_mat_row.size(), hn_to_mat_row); bool rmses_are_reducing = determine_the_best_number_of_components( A_concept_period_base, s0_concept_period, period, components, hn_to_mat_row); if (!rmses_are_reducing) { break; } } // Node& hn_dbg = (*this)[hn_ids[0]]; // TODO: Just for debugging delete // cout << "Best: " << hn_dbg.best_n_components << " : " // << hn_dbg.best_rmse << endl; // Accumulate all the fourier frequencies needed to model all the head // nodes with this period int max_n_components_for_period = 0; for (auto hn_id: hn_ids) { Node& hn = (*this)[hn_id]; // In the assemble_head_node_modeling_LDS() method // Node object members: n_components and fourier_coefficients are // accessed. // When we assemble the final transition matrix with all the fitted // seasonal head nodes and all the body nodes, we have to use Node // object members: best_n_components and best_fourier_coefficients, // for the final system to utilize the fitted seasonal models. // Therefore, we reassign best_n_components and // best_fourier_coefficient to n_components and // fourier_coefficients members so that we could reuse the // assemble_head_node_modeling_LDS() method // seamlessly without any change or state checking at the time of // assembling the transition matrix for the final complete system. hn.n_components = hn.best_n_components; hn.fourier_coefficients = hn.best_fourier_coefficients; hn.best_fourier_coefficients.resize(0); // To save some memory if (hn.best_n_components > max_n_components_for_period) { max_n_components_for_period = hn.best_n_components; } } // Remember all the sinusoidal frequencies needed to fit the best // Fourier decomposition based seasonal model each of the // seasonal head nodes with this period. fourier_frequency_set.insert(period_freqs.begin(), period_freqs.begin() + max_n_components_for_period); } // Assign transition matrix rows to seasonal head nodes. unordered_map hn_to_mat_row; // Using only the seasonal head nodes //int row = 0; //for (int hn_id: this->head_nodes) { // hn_to_mat_row[hn_id] = row; // row += 2; //} //int n_concepts = this->head_nodes.size(); // Using all the nodes for (int hn_id: this->head_nodes) { hn_to_mat_row[hn_id] = 2 * hn_id; } int n_concepts = this->num_vertices(); std::pair seasonal_head_node_LDS = this->assemble_all_seasonal_head_node_modeling_LDS(fourier_frequency_set, n_concepts, hn_to_mat_row); //this->predictions_to_csv(seasonal_head_node_LDS.first, // seasonal_head_node_LDS.second, 24); return seasonal_head_node_LDS; }