Program Listing for File fourier.cpp

Return to documentation for file (lib/fourier.cpp)

#include "AnalysisGraph.hpp"
#include "CSVWriter.hpp"

using namespace std;

vector<double> AnalysisGraph::generate_frequencies_for_period(int period,
                                                              int n_components) {
    vector<double> freqs(n_components);

    // λ is the amount we have to stretch/shrink pure sinusoidal curves to make
    // one sine cycle = period radians
    double lambda = 2.0 * M_PI / period;

    // ω is the frequency of each stretched/shrunk sinusoidal curve.
    // With respect to trigonometric function differentiation, the effective
    // frequency of pure sinusoidal curves is λω
    for (int omega = 1; omega <= n_components; omega++) {
        freqs[omega - 1] = lambda * omega;
    }

    return freqs;
}

pair<Eigen::MatrixXd, Eigen::VectorXd>
AnalysisGraph::assemble_sinusoidal_generating_LDS(const vector<double> &freqs) {
    int lds_size = freqs.size() * 2;

    // For each effective frequency, there are two rows in the transition matrix:
    //      1) for first derivative and
    //      2) for second derivative
    // In the state vector these rows are:
    //      1) the value
    //      2) the first derivative---
    Eigen::MatrixXd A_sin = Eigen::MatrixXd::Zero(lds_size, lds_size);
    Eigen::VectorXd s0_sin = Eigen::VectorXd::Zero(lds_size);

    for (int i = 0; i < freqs.size(); i++) {
        int i2 = i * 2;     // first derivative matrix row
        int i2p1 = i2 + 1;  // second derivative matrix row

        // Assembling a block diagonal matrix
        // with each 2 x 2 block having the format
        // columns ->   2i        2i+1
        //            _________________
        // row 2i    |   0          1 |
        // row 2i+1  | -(λω)^2      0 |
        // ω = i+1 & λ = 2π/period
        A_sin(i2, i2p1) = 1;
        A_sin(i2p1, i2) = -freqs[i] * freqs[i];

        // Considering t₀ = 0 radians, the initial state of the sinusoidal
        // generating LDS is:
        // row 2i    |    sin(λω 0) |  = |  0 |
        // row 2i+1  | λω cos(λω 0) |    | λω |
        // ω = i+1 & λ = 2π/period
        s0_sin(i2p1) = freqs[i];
    }

    return make_pair(A_sin, s0_sin);
}

/*
// 1087
pair<Eigen::MatrixXd, Eigen::VectorXd>
AnalysisGraph::assemble_sinusoidal_generating_LDS(unsigned short components,
                                                  unsigned short period) {
    unsigned short comps_2 = components * 2;
    Eigen::MatrixXd A_sin_k = Eigen::MatrixXd::Zero(comps_2, comps_2);
    Eigen::VectorXd s0_sin_k = Eigen::VectorXd::Zero(comps_2);

    // Assembling a block diagonal matrix
    // with each 2 x 2 block having the format (ω = i + 1)
    // colunms           2i               2i+1
    //            _____________________________
    // row 2i    |        0                 1 |
    // row 2i+1  | -(2πω / period)^2        0 |
    for (int i = 0; i < components; i++) {
        int i2 = i * 2;
        int i2p1 = i2 + 1;
        int ip1 = i + 1;
        double combined_frequency = 2.0 * M_PI * ip1 / period;
        A_sin_k(i2, i2p1) = 1;
        A_sin_k(i2p1, i2) = -combined_frequency * combined_frequency;

        // Considering t0 = 0 radians, the initial state of the sinusoidal
        // generating LDS is:
        // row 2i    |       sin(0)          |  = |       0        |
        // row 2i+1  | (2πω / period) cos(0) |    | (2πω / period) |
        // s0_sin_k(i2p1) = ip1 * cos(ip1 * M_PI); //#(-1)**(ip1)*ip1
        s0_sin_k(i2p1) = combined_frequency;
    }

    return make_pair(A_sin_k, s0_sin_k);
}
 */

Eigen::MatrixXd AnalysisGraph::evolve_LDS(const Eigen::MatrixXd &A_base,
                                          const Eigen::VectorXd &_s0,
                                          int n_modeling_time_steps,
                                          double step_size) {
    int tot_steps = int(n_modeling_time_steps / step_size);
    int lds_size = _s0.size();

    // Transition matrix to advance the system one step_size forward
    Eigen::MatrixXd A_step = (A_base * step_size).exp();

    // A matrix to accumulate predictions of the system
    Eigen::MatrixXd preds = Eigen::MatrixXd::Zero(lds_size, tot_steps);
    preds.col(0) = _s0;

    // Evolve the LDS one step_size at a time for desired number of steps
    for (int col = 1; col < tot_steps; col++) {
        preds.col(col) = A_step * preds.col(col - 1);
    }

    return preds;
}

Eigen::MatrixXd
AnalysisGraph::generate_sinusoidal_values_for_bins(
                                              const Eigen::MatrixXd &A_sin_base,
                                              const Eigen::VectorXd &s0_sin,
                                              int period) {
    // Evolve the sinusoidal generating LDS one step at a time for a whole
    // period to generate all the sinusoidal values required for the Fourier
    // reconstruction of the seasonal time series. Column t provides
    // sinusoidal values required for bin t.
    Eigen::MatrixXd sinusoidals = this->evolve_LDS(A_sin_base, s0_sin, period,
                                                                             1);

    // Transpose the sinusoidal matrix so that row t contains the sinusoidal
    // values for bin t.
    sinusoidals.transposeInPlace();

    return sinusoidals;
}

// NOTE: This method could be made a method of the Node class. The best
//       architecture would be to make a subclass, HeadNode, of Node class and
//       include this method there. At the moment we incrementally create the
//       graph while identifying head nodes, we are using Node objects
//       everywhere. To follow the HeadNode subclass specialization route, we
//       either have to replace Node objects with HeadNode objects or do a first
//       pass through the input to identify head nodes and then create the graph.
void
AnalysisGraph::compute_fourier_coefficients_from_least_square_optimization(
                                           const Eigen::MatrixXd &sinusoidals,
                                           int n_components,
                                           vector<int> &head_node_ids) {
    int tot_sinusoidal_rows = 2 * n_components;

    for (auto hn_id: head_node_ids) {
        Node& hn = (*this)[hn_id];

        /* Setting up the linear system Ux = y to solve for the
         * Fourier coefficients using the least squares optimization */
        // Adding one additional column for cos(0) term
        Eigen::MatrixXd U = Eigen::MatrixXd::Zero(hn.tot_observations,
                                                  tot_sinusoidal_rows + 1);

        // Setting the coefficient for cos(0) term (α₀). In the traditional
        // Fourier decomposition this is 0.5 and when α₀ is used, we have to
        // divide it by 2.
        // To avoid this additional division, here we use 1 instead.
        U.col(0) = Eigen::VectorXd::Ones(hn.tot_observations); // ≡ cos(0)
        Eigen::VectorXd y = Eigen::VectorXd::Zero(hn.tot_observations);

        int row = 0;

        // Iterate through all the bins (partitions)
        for (auto [bin, data] : hn.partitioned_data) {

            // Iterate through all the observations in one bin
            for (double obs : data.second) {
                // Only the first tot_sinusoidal_rows columns of the
                // sinusoidals are used.
                U.block(row, 1, 1, tot_sinusoidal_rows) =
                                      sinusoidals.block(bin, 0,
                                                        1, tot_sinusoidal_rows);
                y(row) = obs;
                row++;
            }
        }

        hn.fourier_coefficients =
            U.bdcSvd(Eigen::ComputeThinU | Eigen::ComputeThinV).solve(y);
    }
}

pair<Eigen::MatrixXd, Eigen::VectorXd>
AnalysisGraph::assemble_head_node_modeling_LDS(
                                       const Eigen::MatrixXd &A_sin_base,
                                       const Eigen::VectorXd &s0_sin,
                                       int n_components,
                                       int n_concepts,
                                       unordered_map<int, int> &hn_to_mat_row) {

    int tot_concept_rows = 2 * n_concepts;
    int tot_sinusoidal_rows = 2 * n_components;
    int lds_size = tot_concept_rows + tot_sinusoidal_rows;

    unordered_map<double, int> frequency_to_idx;
    for (int i = 0; i < tot_sinusoidal_rows; i += 2) {
        // Since we are using t₀ = 0 radians as the initial time point, the
        // odd rows of the initial state of the sinusoidal generating LDS,
        // s0_sin, contains all the effective frequencies (λω) used to generate
        // the sinusoidal curves of different effective frequencies. Here we
        // are extracting those and assign them to the rows of the complete LDS.
        // The first tot_concept_rows of the system are used to model the actual
        // concepts. The latter tot_sinusoidal_rows are used to generate the
        // sinusoidal curves of all the effective frequencies used to model the
        // seasonal head nodes.
        frequency_to_idx[s0_sin(i + 1)] = tot_concept_rows + i;
    }

    Eigen::MatrixXd A_complete_base = Eigen::MatrixXd::Zero(lds_size, lds_size);

    // TODO: We do not have to do this within this method, when assembling the
    // final LDS, we cold do this to the matrix and initial state returned by
    // this method.
    //A_complete_base.topLeftCorner(tot_concept_rows, tot_concept_rows) =
    //                                                             A_concept_base;

    A_complete_base.bottomRightCorner(tot_sinusoidal_rows, tot_sinusoidal_rows)
           = A_sin_base.topLeftCorner(tot_sinusoidal_rows, tot_sinusoidal_rows);

    Eigen::VectorXd s0_complete = Eigen::VectorXd::Zero(lds_size);
    s0_complete.tail(tot_sinusoidal_rows) = s0_sin.head(tot_sinusoidal_rows);

    for (auto [hn_id, dot_row]: hn_to_mat_row) {
        Node& hn = (*this)[hn_id];

        int dot_dot_row = dot_row + 1;

        // Sinusoidal coefficient vector to calculate initial value for concept v
        Eigen::VectorXd v0 = Eigen::VectorXd::Zero(hn.fourier_coefficients.size());

        // Coefficient for cos(0) term (α₀). In the traditional Fourier
        // decomposition this is 0.5. When we compute α₀ we include this factor
        // into α₀ (Instead of computing α₀ as in the traditional Fourier series,
        // we compute α₀/2 straightaway).
        v0(0) = 1;

        for (int concept_freq_idx = 0; concept_freq_idx < hn.n_components;
             concept_freq_idx++) {

            double concept_freq = hn.fourier_freqs[concept_freq_idx]; // λω
            double concept_freq_squared = concept_freq * concept_freq;

            int concept_freq_idx_2 = concept_freq_idx * 2;
            int beta_omega_idx = concept_freq_idx_2 + 1;
            int alpha_omega_idx = concept_freq_idx_2 + 2;

            // Coefficient for sin(λω t) terms ≡ β_ω
            double beta_omega = hn.fourier_coefficients[beta_omega_idx];
            // Coefficient for λω cos(λω t) terms ≡ α_ω
            double alpha_omega = hn.fourier_coefficients[alpha_omega_idx];

            int sin_idx = frequency_to_idx[concept_freq];
            int cos_idx = sin_idx + 1;

            // Setting coefficients of the first derivative of the head node
            // hn_id. They are in row 2 * hn_id in the transition matrix.

            // Setting coefficients for sin terms: -freq^2 * cos_coefficient
            A_complete_base(dot_row, sin_idx) = -concept_freq_squared *
                                                 alpha_omega;

            // Setting coefficients for cos terms: sin_coefficient
            A_complete_base(dot_row, cos_idx) = beta_omega;

            // Setting coefficients of the second derivative of the head node
            // hn_id. They are in row 2 * hn_id + 1 in the transition matrix.

            // Setting coefficients for sin terms: -freq^2 * sin_coefficient
            A_complete_base(dot_dot_row, sin_idx) = -concept_freq_squared *
                                                     beta_omega;

            // Setting coefficients for cos terms: -freq^2 * cos_coefficient
            A_complete_base(dot_dot_row, cos_idx) = -concept_freq_squared *
                                                     alpha_omega;

            // Populating the sinusoidal coefficient vector to compute the
            // initial value for the head node hn_id.
            v0(beta_omega_idx) = s0_complete(sin_idx);
            v0(alpha_omega_idx) = s0_complete(cos_idx);
        }

        // Setting the initial value for head node hn_id.
        s0_complete(dot_row) = hn.fourier_coefficients.dot(v0);

        // Setting the initial derivative for head node hn_id.
        s0_complete(dot_dot_row) = A_complete_base.row(dot_row).dot(s0_complete);
    }

    return make_pair(A_complete_base, s0_complete);
}

bool AnalysisGraph::determine_the_best_number_of_components(
                                   const Eigen::MatrixXd & A_hn_period_base,
                                   const Eigen::VectorXd & s0_hn_period,
                                   int period,
                                   int n_components,
                                   unordered_map<int, int> &hn_to_mat_row) {

    Eigen::MatrixXd A_till_first_midpoint = (A_hn_period_base * 0.5).exp();

    // Evolve the system for 1 period of time steps at between bin midpoints.
    // Column b of preds contain the predictions for the midpoint between bin
    // b and bin (b + 1) % period.
    Eigen::MatrixXd preds = this->evolve_LDS(A_hn_period_base,
                                             A_till_first_midpoint *
                                                                   s0_hn_period,
                                             period, 1);

    // Track whether the rmse for at least one head node got reduced for this
    // number of components. This means we have to check the rmse for
    // n_components + 1. Monitoring this allows us to stop training when rmses
    // are not reducing for all the head nodes with the specified period.
    bool rmses_are_reducing = false;

    for (auto [hn_id, hn_preds_row]: hn_to_mat_row) {
        // NOTE: This for loop could be made a method of the Node class. The
        //       best architecture would be to make a subclass, HeadNode, of
        //       Node class and include this for loop as a method there. At the
        //       moment we incrementally create the graph while identifying head
        //       nodes, we are using Node objects everywhere. To follow the
        //       HeadNode subclass specialization route, we either have to
        //       replace Node objects with HeadNode objects or do a first pass
        //       through the input to identify head nodes and then create the
        //       graph.
        Node& hn = (*this)[hn_id];
        vector<double> errors;

        // TODO: Just for debugging delete
        //for (auto [midpoint, vals] : hn.partitioned_data) {
            //for (double val : vals.second) {

        // Iterate through all the midpoint bins
        for (auto [midpoint, vals] : hn.between_bin_midpoints) {
            // Iterate through all the linear interpolated midpoints in the
            // current midpoint bin.
            for (double val : vals) {
                errors.push_back(val - preds(hn_preds_row, midpoint));
            }
        }

        double rmse = -1;

        if (errors.size() > 0) {
            rmse = sqrt(inner_product(errors.begin(), errors.end(),
                                      errors.begin(), 0.0) / errors.size());
        }

        // cout << n_components << " : " << rmse << endl;
        if (hn.rmse_is_reducing) {
            // RMSE for this head node got reduced for this head node for
            // n_components - 1 components.
            hn.rmse_is_reducing = (rmse < hn.best_rmse);
            if (hn.rmse_is_reducing) {
                // RMSE for this head node got reduced this time as well for
                // n_components components.

                // Remember that rmse for at least one head node got reduced
                // during this run. So, we need to check again for
                // n_components + 1 components.
                rmses_are_reducing = true;

                // Remember the best model we have so far.
                hn.best_rmse = rmse;
                hn.best_n_components = n_components;
                hn.best_fourier_coefficients = hn.fourier_coefficients;
            }
        }
    }
    return rmses_are_reducing;
}

pair<Eigen::MatrixXd, Eigen::VectorXd>
AnalysisGraph::assemble_all_seasonal_head_node_modeling_LDS(
                                  unordered_set<double> fourier_frequency_set,
                                  int n_concepts,
                                  unordered_map<int, int> &hn_to_mat_row) {

    // Prepare all the sinusoidal frequencies needed to model all the
    // seasonal head nodes possibly with various periods.
    vector<double> all_freqs(fourier_frequency_set.begin(),
                             fourier_frequency_set.end());

    // This is not as essential step. But it makes the transition matrix more
    // ordered, easily inspectable and helps debugging.
    sort(all_freqs.begin(), all_freqs.end());

    auto [A_sin_all_base, s0_sin_all] =
                            this->assemble_sinusoidal_generating_LDS(all_freqs);

    auto [A_concept_full_base, s0_concept_full] =
                        this->assemble_head_node_modeling_LDS(A_sin_all_base,
                                                              s0_sin_all,
                                                              all_freqs.size(),
                                                              n_concepts,
                                                              hn_to_mat_row);

    return make_pair(A_concept_full_base, s0_concept_full);
}

void AnalysisGraph::merge_concept_LDS_into_seasonal_head_node_modeling_LDS(
                                          const Eigen::MatrixXd &A_concept_base,
                                          const Eigen::VectorXd s0_concept) {
    // Merge the initial state for concepts with the initial state for
    // Fourier decomposition based seasonal head node model.
    this->current_latent_state = this->s0_fourier;
    this->current_latent_state.head(s0_concept.size()) += s0_concept;

    // Merge the transition matrix for concepts with the transition matrix
    // for Fourier decomposition based seasonal head node model.
    this->A_fourier_base.topLeftCorner(A_concept_base.rows(),
                                       A_concept_base.cols()) =
                                                                 A_concept_base;
}

void AnalysisGraph::predictions_to_csv(const Eigen::MatrixXd &A_base,
                                       const Eigen::VectorXd &_s0,
                                       int n_time_steps) {

    CSVWriter writer("head_node_predictions_" +
                     to_string(_s0.size() / 2) + ".csv");
    int lds_size = _s0.size();

    // Evolve the LDS one step_size at a time for desired number of steps
    Eigen::MatrixXd preds = this->evolve_LDS(A_base, _s0, n_time_steps, 0.25);

    int tot_steps = preds.cols();

    // Transpose the prediction matrix so that
    //      col 2i   - Predictions for variable i in the system
    //      col 2i+1 - Derivatives for variable i in the system
    // Each row is a time step
    // This has tot_step rows and lds_size columns.
    preds.transposeInPlace();

    // Output the whole prediction matrix, including derivatives, into a csv
    // file.
    for (int step = 0; step < tot_steps; step++) {
        vector<double> preds_at_step(lds_size);
        Eigen::VectorXd::Map(&preds_at_step[0], lds_size) = preds.row(step);
        writer.write_row(preds_at_step.begin(), preds_at_step.end());
    }
}

std::pair<Eigen::MatrixXd, Eigen::VectorXd>
AnalysisGraph::fit_seasonal_head_node_model_via_fourier_decomposition() {
    // Group seasonal head nodes according to their seasonality.
    unordered_map<int, vector<int>> period_to_head_nodes;

    for (int head_node_id: this->head_nodes) {
        Node& hn = (*this)[head_node_id];
        period_to_head_nodes[hn.period].push_back(head_node_id);
    }

    std::unordered_set<double> fourier_frequency_set;

    for (auto [period, hn_ids]: period_to_head_nodes) {
        // The maximum number of components we are going to evaluate in search
        // of the best number of components to be used.
        // max_k < period / 2 (Nyquist theorem)
        int max_k = period / 2;

        // Generate the maximum number of sinusoidal frequencies needed for the
        // period. By the Nyquist theorem this number is floor(period / 2)
        // The actual number of frequencies used to model a concept could be
        // less than this, which is decided by computing the root mean squared
        // error of the predictions for each number of components
        vector<double> period_freqs =
                           this->generate_frequencies_for_period(period, max_k);

        // Assemble the sinusoidal generating LDS with the maximum number of
        // components. This includes all the sinusoidals needed for every lesser
        // number of components we are going to try out.
        auto [A_sin_max_k_base, s0_sin_max_k] =
                         this->assemble_sinusoidal_generating_LDS(period_freqs);

        Eigen::MatrixXd sinusoidals = this->generate_sinusoidal_values_for_bins(
                                        A_sin_max_k_base, s0_sin_max_k, period);

        // Assign transition matrix rows to head nodes with this period.
        // NOTE: At this moment we do not need to reformat the head node vector
        //       this way. We could just use the head node vector as it is and
        //       compute the transition matrix row based on the index at which
        //       each head node is at.
        //       I am doing this to make assemble_head_node_modeling_LDS()
        //       method more generalized so that I could reuse it to assemble
        //       the final complete LDS with all the nodes (seasonal head nodes
        //       with different periods and body nodes).
        //       At the moment, in the complete system, head nodes does not
        //       occupy a contiguous range of rows in the transition matrix.
        unordered_map<int, int> hn_to_mat_row;
        for (int v = 0; v < hn_ids.size(); v++) {
            hn_to_mat_row[hn_ids[v]] = 2 * v;

            // Again in the light of reusing the LDS assembly code to assemble
            // the complete LDS
            Node& hn = (*this)[hn_ids[v]];
            hn.fourier_freqs = period_freqs;
        }

        for (int components = 0; components <= max_k; components++) {
            for (auto hn_id: hn_ids) {
                // Again in the light of reusing the LDS assembly code to assemble
                // the complete LDS
                Node& hn = (*this)[hn_id];
                hn.n_components = components;
            }

            this->compute_fourier_coefficients_from_least_square_optimization(
                                            sinusoidals, components, hn_ids);

            // Assemble the LDS that generates the head nodes with this
            // period using this number of components.
            auto [A_concept_period_base, s0_concept_period] =
                    this->assemble_head_node_modeling_LDS(A_sin_max_k_base,
                                                          s0_sin_max_k,
                                                          components,
                                                          hn_to_mat_row.size(),
                                                          hn_to_mat_row);

            bool rmses_are_reducing = determine_the_best_number_of_components(
                                                          A_concept_period_base,
                                                          s0_concept_period,
                                                          period,
                                                          components,
                                                          hn_to_mat_row);

            if (!rmses_are_reducing) {
                break;
            }
        }

        // Node& hn_dbg = (*this)[hn_ids[0]];  // TODO: Just for debugging delete
        // cout << "Best: " << hn_dbg.best_n_components << " : "
        //                                          << hn_dbg.best_rmse << endl;

        // Accumulate all the fourier frequencies needed to model all the head
        // nodes with this period
        int max_n_components_for_period = 0;
        for (auto hn_id: hn_ids) {
            Node& hn = (*this)[hn_id];

            // In the assemble_head_node_modeling_LDS() method
            // Node object members: n_components and fourier_coefficients are
            // accessed.
            // When we assemble the final transition matrix with all the fitted
            // seasonal head nodes and all the body nodes, we have to use Node
            // object members: best_n_components and best_fourier_coefficients,
            // for the final system to utilize the fitted seasonal models.
            // Therefore, we reassign best_n_components and
            // best_fourier_coefficient to n_components and
            // fourier_coefficients members so that we could reuse the
            // assemble_head_node_modeling_LDS() method
            // seamlessly without any change or state checking at the time of
            // assembling the transition matrix for the final complete system.
            hn.n_components = hn.best_n_components;
            hn.fourier_coefficients = hn.best_fourier_coefficients;
            hn.best_fourier_coefficients.resize(0);  // To save some memory

            if (hn.best_n_components > max_n_components_for_period) {
                max_n_components_for_period = hn.best_n_components;
            }
        }

        // Remember all the sinusoidal frequencies needed to fit the best
        // Fourier decomposition based seasonal model each of the
        // seasonal head nodes with this period.
        fourier_frequency_set.insert(period_freqs.begin(),
                            period_freqs.begin() + max_n_components_for_period);
    }

    // Assign transition matrix rows to seasonal head nodes.
    unordered_map<int, int> hn_to_mat_row;
    // Using only the seasonal head nodes
    //int row = 0;
    //for (int hn_id: this->head_nodes) {
    //    hn_to_mat_row[hn_id] = row;
    //    row += 2;
    //}
    //int n_concepts = this->head_nodes.size();
    // Using all the nodes
    for (int hn_id: this->head_nodes) {
        hn_to_mat_row[hn_id] = 2 * hn_id;
    }
    int n_concepts = this->num_vertices();

    std::pair<Eigen::MatrixXd, Eigen::VectorXd> seasonal_head_node_LDS =
      this->assemble_all_seasonal_head_node_modeling_LDS(fourier_frequency_set,
                                                         n_concepts,
                                                         hn_to_mat_row);

    //this->predictions_to_csv(seasonal_head_node_LDS.first,
    //                         seasonal_head_node_LDS.second, 24);

    return seasonal_head_node_LDS;
}