/* ---- This file is part of SECONDO. Copyright (C) 2020, Faculty of Mathematics and Computer Science, Database Systems for New Applications. SECONDO is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. SECONDO is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with SECONDO; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ---- //[$][\$] //[_][\_] */ #include "arima.h" using std::to_string; using std::max; // Constructor with all necessary data to predict a timeseries // using AR, MA, ARMA or ARIMA processes. Arima::Arima(int p, int d, int q, int no_predictions, vector timeseries_tuple_data): order_ma(q), order_ar(p), order_differencing(d), steps(no_predictions), timeseries_data(vector(timeseries_tuple_data.size(),0)), fitted_data(vector(timeseries_tuple_data.size() + no_predictions, 0)), differenced_data(vector(timeseries_tuple_data.size(), 0)), phi(timeseries_tuple_data.size(), vector(timeseries_tuple_data.size(), 0)), theta(timeseries_tuple_data.size(), vector(timeseries_tuple_data.size(), 0)), pacf(timeseries_tuple_data.size(), vector(timeseries_tuple_data.size(), 0)), acf(timeseries_tuple_data.size(), vector(timeseries_tuple_data.size(), 0)), ny(timeseries_tuple_data.size(),0), acvf(timeseries_tuple_data.size(),0) { ARMAHelper::getTSData(timeseries_tuple_data, timeseries_data); mean = ARMAHelper::computeMean(timeseries_data); ARMAHelper::computeACF(timeseries_data, acf); ARMAHelper::computePACF(timeseries_data, pacf, acf); for(size_t i = 0; i < timeseries_data.size(); ++i) acvf[i] = ARMAHelper::computeACVF(timeseries_data, i); } //Computes the coefficients for an AR process using the OLS method. Eigen::VectorXd Arima::compute_ar_coeffs() { int i,k; Eigen::Matrix R(order_ar, order_ar); Eigen::Matrix r(order_ar); for(i = 0; i < order_ar; ++i) { for(k= 0; k < order_ar; ++k) R(i,k) = acf[i][k]; } for(i = 0; i < order_ar; ++i) { r(i) = acf[0][i+1]; } return R.colPivHouseholderQr().solve(r); } void Arima::durbin_levinson_algorithm() { int i,j; vector nu(order_ar, 0); phi[0][0] = 1; phi[1][1] = gamma_hat(1) / gamma_hat(0); nu[0] = gamma_hat(0); nu[1] = gamma_hat(0) - phi[1][1] * gamma_hat(1); for(i = 2; i < order_ar; ++i) { long double sum_dividend = 0.0; long double sum_divisor = 0.0; for(j = 0; j <= i-1; ++j) { sum_dividend += phi[i-1][j] * gamma_hat(i-j); sum_divisor += phi[i-1][j] * gamma_hat(j); } phi[i][i] = (gamma_hat(i) -sum_dividend) / (1 -sum_divisor); for(j = 1; i == 2 ? j <= i-1 : j < i-1; j++) { phi[i][j] = phi[i-1][j] - (phi[i][i] *phi[i-1][i-1]); } } } vector Arima::ar() { int i, k, nobs = timeseries_data.size(); vector prediction(nobs + steps, 0); vector demeaned_data(nobs, 0); double mse = 0.0; //demean Eigen::Matrix R(order_ar, order_ar); Eigen::Matrix r(order_ar); for(i = 0; i < order_ar; ++i) { for(k= 0; k < order_ar; ++k) R(i,k) = acf[i][k]; } for(i = 0; i < order_ar; ++i) { r(i) = acf[0][i+1]; } Eigen::VectorXd phis = compute_ar_coeffs(); durbin_levinson_algorithm(); cout << "Rhos: "; for(i = 0; i < order_ar; ++i) { cout << to_string(phis.coeff(i)) <<"; "; } cout << "Durbin-Levinson estimated coefficients:"; for(i = 0; i < order_ar; ++i) { cout << to_string(phi[order_ar-1][i]) <<"; "; } cout < Arima::forecast() { int i, j, n, m, nobs; m = max(order_ar, order_ma); innovations_algorithm(); Eigen::VectorXd phis = compute_ar_coeffs(); if(order_differencing != 0) differencing(); double mean = ARMAHelper::computeMean(timeseries_data); nobs = timeseries_data.size(); vector prediction(timeseries_data.size() + steps, 0); vectorcentered_data(nobs, 0); //Center data for(i = 0; i < nobs; ++i) centered_data[i] = timeseries_data[i] - mean; //Initialize the predictions for MA for(i = 0; i < order_ma; ++i) { prediction[i] = timeseries_data[i]; } for(n = min(order_ar, order_ma); n < nobs + steps -1; ++n ) { double predicted_value = 0.0; if( n < m ) { for(j = 0; j < n; j++) { predicted_value += theta[order_ar][j] * (centered_data[n+1-j] -prediction[n+1-j]); } } else { double prediction_ma = 0.0; double prediction_ar = 0.0; for(j = 0; j < order_ar; ++j) { prediction_ma += theta[order_ar][j] * (centered_data[n+1-j] -prediction[n+1-j]); } for(i = 0 ; i < order_ar; ++i) { double coeff = phis.coeff(i); if(n < nobs) { prediction_ar += coeff * centered_data[n -1 -i]; } else { prediction_ar += coeff * prediction[n -1 -i]; } } predicted_value = prediction_ar + prediction_ma; } prediction[n+1] = predicted_value; } double mse = 0; //restore values for(i = 0; i < nobs + steps; ++i) { prediction[i] = prediction[i] + mean; if(i < nobs) mse += pow(prediction[i] - timeseries_data[i],2); } cout << "MSE: " << to_string(mse/nobs) << endl; return prediction; } //Computes the moving average process utilizing the innovations algorithm as // a estimator for the theta parameters. vector Arima::ma() { innovations_algorithm(); int i, k, nobs = timeseries_data.size(); vector prediction(nobs + steps, 0); vector demeaned_data(nobs, 0); double mse = 0.0; //center data for(i = 0; i < nobs; ++i) { demeaned_data[i] = timeseries_data[i] - mean; } for(i = order_ma; i < nobs; ++i) prediction[i] = demeaned_data[i]; int size = theta[order_ma].size(); cout << "Coeffizients Theta: "; for(i = 0; i < order_ma && i < size; ++i) { cout << to_string(theta[order_ma][i]) << "; "; } cout << endl; // LMFunctor my_functor; // my_functor.m_values = timeseries_data.size(); // my_functor.m_inputs = order_ma; // my_functor.order_ma = order_ma; // my_functor.timeseries_data = timeseries_data; // Eigen::VectorXd x(order_ma); // for(i = 0; i < order_ma; ++i) // x(i) = theta[order_ma][i]; // Eigen::NumericalDiff numDiff(my_functor); // Eigen::LevenbergMarquardt, double> // lm(numDiff); // lm.parameters.xtol = 1.0e-10; // lm.parameters.maxfev = 2000; // lm.minimize(x); // cout << "Levenberg-Marquardt optimized coeffizients: "; // for(i = 0; i < order_ma; ++i) // { // cout << to_string(x(i)) << "; "; // } // cout << endl; for( k = 0; k < nobs + steps; ++k) { double predicted_value = 0; for(i = 0 ; min(i, order_ma) < order_ma; ++i) { double coeff = theta[order_ma][i]; if(i < nobs) { predicted_value += theta[order_ma][i] * (timeseries_data[k + 1 - i] - prediction[k + 1 - i]) ; } else //no further observed values { predicted_value += coeff * (prediction[k + 1 - i]) ; } } prediction[k] = predicted_value; } for(i = 0; i < nobs + steps; ++i) { prediction[i] = prediction[i] + mean; if(i < nobs) { mse += pow(prediction[i] - timeseries_data[i], 2); } } cout << "MSE: " << to_string(mse/nobs) << endl; return prediction; } vector Arima::arma() { vector prediction; return prediction; } vector Arima::arima() { vector prediction; return prediction; } // @brief Arima::innovations_algorithm classic innovations //algorithm to estimate the parameters // of an MA process. void Arima::innovations_algorithm() { ny[0] = acvf[0]; for ( size_t i = 1; i < timeseries_data.size(); ++i) { for( size_t k = 0; k < i; ++k ) { double sub = 0; for( size_t j = 0; j < k; ++j) { sub += theta[k][k-j] * theta[i][i-j] * ny[j]; } theta[i][i-k] = 1 / ny[k] * (acvf[i-k] - sub); ny[i] = acvf[0]; for ( size_t j = 0; j <= i; ++j) { ny[i] -= pow( theta[i][i-j], 2) * ny[j]; } } } int size = timeseries_data.size() -1; for(int i = 0; i < size; ++i) { for(int j = 0; j < size; ++j) { theta[i][j] = theta[i + 1][j + 1]; } } } // Differences the timeseries data with the given order to make it stationary. void Arima::differencing() { int i, j, nobs = timeseries_data.size(); for(i = 0; i < order_differencing; ++i) { for( j = 0; j < nobs - i + order_differencing ; ++j) { timeseries_data[j] = timeseries_data[j] - timeseries_data[i]; } } timeseries_data.erase(timeseries_data.begin() + nobs - order_differencing); timeseries_data.shrink_to_fit(); }